From 65bb2e0fc5a67200db95911207ef9662aaf7ab9d Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Tue, 18 Aug 2020 07:16:11 +0000 Subject: libbpf: Convert comma to semicolon Replace a comma between expression statements by a semicolon. Signed-off-by: Xu Wang Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200818071611.21923-1-vulab@iscas.ac.cn --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5d20b2da4427..5055e1531e43 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8594,7 +8594,7 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, struct perf_buffer_params p = {}; struct perf_event_attr attr = { 0, }; - attr.config = PERF_COUNT_SW_BPF_OUTPUT, + attr.config = PERF_COUNT_SW_BPF_OUTPUT; attr.type = PERF_TYPE_SOFTWARE; attr.sample_type = PERF_SAMPLE_RAW; attr.sample_period = 1; -- cgit v1.3.1 From 8d708236058601f1b08b0ca2a1cd9fff28034416 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 14:33:50 -0700 Subject: libbpf: Disable -Wswitch-enum compiler warning That compilation warning is more annoying, than helpful. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200818213356.2629020-2-andriin@fb.com --- tools/lib/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index bf8ed134cb8a..95c946e94ca5 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -107,7 +107,7 @@ ifeq ($(feature-reallocarray), 0) endif # Append required CFLAGS -override CFLAGS += $(EXTRA_WARNINGS) +override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum override CFLAGS += -Werror -Wall override CFLAGS += -fPIC override CFLAGS += $(INCLUDES) -- cgit v1.3.1 From 47b6cb4d0add454859f386337e6079e01ac7db8f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 14:33:51 -0700 Subject: libbpf: Make kernel feature probing lazy Turn libbpf's kernel feature probing into lazily-performed checks. This allows to skip performing unnecessary feature checks, if a given BPF application doesn't rely on a particular kernel feature. As we grow number of feature probes, libbpf might perform less unnecessary syscalls and scale better with number of feature probes long-term. By decoupling feature checks from bpf_object, it's also possible to perform feature probing from libbpf static helpers and low-level APIs, if necessary. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200818213356.2629020-3-andriin@fb.com --- tools/lib/bpf/libbpf.c | 150 ++++++++++++++++++++++++++++--------------------- 1 file changed, 86 insertions(+), 64 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5055e1531e43..2ee168822d33 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -165,23 +165,26 @@ static inline __u64 ptr_to_u64(const void *ptr) return (__u64) (unsigned long) ptr; } -struct bpf_capabilities { +enum kern_feature_id { /* v4.14: kernel support for program & map names. */ - __u32 name:1; + FEAT_PROG_NAME, /* v5.2: kernel support for global data sections. */ - __u32 global_data:1; + FEAT_GLOBAL_DATA, /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */ - __u32 btf_func:1; + FEAT_BTF_FUNC, /* BTF_KIND_VAR and BTF_KIND_DATASEC support */ - __u32 btf_datasec:1; - /* BPF_F_MMAPABLE is supported for arrays */ - __u32 array_mmap:1; + FEAT_BTF_DATASEC, /* BTF_FUNC_GLOBAL is supported */ - __u32 btf_func_global:1; + FEAT_BTF_GLOBAL_FUNC, + /* BPF_F_MMAPABLE is supported for arrays */ + FEAT_ARRAY_MMAP, /* kernel support for expected_attach_type in BPF_PROG_LOAD */ - __u32 exp_attach_type:1; + FEAT_EXP_ATTACH_TYPE, + __FEAT_CNT, }; +static bool kernel_supports(enum kern_feature_id feat_id); + enum reloc_type { RELO_LD64, RELO_CALL, @@ -253,8 +256,6 @@ struct bpf_program { __u32 func_info_rec_size; __u32 func_info_cnt; - struct bpf_capabilities *caps; - void *line_info; __u32 line_info_rec_size; __u32 line_info_cnt; @@ -436,8 +437,6 @@ struct bpf_object { void *priv; bpf_object_clear_priv_t clear_priv; - struct bpf_capabilities caps; - char path[]; }; #define obj_elf_valid(o) ((o)->efile.elf) @@ -561,7 +560,6 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, if (err) return err; - prog.caps = &obj->caps; progs = obj->programs; nr_progs = obj->nr_programs; @@ -2340,18 +2338,18 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx) static bool btf_needs_sanitization(struct bpf_object *obj) { - bool has_func_global = obj->caps.btf_func_global; - bool has_datasec = obj->caps.btf_datasec; - bool has_func = obj->caps.btf_func; + bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC); + bool has_datasec = kernel_supports(FEAT_BTF_DATASEC); + bool has_func = kernel_supports(FEAT_BTF_FUNC); return !has_func || !has_datasec || !has_func_global; } static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) { - bool has_func_global = obj->caps.btf_func_global; - bool has_datasec = obj->caps.btf_datasec; - bool has_func = obj->caps.btf_func; + bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC); + bool has_datasec = kernel_supports(FEAT_BTF_DATASEC); + bool has_func = kernel_supports(FEAT_BTF_FUNC); struct btf_type *t; int i, j, vlen; @@ -3433,8 +3431,7 @@ bpf_object__probe_loading(struct bpf_object *obj) return 0; } -static int -bpf_object__probe_name(struct bpf_object *obj) +static int probe_kern_prog_name(void) { struct bpf_load_program_attr attr; struct bpf_insn insns[] = { @@ -3453,15 +3450,14 @@ bpf_object__probe_name(struct bpf_object *obj) attr.name = "test"; ret = bpf_load_program_xattr(&attr, NULL, 0); if (ret >= 0) { - obj->caps.name = 1; close(ret); + return 1; } return 0; } -static int -bpf_object__probe_global_data(struct bpf_object *obj) +static int probe_kern_global_data(void) { struct bpf_load_program_attr prg_attr; struct bpf_create_map_attr map_attr; @@ -3498,16 +3494,16 @@ bpf_object__probe_global_data(struct bpf_object *obj) prg_attr.license = "GPL"; ret = bpf_load_program_xattr(&prg_attr, NULL, 0); + close(map); if (ret >= 0) { - obj->caps.global_data = 1; close(ret); + return 1; } - close(map); return 0; } -static int bpf_object__probe_btf_func(struct bpf_object *obj) +static int probe_kern_btf_func(void) { static const char strs[] = "\0int\0x\0a"; /* void x(int a) {} */ @@ -3525,7 +3521,6 @@ static int bpf_object__probe_btf_func(struct bpf_object *obj) btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); if (btf_fd >= 0) { - obj->caps.btf_func = 1; close(btf_fd); return 1; } @@ -3533,7 +3528,7 @@ static int bpf_object__probe_btf_func(struct bpf_object *obj) return 0; } -static int bpf_object__probe_btf_func_global(struct bpf_object *obj) +static int probe_kern_btf_func_global(void) { static const char strs[] = "\0int\0x\0a"; /* static void x(int a) {} */ @@ -3551,7 +3546,6 @@ static int bpf_object__probe_btf_func_global(struct bpf_object *obj) btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); if (btf_fd >= 0) { - obj->caps.btf_func_global = 1; close(btf_fd); return 1; } @@ -3559,7 +3553,7 @@ static int bpf_object__probe_btf_func_global(struct bpf_object *obj) return 0; } -static int bpf_object__probe_btf_datasec(struct bpf_object *obj) +static int probe_kern_btf_datasec(void) { static const char strs[] = "\0x\0.data"; /* static int a; */ @@ -3578,7 +3572,6 @@ static int bpf_object__probe_btf_datasec(struct bpf_object *obj) btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); if (btf_fd >= 0) { - obj->caps.btf_datasec = 1; close(btf_fd); return 1; } @@ -3586,7 +3579,7 @@ static int bpf_object__probe_btf_datasec(struct bpf_object *obj) return 0; } -static int bpf_object__probe_array_mmap(struct bpf_object *obj) +static int probe_kern_array_mmap(void) { struct bpf_create_map_attr attr = { .map_type = BPF_MAP_TYPE_ARRAY, @@ -3599,16 +3592,13 @@ static int bpf_object__probe_array_mmap(struct bpf_object *obj) fd = bpf_create_map_xattr(&attr); if (fd >= 0) { - obj->caps.array_mmap = 1; close(fd); return 1; } - return 0; } -static int -bpf_object__probe_exp_attach_type(struct bpf_object *obj) +static int probe_kern_exp_attach_type(void) { struct bpf_load_program_attr attr; struct bpf_insn insns[] = { @@ -3631,34 +3621,67 @@ bpf_object__probe_exp_attach_type(struct bpf_object *obj) fd = bpf_load_program_xattr(&attr, NULL, 0); if (fd >= 0) { - obj->caps.exp_attach_type = 1; close(fd); return 1; } return 0; } -static int -bpf_object__probe_caps(struct bpf_object *obj) -{ - int (*probe_fn[])(struct bpf_object *obj) = { - bpf_object__probe_name, - bpf_object__probe_global_data, - bpf_object__probe_btf_func, - bpf_object__probe_btf_func_global, - bpf_object__probe_btf_datasec, - bpf_object__probe_array_mmap, - bpf_object__probe_exp_attach_type, - }; - int i, ret; +enum kern_feature_result { + FEAT_UNKNOWN = 0, + FEAT_SUPPORTED = 1, + FEAT_MISSING = 2, +}; - for (i = 0; i < ARRAY_SIZE(probe_fn); i++) { - ret = probe_fn[i](obj); - if (ret < 0) - pr_debug("Probe #%d failed with %d.\n", i, ret); +typedef int (*feature_probe_fn)(void); + +static struct kern_feature_desc { + const char *desc; + feature_probe_fn probe; + enum kern_feature_result res; +} feature_probes[__FEAT_CNT] = { + [FEAT_PROG_NAME] = { + "BPF program name", probe_kern_prog_name, + }, + [FEAT_GLOBAL_DATA] = { + "global variables", probe_kern_global_data, + }, + [FEAT_BTF_FUNC] = { + "BTF functions", probe_kern_btf_func, + }, + [FEAT_BTF_GLOBAL_FUNC] = { + "BTF global function", probe_kern_btf_func_global, + }, + [FEAT_BTF_DATASEC] = { + "BTF data section and variable", probe_kern_btf_datasec, + }, + [FEAT_ARRAY_MMAP] = { + "ARRAY map mmap()", probe_kern_array_mmap, + }, + [FEAT_EXP_ATTACH_TYPE] = { + "BPF_PROG_LOAD expected_attach_type attribute", + probe_kern_exp_attach_type, + }, +}; + +static bool kernel_supports(enum kern_feature_id feat_id) +{ + struct kern_feature_desc *feat = &feature_probes[feat_id]; + int ret; + + if (READ_ONCE(feat->res) == FEAT_UNKNOWN) { + ret = feat->probe(); + if (ret > 0) { + WRITE_ONCE(feat->res, FEAT_SUPPORTED); + } else if (ret == 0) { + WRITE_ONCE(feat->res, FEAT_MISSING); + } else { + pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); + WRITE_ONCE(feat->res, FEAT_MISSING); + } } - return 0; + return READ_ONCE(feat->res) == FEAT_SUPPORTED; } static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) @@ -3760,7 +3783,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map) memset(&create_attr, 0, sizeof(create_attr)); - if (obj->caps.name) + if (kernel_supports(FEAT_PROG_NAME)) create_attr.name = map->name; create_attr.map_ifindex = map->map_ifindex; create_attr.map_type = def->type; @@ -5364,12 +5387,12 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); load_attr.prog_type = prog->type; /* old kernels might not support specifying expected_attach_type */ - if (!prog->caps->exp_attach_type && prog->sec_def && + if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def && prog->sec_def->is_exp_attach_type_optional) load_attr.expected_attach_type = 0; else load_attr.expected_attach_type = prog->expected_attach_type; - if (prog->caps->name) + if (kernel_supports(FEAT_PROG_NAME)) load_attr.name = prog->name; load_attr.insns = insns; load_attr.insns_cnt = insns_cnt; @@ -5387,7 +5410,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, } /* specify func_info/line_info only if kernel supports them */ btf_fd = bpf_object__btf_fd(prog->obj); - if (btf_fd >= 0 && prog->obj->caps.btf_func) { + if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) { load_attr.prog_btf_fd = btf_fd; load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; @@ -5750,11 +5773,11 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj) bpf_object__for_each_map(m, obj) { if (!bpf_map__is_internal(m)) continue; - if (!obj->caps.global_data) { + if (!kernel_supports(FEAT_GLOBAL_DATA)) { pr_warn("kernel doesn't support global data\n"); return -ENOTSUP; } - if (!obj->caps.array_mmap) + if (!kernel_supports(FEAT_ARRAY_MMAP)) m->def.map_flags ^= BPF_F_MMAPABLE; } @@ -5904,7 +5927,6 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) } err = bpf_object__probe_loading(obj); - err = err ? : bpf_object__probe_caps(obj); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); err = err ? : bpf_object__sanitize_and_load_btf(obj); err = err ? : bpf_object__sanitize_maps(obj); -- cgit v1.3.1 From bb180fb2401dfed0ba85699064e60d62af351ff9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 14:33:52 -0700 Subject: libbpf: Factor out common logic of testing and closing FD Factor out common piece of logic that detects support for a feature based on successfully created FD. Also take care of closing FD, if it was created. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200818213356.2629020-4-andriin@fb.com --- tools/lib/bpf/libbpf.c | 70 ++++++++++++-------------------------------------- 1 file changed, 17 insertions(+), 53 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2ee168822d33..08875ab9fe35 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3431,6 +3431,13 @@ bpf_object__probe_loading(struct bpf_object *obj) return 0; } +static int probe_fd(int fd) +{ + if (fd >= 0) + close(fd); + return fd >= 0; +} + static int probe_kern_prog_name(void) { struct bpf_load_program_attr attr; @@ -3449,12 +3456,7 @@ static int probe_kern_prog_name(void) attr.license = "GPL"; attr.name = "test"; ret = bpf_load_program_xattr(&attr, NULL, 0); - if (ret >= 0) { - close(ret); - return 1; - } - - return 0; + return probe_fd(ret); } static int probe_kern_global_data(void) @@ -3495,12 +3497,7 @@ static int probe_kern_global_data(void) ret = bpf_load_program_xattr(&prg_attr, NULL, 0); close(map); - if (ret >= 0) { - close(ret); - return 1; - } - - return 0; + return probe_fd(ret); } static int probe_kern_btf_func(void) @@ -3516,16 +3513,9 @@ static int probe_kern_btf_func(void) /* FUNC x */ /* [3] */ BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), }; - int btf_fd; - btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs)); - if (btf_fd >= 0) { - close(btf_fd); - return 1; - } - - return 0; + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); } static int probe_kern_btf_func_global(void) @@ -3541,16 +3531,9 @@ static int probe_kern_btf_func_global(void) /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), }; - int btf_fd; - btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs)); - if (btf_fd >= 0) { - close(btf_fd); - return 1; - } - - return 0; + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); } static int probe_kern_btf_datasec(void) @@ -3567,16 +3550,9 @@ static int probe_kern_btf_datasec(void) BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), BTF_VAR_SECINFO_ENC(2, 0, 4), }; - int btf_fd; - btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs)); - if (btf_fd >= 0) { - close(btf_fd); - return 1; - } - - return 0; + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); } static int probe_kern_array_mmap(void) @@ -3588,14 +3564,8 @@ static int probe_kern_array_mmap(void) .value_size = sizeof(int), .max_entries = 1, }; - int fd; - fd = bpf_create_map_xattr(&attr); - if (fd >= 0) { - close(fd); - return 1; - } - return 0; + return probe_fd(bpf_create_map_xattr(&attr)); } static int probe_kern_exp_attach_type(void) @@ -3605,7 +3575,6 @@ static int probe_kern_exp_attach_type(void) BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; - int fd; memset(&attr, 0, sizeof(attr)); /* use any valid combination of program type and (optional) @@ -3619,12 +3588,7 @@ static int probe_kern_exp_attach_type(void) attr.insns_cnt = ARRAY_SIZE(insns); attr.license = "GPL"; - fd = bpf_load_program_xattr(&attr, NULL, 0); - if (fd >= 0) { - close(fd); - return 1; - } - return 0; + return probe_fd(bpf_load_program_xattr(&attr, NULL, 0)); } enum kern_feature_result { -- cgit v1.3.1 From 109cea5a594f921632ca599b385aaa0575971045 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 14:33:53 -0700 Subject: libbpf: Sanitize BPF program code for bpf_probe_read_{kernel, user}[_str] Add BPF program code sanitization pass, replacing calls to BPF bpf_probe_read_{kernel,user}[_str]() helpers with bpf_probe_read[_str](), if libbpf detects that kernel doesn't support new variants. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200818213356.2629020-5-andriin@fb.com --- tools/lib/bpf/libbpf.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 08875ab9fe35..e3ab1794d2c3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -180,6 +180,8 @@ enum kern_feature_id { FEAT_ARRAY_MMAP, /* kernel support for expected_attach_type in BPF_PROG_LOAD */ FEAT_EXP_ATTACH_TYPE, + /* bpf_probe_read_{kernel,user}[_str] helpers */ + FEAT_PROBE_READ_KERN, __FEAT_CNT, }; @@ -3591,6 +3593,27 @@ static int probe_kern_exp_attach_type(void) return probe_fd(bpf_load_program_xattr(&attr, NULL, 0)); } +static int probe_kern_probe_read_kernel(void) +{ + struct bpf_load_program_attr attr; + struct bpf_insn insns[] = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ + BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ + BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), + BPF_EXIT_INSN(), + }; + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_KPROBE; + attr.insns = insns; + attr.insns_cnt = ARRAY_SIZE(insns); + attr.license = "GPL"; + + return probe_fd(bpf_load_program_xattr(&attr, NULL, 0)); +} + enum kern_feature_result { FEAT_UNKNOWN = 0, FEAT_SUPPORTED = 1, @@ -3626,6 +3649,9 @@ static struct kern_feature_desc { "BPF_PROG_LOAD expected_attach_type attribute", probe_kern_exp_attach_type, }, + [FEAT_PROBE_READ_KERN] = { + "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, + } }; static bool kernel_supports(enum kern_feature_id feat_id) @@ -5335,6 +5361,53 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) return 0; } +static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id) +{ + __u8 class = BPF_CLASS(insn->code); + + if ((class == BPF_JMP || class == BPF_JMP32) && + BPF_OP(insn->code) == BPF_CALL && + BPF_SRC(insn->code) == BPF_K && + insn->src_reg == 0 && insn->dst_reg == 0) { + if (func_id) + *func_id = insn->imm; + return true; + } + return false; +} + +static int bpf_object__sanitize_prog(struct bpf_object* obj, struct bpf_program *prog) +{ + struct bpf_insn *insn = prog->insns; + enum bpf_func_id func_id; + int i; + + for (i = 0; i < prog->insns_cnt; i++, insn++) { + if (!insn_is_helper_call(insn, &func_id)) + continue; + + /* on kernels that don't yet support + * bpf_probe_read_{kernel,user}[_str] helpers, fall back + * to bpf_probe_read() which works well for old kernels + */ + switch (func_id) { + case BPF_FUNC_probe_read_kernel: + case BPF_FUNC_probe_read_user: + if (!kernel_supports(FEAT_PROBE_READ_KERN)) + insn->imm = BPF_FUNC_probe_read; + break; + case BPF_FUNC_probe_read_kernel_str: + case BPF_FUNC_probe_read_user_str: + if (!kernel_supports(FEAT_PROBE_READ_KERN)) + insn->imm = BPF_FUNC_probe_read_str; + break; + default: + break; + } + } + return 0; +} + static int load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, char *license, __u32 kern_version, int *pfd) @@ -5549,6 +5622,13 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) size_t i; int err; + for (i = 0; i < obj->nr_programs; i++) { + prog = &obj->programs[i]; + err = bpf_object__sanitize_prog(obj, prog); + if (err) + return err; + } + for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; if (bpf_program__is_function_storage(prog, obj)) -- cgit v1.3.1 From 02f47faa25db134f6043fb6b12a68b5d4c980bb6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 14:33:54 -0700 Subject: selftests/bpf: Fix test_vmlinux test to use bpf_probe_read_user() The test is reading UAPI kernel structure from user-space. So it doesn't need CO-RE relocations and has to use bpf_probe_read_user(). Fixes: acbd06206bbb ("selftests/bpf: Add vmlinux.h selftest exercising tracing of syscalls") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200818213356.2629020-6-andriin@fb.com --- tools/testing/selftests/bpf/progs/test_vmlinux.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c index 29fa09d6a6c6..e9dfa0313d1b 100644 --- a/tools/testing/selftests/bpf/progs/test_vmlinux.c +++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c @@ -19,12 +19,14 @@ SEC("tp/syscalls/sys_enter_nanosleep") int handle__tp(struct trace_event_raw_sys_enter *args) { struct __kernel_timespec *ts; + long tv_nsec; if (args->id != __NR_nanosleep) return 0; ts = (void *)args->args[0]; - if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC) + if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) || + tv_nsec != MY_TV_NSEC) return 0; tp_called = true; @@ -35,12 +37,14 @@ SEC("raw_tp/sys_enter") int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id) { struct __kernel_timespec *ts; + long tv_nsec; if (id != __NR_nanosleep) return 0; ts = (void *)PT_REGS_PARM1_CORE(regs); - if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC) + if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) || + tv_nsec != MY_TV_NSEC) return 0; raw_tp_called = true; @@ -51,12 +55,14 @@ SEC("tp_btf/sys_enter") int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id) { struct __kernel_timespec *ts; + long tv_nsec; if (id != __NR_nanosleep) return 0; ts = (void *)PT_REGS_PARM1_CORE(regs); - if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC) + if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) || + tv_nsec != MY_TV_NSEC) return 0; tp_btf_called = true; -- cgit v1.3.1 From 70785cfb19287b4b075a266887be5e024bf1206a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 14:33:55 -0700 Subject: libbpf: Switch tracing and CO-RE helper macros to bpf_probe_read_kernel() Now that libbpf can automatically fallback to bpf_probe_read() on old kernels not yet supporting bpf_probe_read_kernel(), switch libbpf BPF-side helper macros to use appropriate BPF helper for reading kernel data. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Cc: Ilya Leoshkevich Link: https://lore.kernel.org/bpf/20200818213356.2629020-7-andriin@fb.com --- tools/lib/bpf/bpf_core_read.h | 40 ++++++++++++++++++++++------------------ tools/lib/bpf/bpf_tracing.h | 4 ++-- 2 files changed, 24 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index eae5cccff761..03152cb143b7 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -24,27 +24,29 @@ enum bpf_field_info_kind { #if __BYTE_ORDER == __LITTLE_ENDIAN #define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ - bpf_probe_read((void *)dst, \ - __CORE_RELO(src, fld, BYTE_SIZE), \ - (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) + bpf_probe_read_kernel( \ + (void *)dst, \ + __CORE_RELO(src, fld, BYTE_SIZE), \ + (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) #else /* semantics of LSHIFT_64 assumes loading values into low-ordered bytes, so * for big-endian we need to adjust destination pointer accordingly, based on * field byte size */ #define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ - bpf_probe_read((void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \ - __CORE_RELO(src, fld, BYTE_SIZE), \ - (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) + bpf_probe_read_kernel( \ + (void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \ + __CORE_RELO(src, fld, BYTE_SIZE), \ + (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) #endif /* * Extract bitfield, identified by s->field, and return its value as u64. * All this is done in relocatable manner, so bitfield changes such as * signedness, bit size, offset changes, this will be handled automatically. - * This version of macro is using bpf_probe_read() to read underlying integer - * storage. Macro functions as an expression and its return type is - * bpf_probe_read()'s return value: 0, on success, <0 on error. + * This version of macro is using bpf_probe_read_kernel() to read underlying + * integer storage. Macro functions as an expression and its return type is + * bpf_probe_read_kernel()'s return value: 0, on success, <0 on error. */ #define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({ \ unsigned long long val = 0; \ @@ -99,8 +101,8 @@ enum bpf_field_info_kind { __builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE) /* - * bpf_core_read() abstracts away bpf_probe_read() call and captures offset - * relocation for source address using __builtin_preserve_access_index() + * bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures + * offset relocation for source address using __builtin_preserve_access_index() * built-in, provided by Clang. * * __builtin_preserve_access_index() takes as an argument an expression of @@ -115,8 +117,8 @@ enum bpf_field_info_kind { * (local) BTF, used to record relocation. */ #define bpf_core_read(dst, sz, src) \ - bpf_probe_read(dst, sz, \ - (const void *)__builtin_preserve_access_index(src)) + bpf_probe_read_kernel(dst, sz, \ + (const void *)__builtin_preserve_access_index(src)) /* * bpf_core_read_str() is a thin wrapper around bpf_probe_read_str() @@ -124,8 +126,8 @@ enum bpf_field_info_kind { * argument. */ #define bpf_core_read_str(dst, sz, src) \ - bpf_probe_read_str(dst, sz, \ - (const void *)__builtin_preserve_access_index(src)) + bpf_probe_read_kernel_str(dst, sz, \ + (const void *)__builtin_preserve_access_index(src)) #define ___concat(a, b) a ## b #define ___apply(fn, n) ___concat(fn, n) @@ -239,15 +241,17 @@ enum bpf_field_info_kind { * int x = BPF_CORE_READ(s, a.b.c, d.e, f, g); * * BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF - * CO-RE relocatable bpf_probe_read() wrapper) calls, logically equivalent to: + * CO-RE relocatable bpf_probe_read_kernel() wrapper) calls, logically + * equivalent to: * 1. const void *__t = s->a.b.c; * 2. __t = __t->d.e; * 3. __t = __t->f; * 4. return __t->g; * * Equivalence is logical, because there is a heavy type casting/preservation - * involved, as well as all the reads are happening through bpf_probe_read() - * calls using __builtin_preserve_access_index() to emit CO-RE relocations. + * involved, as well as all the reads are happening through + * bpf_probe_read_kernel() calls using __builtin_preserve_access_index() to + * emit CO-RE relocations. * * N.B. Only up to 9 "field accessors" are supported, which should be more * than enough for any practical purpose. diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index eebf020cbe3e..f9ef37707888 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -289,9 +289,9 @@ struct pt_regs; #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP #else #define BPF_KPROBE_READ_RET_IP(ip, ctx) \ - ({ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) + ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \ - ({ bpf_probe_read(&(ip), sizeof(ip), \ + ({ bpf_probe_read_kernel(&(ip), sizeof(ip), \ (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) #endif -- cgit v1.3.1 From 68b08647c7d56ea15114435c3023adc017d11b81 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 14:33:56 -0700 Subject: libbpf: Detect minimal BTF support and skip BTF loading, if missing Detect whether a kernel supports any BTF at all, and if not, don't even attempt loading BTF to avoid unnecessary log messages like: libbpf: Error loading BTF: Invalid argument(22) libbpf: Error loading .BTF into kernel: -22. BTF is optional, ignoring. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200818213356.2629020-8-andriin@fb.com --- tools/lib/bpf/libbpf.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e3ab1794d2c3..775fa6317483 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -170,6 +170,8 @@ enum kern_feature_id { FEAT_PROG_NAME, /* v5.2: kernel support for global data sections. */ FEAT_GLOBAL_DATA, + /* BTF support */ + FEAT_BTF, /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */ FEAT_BTF_FUNC, /* BTF_KIND_VAR and BTF_KIND_DATASEC support */ @@ -2533,6 +2535,15 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) if (!obj->btf) return 0; + if (!kernel_supports(FEAT_BTF)) { + if (kernel_needs_btf(obj)) { + err = -EOPNOTSUPP; + goto report; + } + pr_debug("Kernel doesn't support BTF, skipping uploading it.\n"); + return 0; + } + sanitize = btf_needs_sanitization(obj); if (sanitize) { const void *raw_data; @@ -2558,6 +2569,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) } btf__free(kern_btf); } +report: if (err) { btf_mandatory = kernel_needs_btf(obj); pr_warn("Error loading .BTF into kernel: %d. %s\n", err, @@ -3502,6 +3514,18 @@ static int probe_kern_global_data(void) return probe_fd(ret); } +static int probe_kern_btf(void) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + static int probe_kern_btf_func(void) { static const char strs[] = "\0int\0x\0a"; @@ -3633,6 +3657,9 @@ static struct kern_feature_desc { [FEAT_GLOBAL_DATA] = { "global variables", probe_kern_global_data, }, + [FEAT_BTF] = { + "minimal BTF", probe_kern_btf, + }, [FEAT_BTF_FUNC] = { "BTF functions", probe_kern_btf_func, }, -- cgit v1.3.1 From 81ba0889027505b7d5136319117e473a69a923c4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 15:39:13 -0700 Subject: libbpf: Improve error logging for mismatched BTF kind cases Instead of printing out integer value of BTF kind, print out a string representation of a kind. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200818223921.2911963-2-andriin@fb.com --- tools/lib/bpf/libbpf.c | 59 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 775fa6317483..94d92763bcb1 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1886,6 +1886,29 @@ resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id) return btf_is_func_proto(t) ? t : NULL; } +static const char *btf_kind_str(const struct btf_type *t) +{ + switch (btf_kind(t)) { + case BTF_KIND_UNKN: return "void"; + case BTF_KIND_INT: return "int"; + case BTF_KIND_PTR: return "ptr"; + case BTF_KIND_ARRAY: return "array"; + case BTF_KIND_STRUCT: return "struct"; + case BTF_KIND_UNION: return "union"; + case BTF_KIND_ENUM: return "enum"; + case BTF_KIND_FWD: return "fwd"; + case BTF_KIND_TYPEDEF: return "typedef"; + case BTF_KIND_VOLATILE: return "volatile"; + case BTF_KIND_CONST: return "const"; + case BTF_KIND_RESTRICT: return "restrict"; + case BTF_KIND_FUNC: return "func"; + case BTF_KIND_FUNC_PROTO: return "func_proto"; + case BTF_KIND_VAR: return "var"; + case BTF_KIND_DATASEC: return "datasec"; + default: return "unknown"; + } +} + /* * Fetch integer attribute of BTF map definition. Such attributes are * represented using a pointer to an array, in which dimensionality of array @@ -1902,8 +1925,8 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf, const struct btf_type *arr_t; if (!btf_is_ptr(t)) { - pr_warn("map '%s': attr '%s': expected PTR, got %u.\n", - map_name, name, btf_kind(t)); + pr_warn("map '%s': attr '%s': expected PTR, got %s.\n", + map_name, name, btf_kind_str(t)); return false; } @@ -1914,8 +1937,8 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf, return false; } if (!btf_is_array(arr_t)) { - pr_warn("map '%s': attr '%s': expected ARRAY, got %u.\n", - map_name, name, btf_kind(arr_t)); + pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n", + map_name, name, btf_kind_str(arr_t)); return false; } arr_info = btf_array(arr_t); @@ -2009,8 +2032,8 @@ static int parse_btf_map_def(struct bpf_object *obj, return -EINVAL; } if (!btf_is_ptr(t)) { - pr_warn("map '%s': key spec is not PTR: %u.\n", - map->name, btf_kind(t)); + pr_warn("map '%s': key spec is not PTR: %s.\n", + map->name, btf_kind_str(t)); return -EINVAL; } sz = btf__resolve_size(obj->btf, t->type); @@ -2051,8 +2074,8 @@ static int parse_btf_map_def(struct bpf_object *obj, return -EINVAL; } if (!btf_is_ptr(t)) { - pr_warn("map '%s': value spec is not PTR: %u.\n", - map->name, btf_kind(t)); + pr_warn("map '%s': value spec is not PTR: %s.\n", + map->name, btf_kind_str(t)); return -EINVAL; } sz = btf__resolve_size(obj->btf, t->type); @@ -2109,14 +2132,14 @@ static int parse_btf_map_def(struct bpf_object *obj, t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type, NULL); if (!btf_is_ptr(t)) { - pr_warn("map '%s': map-in-map inner def is of unexpected kind %u.\n", - map->name, btf_kind(t)); + pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", + map->name, btf_kind_str(t)); return -EINVAL; } t = skip_mods_and_typedefs(obj->btf, t->type, NULL); if (!btf_is_struct(t)) { - pr_warn("map '%s': map-in-map inner def is of unexpected kind %u.\n", - map->name, btf_kind(t)); + pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", + map->name, btf_kind_str(t)); return -EINVAL; } @@ -2207,8 +2230,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, return -EINVAL; } if (!btf_is_var(var)) { - pr_warn("map '%s': unexpected var kind %u.\n", - map_name, btf_kind(var)); + pr_warn("map '%s': unexpected var kind %s.\n", + map_name, btf_kind_str(var)); return -EINVAL; } if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED && @@ -2220,8 +2243,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, def = skip_mods_and_typedefs(obj->btf, var->type, NULL); if (!btf_is_struct(def)) { - pr_warn("map '%s': unexpected def kind %u.\n", - map_name, btf_kind(var)); + pr_warn("map '%s': unexpected def kind %s.\n", + map_name, btf_kind_str(var)); return -EINVAL; } if (def->size > vi->size) { @@ -4212,8 +4235,8 @@ static int bpf_core_spec_parse(const struct btf *btf, return sz; spec->bit_offset += access_idx * sz * 8; } else { - pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n", - type_id, spec_str, i, id, btf_kind(t)); + pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", + type_id, spec_str, i, id, btf_kind_str(t)); return -EINVAL; } } -- cgit v1.3.1 From 28b93c64499ae09d9dc8c04123b15f8654a93c4c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 15:39:14 -0700 Subject: libbpf: Clean up and improve CO-RE reloc logging Add logging of local/target type kind (struct/union/typedef/etc). Preserve unresolved root type ID (for cases of typedef). Improve the format of CO-RE reloc spec output format to contain only relevant and succinct info. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200818223921.2911963-3-andriin@fb.com --- tools/lib/bpf/btf.c | 17 ++--- tools/lib/bpf/btf.h | 38 --------- tools/lib/bpf/libbpf.c | 165 +++++++++++++++++++++++++--------------- tools/lib/bpf/libbpf_internal.h | 78 ++++++++++++++----- 4 files changed, 169 insertions(+), 129 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 7dfca7016aaa..1deedbd19c6c 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1131,14 +1131,14 @@ static int btf_ext_setup_line_info(struct btf_ext *btf_ext) return btf_ext_setup_info(btf_ext, ¶m); } -static int btf_ext_setup_field_reloc(struct btf_ext *btf_ext) +static int btf_ext_setup_core_relos(struct btf_ext *btf_ext) { struct btf_ext_sec_setup_param param = { - .off = btf_ext->hdr->field_reloc_off, - .len = btf_ext->hdr->field_reloc_len, - .min_rec_size = sizeof(struct bpf_field_reloc), - .ext_info = &btf_ext->field_reloc_info, - .desc = "field_reloc", + .off = btf_ext->hdr->core_relo_off, + .len = btf_ext->hdr->core_relo_len, + .min_rec_size = sizeof(struct bpf_core_relo), + .ext_info = &btf_ext->core_relo_info, + .desc = "core_relo", }; return btf_ext_setup_info(btf_ext, ¶m); @@ -1217,10 +1217,9 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size) if (err) goto done; - if (btf_ext->hdr->hdr_len < - offsetofend(struct btf_ext_header, field_reloc_len)) + if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) goto done; - err = btf_ext_setup_field_reloc(btf_ext); + err = btf_ext_setup_core_relos(btf_ext); if (err) goto done; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 1ca14448df4c..91f0ad0e0325 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -24,44 +24,6 @@ struct btf_type; struct bpf_object; -/* - * The .BTF.ext ELF section layout defined as - * struct btf_ext_header - * func_info subsection - * - * The func_info subsection layout: - * record size for struct bpf_func_info in the func_info subsection - * struct btf_sec_func_info for section #1 - * a list of bpf_func_info records for section #1 - * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h - * but may not be identical - * struct btf_sec_func_info for section #2 - * a list of bpf_func_info records for section #2 - * ...... - * - * Note that the bpf_func_info record size in .BTF.ext may not - * be the same as the one defined in include/uapi/linux/bpf.h. - * The loader should ensure that record_size meets minimum - * requirement and pass the record as is to the kernel. The - * kernel will handle the func_info properly based on its contents. - */ -struct btf_ext_header { - __u16 magic; - __u8 version; - __u8 flags; - __u32 hdr_len; - - /* All offsets are in bytes relative to the end of this header */ - __u32 func_info_off; - __u32 func_info_len; - __u32 line_info_off; - __u32 line_info_len; - - /* optional part of .BTF.ext header */ - __u32 field_reloc_off; - __u32 field_reloc_len; -}; - LIBBPF_API void btf__free(struct btf *btf); LIBBPF_API struct btf *btf__new(const void *data, __u32 size); LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 94d92763bcb1..894d388094e3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2524,7 +2524,7 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj) int err; /* CO-RE relocations need kernel BTF */ - if (obj->btf_ext && obj->btf_ext->field_reloc_info.len) + if (obj->btf_ext && obj->btf_ext->core_relo_info.len) need_vmlinux_btf = true; bpf_object__for_each_program(prog, obj) { @@ -4074,6 +4074,10 @@ struct bpf_core_spec { const struct btf *btf; /* high-level spec: named fields and array indices only */ struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; + /* original unresolved (no skip_mods_or_typedefs) root type ID */ + __u32 root_type_id; + /* CO-RE relocation kind */ + enum bpf_core_relo_kind relo_kind; /* high-level spec length */ int len; /* raw, low-level spec: 1-to-1 with accessor spec string */ @@ -4104,8 +4108,36 @@ static bool is_flex_arr(const struct btf *btf, return acc->idx == btf_vlen(t) - 1; } +static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) +{ + switch (kind) { + case BPF_FIELD_BYTE_OFFSET: return "byte_off"; + case BPF_FIELD_BYTE_SIZE: return "byte_sz"; + case BPF_FIELD_EXISTS: return "field_exists"; + case BPF_FIELD_SIGNED: return "signed"; + case BPF_FIELD_LSHIFT_U64: return "lshift_u64"; + case BPF_FIELD_RSHIFT_U64: return "rshift_u64"; + default: return "unknown"; + } +} + +static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) +{ + switch (kind) { + case BPF_FIELD_BYTE_OFFSET: + case BPF_FIELD_BYTE_SIZE: + case BPF_FIELD_EXISTS: + case BPF_FIELD_SIGNED: + case BPF_FIELD_LSHIFT_U64: + case BPF_FIELD_RSHIFT_U64: + return true; + default: + return false; + } +} + /* - * Turn bpf_field_reloc into a low- and high-level spec representation, + * Turn bpf_core_relo into a low- and high-level spec representation, * validating correctness along the way, as well as calculating resulting * field bit offset, specified by accessor string. Low-level spec captures * every single level of nestedness, including traversing anonymous @@ -4135,9 +4167,10 @@ static bool is_flex_arr(const struct btf *btf, * - array element #3 access (corresponds to '3' in low-level spec). * */ -static int bpf_core_spec_parse(const struct btf *btf, +static int bpf_core_parse_spec(const struct btf *btf, __u32 type_id, const char *spec_str, + enum bpf_core_relo_kind relo_kind, struct bpf_core_spec *spec) { int access_idx, parsed_len, i; @@ -4152,6 +4185,8 @@ static int bpf_core_spec_parse(const struct btf *btf, memset(spec, 0, sizeof(*spec)); spec->btf = btf; + spec->root_type_id = type_id; + spec->relo_kind = relo_kind; /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */ while (*spec_str) { @@ -4178,6 +4213,9 @@ static int bpf_core_spec_parse(const struct btf *btf, spec->spec[0].idx = access_idx; spec->len++; + if (!core_relo_is_field_based(relo_kind)) + return -EINVAL; + sz = btf__resolve_size(btf, id); if (sz < 0) return sz; @@ -4285,17 +4323,17 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, const struct btf *targ_btf) { size_t local_essent_len, targ_essent_len; - const char *local_name, *targ_name; - const struct btf_type *t; + const char *local_name, *targ_name, *targ_kind; + const struct btf_type *t, *local_t; struct ids_vec *cand_ids; __u32 *new_ids; int i, err, n; - t = btf__type_by_id(local_btf, local_type_id); - if (!t) + local_t = btf__type_by_id(local_btf, local_type_id); + if (!local_t) return ERR_PTR(-EINVAL); - local_name = btf__name_by_offset(local_btf, t->name_off); + local_name = btf__name_by_offset(local_btf, local_t->name_off); if (str_is_empty(local_name)) return ERR_PTR(-EINVAL); local_essent_len = bpf_core_essential_name_len(local_name); @@ -4310,6 +4348,7 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, targ_name = btf__name_by_offset(targ_btf, t->name_off); if (str_is_empty(targ_name)) continue; + targ_kind = btf_kind_str(t); t = skip_mods_and_typedefs(targ_btf, i, NULL); if (!btf_is_composite(t) && !btf_is_array(t)) @@ -4320,8 +4359,9 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, continue; if (strncmp(local_name, targ_name, local_essent_len) == 0) { - pr_debug("[%d] %s: found candidate [%d] %s\n", - local_type_id, local_name, i, targ_name); + pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s\n", + local_type_id, btf_kind_str(local_t), + local_name, i, targ_kind, targ_name); new_ids = reallocarray(cand_ids->data, cand_ids->len + 1, sizeof(*cand_ids->data)); @@ -4510,6 +4550,8 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, memset(targ_spec, 0, sizeof(*targ_spec)); targ_spec->btf = targ_btf; + targ_spec->root_type_id = targ_id; + targ_spec->relo_kind = local_spec->relo_kind; local_acc = &local_spec->spec[0]; targ_acc = &targ_spec->spec[0]; @@ -4570,7 +4612,7 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, } static int bpf_core_calc_field_relo(const struct bpf_program *prog, - const struct bpf_field_reloc *relo, + const struct bpf_core_relo *relo, const struct bpf_core_spec *spec, __u32 *val, bool *validate) { @@ -4691,7 +4733,7 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, * 2. rX += (arithmetic operations with immediate operand); */ static int bpf_core_reloc_insn(struct bpf_program *prog, - const struct bpf_field_reloc *relo, + const struct bpf_core_relo *relo, int relo_idx, const struct bpf_core_spec *local_spec, const struct bpf_core_spec *targ_spec) @@ -4795,25 +4837,30 @@ static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) __u32 type_id; int i; - type_id = spec->spec[0].type_id; + type_id = spec->root_type_id; t = btf__type_by_id(spec->btf, type_id); s = btf__name_by_offset(spec->btf, t->name_off); - libbpf_print(level, "[%u] %s + ", type_id, s); - for (i = 0; i < spec->raw_len; i++) - libbpf_print(level, "%d%s", spec->raw_spec[i], - i == spec->raw_len - 1 ? " => " : ":"); + libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "" : s); + + if (core_relo_is_field_based(spec->relo_kind)) { + for (i = 0; i < spec->len; i++) { + if (spec->spec[i].name) + libbpf_print(level, ".%s", spec->spec[i].name); + else if (i > 0 || spec->spec[i].idx > 0) + libbpf_print(level, "[%u]", spec->spec[i].idx); + } - libbpf_print(level, "%u.%u @ &x", - spec->bit_offset / 8, spec->bit_offset % 8); + libbpf_print(level, " ("); + for (i = 0; i < spec->raw_len; i++) + libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); - for (i = 0; i < spec->len; i++) { - if (spec->spec[i].name) - libbpf_print(level, ".%s", spec->spec[i].name); + if (spec->bit_offset % 8) + libbpf_print(level, " @ offset %u.%u)", + spec->bit_offset / 8, spec->bit_offset % 8); else - libbpf_print(level, "[%u]", spec->spec[i].idx); + libbpf_print(level, " @ offset %u)", spec->bit_offset / 8); } - } static size_t bpf_core_hash_fn(const void *key, void *ctx) @@ -4877,12 +4924,12 @@ static void *u32_as_hash_key(__u32 x) * CPU-wise compared to prebuilding a map from all local type names to * a list of candidate type names. It's also sped up by caching resolved * list of matching candidates per each local "root" type ID, that has at - * least one bpf_field_reloc associated with it. This list is shared + * least one bpf_core_relo associated with it. This list is shared * between multiple relocations for the same type ID and is updated as some * of the candidates are pruned due to structural incompatibility. */ static int bpf_core_reloc_field(struct bpf_program *prog, - const struct bpf_field_reloc *relo, + const struct bpf_core_relo *relo, int relo_idx, const struct btf *local_btf, const struct btf *targ_btf, @@ -4891,8 +4938,8 @@ static int bpf_core_reloc_field(struct bpf_program *prog, const char *prog_name = bpf_program__title(prog, false); struct bpf_core_spec local_spec, cand_spec, targ_spec; const void *type_key = u32_as_hash_key(relo->type_id); - const struct btf_type *local_type, *cand_type; - const char *local_name, *cand_name; + const struct btf_type *local_type; + const char *local_name; struct ids_vec *cand_ids; __u32 local_id, cand_id; const char *spec_str; @@ -4911,24 +4958,24 @@ static int bpf_core_reloc_field(struct bpf_program *prog, if (str_is_empty(spec_str)) return -EINVAL; - err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec); + err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec); if (err) { - pr_warn("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n", - prog_name, relo_idx, local_id, local_name, spec_str, - err); + pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", + prog_name, relo_idx, local_id, btf_kind_str(local_type), + local_name, spec_str, err); return -EINVAL; } - pr_debug("prog '%s': relo #%d: kind %d, spec is ", prog_name, relo_idx, - relo->kind); + pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name, + relo_idx, core_relo_kind_str(relo->kind), relo->kind); bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); libbpf_print(LIBBPF_DEBUG, "\n"); if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) { cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf); if (IS_ERR(cand_ids)) { - pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld", - prog_name, relo_idx, local_id, local_name, + pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld", + prog_name, relo_idx, local_id, btf_kind_str(local_type), local_name, PTR_ERR(cand_ids)); return PTR_ERR(cand_ids); } @@ -4941,20 +4988,20 @@ static int bpf_core_reloc_field(struct bpf_program *prog, for (i = 0, j = 0; i < cand_ids->len; i++) { cand_id = cand_ids->data[i]; - cand_type = btf__type_by_id(targ_btf, cand_id); - cand_name = btf__name_by_offset(targ_btf, cand_type->name_off); - - err = bpf_core_spec_match(&local_spec, targ_btf, - cand_id, &cand_spec); - pr_debug("prog '%s': relo #%d: matching candidate #%d %s against spec ", - prog_name, relo_idx, i, cand_name); - bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); - libbpf_print(LIBBPF_DEBUG, ": %d\n", err); + err = bpf_core_spec_match(&local_spec, targ_btf, cand_id, &cand_spec); if (err < 0) { - pr_warn("prog '%s': relo #%d: matching error: %d\n", - prog_name, relo_idx, err); + pr_warn("prog '%s': relo #%d: error matching candidate #%d ", + prog_name, relo_idx, i); + bpf_core_dump_spec(LIBBPF_WARN, &cand_spec); + libbpf_print(LIBBPF_WARN, ": %d\n", err); return err; } + + pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name, + relo_idx, err == 0 ? "non-matching" : "matching", i); + bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); + libbpf_print(LIBBPF_DEBUG, "\n"); + if (err == 0) continue; @@ -4996,8 +5043,8 @@ static int bpf_core_reloc_field(struct bpf_program *prog, * to a specific instruction number in its log. */ if (j == 0) - pr_debug("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", - prog_name, relo_idx, local_id, local_name, spec_str); + pr_debug("prog '%s': relo #%d: no matching targets found\n", + prog_name, relo_idx); /* bpf_core_reloc_insn should know how to handle missing targ_spec */ err = bpf_core_reloc_insn(prog, relo, relo_idx, &local_spec, @@ -5012,10 +5059,10 @@ static int bpf_core_reloc_field(struct bpf_program *prog, } static int -bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) +bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) { const struct btf_ext_info_sec *sec; - const struct bpf_field_reloc *rec; + const struct bpf_core_relo *rec; const struct btf_ext_info *seg; struct hashmap_entry *entry; struct hashmap *cand_cache = NULL; @@ -5024,6 +5071,9 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) const char *sec_name; int i, err = 0; + if (obj->btf_ext->core_relo_info.len == 0) + return 0; + if (targ_btf_path) targ_btf = btf__parse_elf(targ_btf_path, NULL); else @@ -5039,7 +5089,7 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) goto out; } - seg = &obj->btf_ext->field_reloc_info; + seg = &obj->btf_ext->core_relo_info; for_each_btf_ext_sec(seg, sec) { sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); if (str_is_empty(sec_name)) { @@ -5087,17 +5137,6 @@ out: return err; } -static int -bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) -{ - int err = 0; - - if (obj->btf_ext->field_reloc_info.len) - err = bpf_core_reloc_fields(obj, targ_btf_path); - - return err; -} - static int bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, struct reloc_desc *relo) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 50d70e90d5f1..b776a7125c92 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -138,6 +138,44 @@ struct btf_ext_info { i < (sec)->num_info; \ i++, rec = (void *)rec + (seg)->rec_size) +/* + * The .BTF.ext ELF section layout defined as + * struct btf_ext_header + * func_info subsection + * + * The func_info subsection layout: + * record size for struct bpf_func_info in the func_info subsection + * struct btf_sec_func_info for section #1 + * a list of bpf_func_info records for section #1 + * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h + * but may not be identical + * struct btf_sec_func_info for section #2 + * a list of bpf_func_info records for section #2 + * ...... + * + * Note that the bpf_func_info record size in .BTF.ext may not + * be the same as the one defined in include/uapi/linux/bpf.h. + * The loader should ensure that record_size meets minimum + * requirement and pass the record as is to the kernel. The + * kernel will handle the func_info properly based on its contents. + */ +struct btf_ext_header { + __u16 magic; + __u8 version; + __u8 flags; + __u32 hdr_len; + + /* All offsets are in bytes relative to the end of this header */ + __u32 func_info_off; + __u32 func_info_len; + __u32 line_info_off; + __u32 line_info_len; + + /* optional part of .BTF.ext header */ + __u32 core_relo_off; + __u32 core_relo_len; +}; + struct btf_ext { union { struct btf_ext_header *hdr; @@ -145,7 +183,7 @@ struct btf_ext { }; struct btf_ext_info func_info; struct btf_ext_info line_info; - struct btf_ext_info field_reloc_info; + struct btf_ext_info core_relo_info; __u32 data_size; }; @@ -170,32 +208,34 @@ struct bpf_line_info_min { __u32 line_col; }; -/* bpf_field_info_kind encodes which aspect of captured field has to be - * adjusted by relocations. Currently supported values are: - * - BPF_FIELD_BYTE_OFFSET: field offset (in bytes); - * - BPF_FIELD_EXISTS: field existence (1, if field exists; 0, otherwise); +/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value + * has to be adjusted by relocations. */ -enum bpf_field_info_kind { +enum bpf_core_relo_kind { BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ - BPF_FIELD_BYTE_SIZE = 1, + BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */ BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ - BPF_FIELD_SIGNED = 3, - BPF_FIELD_LSHIFT_U64 = 4, - BPF_FIELD_RSHIFT_U64 = 5, + BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ + BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ + BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ }; -/* The minimum bpf_field_reloc checked by the loader +/* The minimum bpf_core_relo checked by the loader * - * Field relocation captures the following data: + * CO-RE relocation captures the following data: * - insn_off - instruction offset (in bytes) within a BPF program that needs * its insn->imm field to be relocated with actual field info; * - type_id - BTF type ID of the "root" (containing) entity of a relocatable - * field; + * type or field; * - access_str_off - offset into corresponding .BTF string section. String - * itself encodes an accessed field using a sequence of field and array - * indicies, separated by colon (:). It's conceptually very close to LLVM's - * getelementptr ([0]) instruction's arguments for identifying offset to - * a field. + * interpretation depends on specific relocation kind: + * - for field-based relocations, string encodes an accessed field using + * a sequence of field and array indices, separated by colon (:). It's + * conceptually very close to LLVM's getelementptr ([0]) instruction's + * arguments for identifying offset to a field. + * - for type-based relocations, strings is expected to be just "0"; + * - for enum value-based relocations, string contains an index of enum + * value within its enum type; * * Example to provide a better feel. * @@ -226,11 +266,11 @@ enum bpf_field_info_kind { * * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction */ -struct bpf_field_reloc { +struct bpf_core_relo { __u32 insn_off; __u32 type_id; __u32 access_str_off; - enum bpf_field_info_kind kind; + enum bpf_core_relo_kind kind; }; #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ -- cgit v1.3.1 From 353c788c2054257359114f12827d86f8e3332e10 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 15:39:15 -0700 Subject: libbpf: Improve relocation ambiguity detection Split the instruction patching logic into relocation value calculation and application of relocation to instruction. Using this, evaluate relocation against each matching candidate and validate that all candidates agree on relocated value. If not, report ambiguity and fail load. This logic is necessary to avoid dangerous (however unlikely) accidental match against two incompatible candidate types. Without this change, libbpf will pick a random type as *the* candidate and apply potentially invalid relocation. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200818223921.2911963-4-andriin@fb.com --- tools/lib/bpf/libbpf.c | 170 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 124 insertions(+), 46 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 894d388094e3..0276cf85d763 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4616,14 +4616,25 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, const struct bpf_core_spec *spec, __u32 *val, bool *validate) { - const struct bpf_core_accessor *acc = &spec->spec[spec->len - 1]; - const struct btf_type *t = btf__type_by_id(spec->btf, acc->type_id); + const struct bpf_core_accessor *acc; + const struct btf_type *t; __u32 byte_off, byte_sz, bit_off, bit_sz; const struct btf_member *m; const struct btf_type *mt; bool bitfield; __s64 sz; + if (relo->kind == BPF_FIELD_EXISTS) { + *val = spec ? 1 : 0; + return 0; + } + + if (!spec) + return -EUCLEAN; /* request instruction poisoning */ + + acc = &spec->spec[spec->len - 1]; + t = btf__type_by_id(spec->btf, acc->type_id); + /* a[n] accessor needs special handling */ if (!acc->name) { if (relo->kind == BPF_FIELD_BYTE_OFFSET) { @@ -4709,21 +4720,88 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, break; case BPF_FIELD_EXISTS: default: - pr_warn("prog '%s': unknown relo %d at insn #%d\n", - bpf_program__title(prog, false), - relo->kind, relo->insn_off / 8); - return -EINVAL; + return -EOPNOTSUPP; } return 0; } +struct bpf_core_relo_res +{ + /* expected value in the instruction, unless validate == false */ + __u32 orig_val; + /* new value that needs to be patched up to */ + __u32 new_val; + /* relocation unsuccessful, poison instruction, but don't fail load */ + bool poison; + /* some relocations can't be validated against orig_val */ + bool validate; +}; + +/* Calculate original and target relocation values, given local and target + * specs and relocation kind. These values are calculated for each candidate. + * If there are multiple candidates, resulting values should all be consistent + * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity. + * If instruction has to be poisoned, *poison will be set to true. + */ +static int bpf_core_calc_relo(const struct bpf_program *prog, + const struct bpf_core_relo *relo, + int relo_idx, + const struct bpf_core_spec *local_spec, + const struct bpf_core_spec *targ_spec, + struct bpf_core_relo_res *res) +{ + int err = -EOPNOTSUPP; + + res->orig_val = 0; + res->new_val = 0; + res->poison = false; + res->validate = true; + + if (core_relo_is_field_based(relo->kind)) { + err = bpf_core_calc_field_relo(prog, relo, local_spec, &res->orig_val, &res->validate); + err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec, &res->new_val, NULL); + } + + if (err == -EUCLEAN) { + /* EUCLEAN is used to signal instruction poisoning request */ + res->poison = true; + err = 0; + } else if (err == -EOPNOTSUPP) { + /* EOPNOTSUPP means unknown/unsupported relocation */ + pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n", + bpf_program__title(prog, false), relo_idx, + core_relo_kind_str(relo->kind), relo->kind, relo->insn_off / 8); + } + + return err; +} + +/* + * Turn instruction for which CO_RE relocation failed into invalid one with + * distinct signature. + */ +static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx, + int insn_idx, struct bpf_insn *insn) +{ + pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n", + bpf_program__title(prog, false), relo_idx, insn_idx); + insn->code = BPF_JMP | BPF_CALL; + insn->dst_reg = 0; + insn->src_reg = 0; + insn->off = 0; + /* if this instruction is reachable (not a dead code), + * verifier will complain with the following message: + * invalid func unknown#195896080 + */ + insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ +} + /* * Patch relocatable BPF instruction. * * Patched value is determined by relocation kind and target specification. - * For field existence relocation target spec will be NULL if field is not - * found. + * For existence relocations target spec will be NULL if field/type is not found. * Expected insn->imm value is determined using relocation kind and local * spec, and is checked before patching instruction. If actual insn->imm value * is wrong, bail out with error. @@ -4732,16 +4810,14 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, * 1. rX = (assignment with immediate operand); * 2. rX += (arithmetic operations with immediate operand); */ -static int bpf_core_reloc_insn(struct bpf_program *prog, +static int bpf_core_patch_insn(struct bpf_program *prog, const struct bpf_core_relo *relo, int relo_idx, - const struct bpf_core_spec *local_spec, - const struct bpf_core_spec *targ_spec) + const struct bpf_core_relo_res *res) { __u32 orig_val, new_val; struct bpf_insn *insn; - bool validate = true; - int insn_idx, err; + int insn_idx; __u8 class; if (relo->insn_off % sizeof(struct bpf_insn)) @@ -4750,39 +4826,20 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, insn = &prog->insns[insn_idx]; class = BPF_CLASS(insn->code); - if (relo->kind == BPF_FIELD_EXISTS) { - orig_val = 1; /* can't generate EXISTS relo w/o local field */ - new_val = targ_spec ? 1 : 0; - } else if (!targ_spec) { - pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n", - bpf_program__title(prog, false), relo_idx, insn_idx); - insn->code = BPF_JMP | BPF_CALL; - insn->dst_reg = 0; - insn->src_reg = 0; - insn->off = 0; - /* if this instruction is reachable (not a dead code), - * verifier will complain with the following message: - * invalid func unknown#195896080 - */ - insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ + if (res->poison) { + bpf_core_poison_insn(prog, relo_idx, insn_idx, insn); return 0; - } else { - err = bpf_core_calc_field_relo(prog, relo, local_spec, - &orig_val, &validate); - if (err) - return err; - err = bpf_core_calc_field_relo(prog, relo, targ_spec, - &new_val, NULL); - if (err) - return err; } + orig_val = res->orig_val; + new_val = res->new_val; + switch (class) { case BPF_ALU: case BPF_ALU64: if (BPF_SRC(insn->code) != BPF_K) return -EINVAL; - if (validate && insn->imm != orig_val) { + if (res->validate && insn->imm != orig_val) { pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", bpf_program__title(prog, false), relo_idx, insn_idx, insn->imm, orig_val, new_val); @@ -4797,7 +4854,7 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, case BPF_LDX: case BPF_ST: case BPF_STX: - if (validate && insn->off != orig_val) { + if (res->validate && insn->off != orig_val) { pr_warn("prog '%s': relo #%d: unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n", bpf_program__title(prog, false), relo_idx, insn_idx, insn->off, orig_val, new_val); @@ -4938,6 +4995,7 @@ static int bpf_core_reloc_field(struct bpf_program *prog, const char *prog_name = bpf_program__title(prog, false); struct bpf_core_spec local_spec, cand_spec, targ_spec; const void *type_key = u32_as_hash_key(relo->type_id); + struct bpf_core_relo_res cand_res, targ_res; const struct btf_type *local_type; const char *local_name; struct ids_vec *cand_ids; @@ -5005,16 +5063,31 @@ static int bpf_core_reloc_field(struct bpf_program *prog, if (err == 0) continue; + err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res); + if (err) + return err; + if (j == 0) { + targ_res = cand_res; targ_spec = cand_spec; } else if (cand_spec.bit_offset != targ_spec.bit_offset) { - /* if there are many candidates, they should all - * resolve to the same bit offset + /* if there are many field relo candidates, they + * should all resolve to the same bit offset */ - pr_warn("prog '%s': relo #%d: offset ambiguity: %u != %u\n", + pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n", prog_name, relo_idx, cand_spec.bit_offset, targ_spec.bit_offset); return -EINVAL; + } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) { + /* all candidates should result in the same relocation + * decision and value, otherwise it's dangerous to + * proceed due to ambiguity + */ + pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n", + prog_name, relo_idx, + cand_res.poison ? "failure" : "success", cand_res.new_val, + targ_res.poison ? "failure" : "success", targ_res.new_val); + return -EINVAL; } cand_ids->data[j++] = cand_spec.spec[0].type_id; @@ -5042,13 +5115,18 @@ static int bpf_core_reloc_field(struct bpf_program *prog, * verifier. If it was an error, then verifier will complain and point * to a specific instruction number in its log. */ - if (j == 0) + if (j == 0) { pr_debug("prog '%s': relo #%d: no matching targets found\n", prog_name, relo_idx); - /* bpf_core_reloc_insn should know how to handle missing targ_spec */ - err = bpf_core_reloc_insn(prog, relo, relo_idx, &local_spec, - j ? &targ_spec : NULL); + /* calculate single target relo result explicitly */ + err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res); + if (err) + return err; + } + + /* bpf_core_patch_insn() should know how to handle missing targ_spec */ + err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res); if (err) { pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n", prog_name, relo_idx, relo->insn_off, err); -- cgit v1.3.1 From 00b2e95325f8c9dd56a7dae691b01e321838a557 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 15:39:16 -0700 Subject: selftests/bpf: Add test validating failure on ambiguous relocation value Add test simulating ambiguous field size relocation, while fields themselves are at the exact same offset. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200818223921.2911963-5-andriin@fb.com --- .../testing/selftests/bpf/prog_tests/core_reloc.c | 1 + .../progs/btf__core_reloc_size___err_ambiguous.c | 4 ++++ .../testing/selftests/bpf/progs/core_reloc_types.h | 25 ++++++++++++++++++++++ 3 files changed, 30 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index a54eafc5e4b3..4d650e99be28 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -452,6 +452,7 @@ static struct core_reloc_test_case test_cases[] = { /* size relocation checks */ SIZE_CASE(size), SIZE_CASE(size___diff_sz), + SIZE_ERR_CASE(size___err_ambiguous), }; struct data { diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c new file mode 100644 index 000000000000..f3e9904df9c2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c @@ -0,0 +1,4 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_size___err_ambiguous1 x, + struct core_reloc_size___err_ambiguous2 y) {} diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index 69139ed66216..3b1126c0bc8f 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -809,3 +809,28 @@ struct core_reloc_size___diff_sz { void *ptr_field; enum { OTHER_VALUE = 0xFFFFFFFFFFFFFFFF } enum_field; }; + +/* Error case of two candidates with the fields (int_field) at the same + * offset, but with differing final relocation values: size 4 vs size 1 + */ +struct core_reloc_size___err_ambiguous1 { + /* int at offset 0 */ + int int_field; + + struct { int x; } struct_field; + union { int x; } union_field; + int arr_field[4]; + void *ptr_field; + enum { VALUE___1 = 123 } enum_field; +}; + +struct core_reloc_size___err_ambiguous2 { + /* char at offset 0 */ + char int_field; + + struct { int x; } struct_field; + union { int x; } union_field; + int arr_field[4]; + void *ptr_field; + enum { VALUE___2 = 123 } enum_field; +}; -- cgit v1.3.1 From 029258d7b22894fabcecb1626e1b87d18a6823f4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 18:36:04 -0700 Subject: libbpf: Remove any use of reallocarray() in libbpf Re-implement glibc's reallocarray() for libbpf internal-only use. reallocarray(), unfortunately, is not available in all versions of glibc, so requires extra feature detection and using reallocarray() stub from and COMPAT_NEED_REALLOCARRAY. All this complicates build of libbpf unnecessarily and is just a maintenance burden. Instead, it's trivial to implement libbpf-specific internal version and use it throughout libbpf. Which is what this patch does, along with converting some realloc() uses that should really have been reallocarray() in the first place. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200819013607.3607269-2-andriin@fb.com --- tools/lib/bpf/Makefile | 6 +----- tools/lib/bpf/btf.c | 11 +++++------ tools/lib/bpf/btf_dump.c | 6 ++---- tools/lib/bpf/libbpf.c | 21 ++++++++++----------- tools/lib/bpf/libbpf_internal.h | 25 +++++++++++++++++++++++++ tools/lib/bpf/ringbuf.c | 5 ++--- 6 files changed, 45 insertions(+), 29 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 95c946e94ca5..621ad96d06fd 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -56,7 +56,7 @@ ifndef VERBOSE endif FEATURE_USER = .libbpf -FEATURE_TESTS = libelf libelf-mmap zlib bpf reallocarray +FEATURE_TESTS = libelf libelf-mmap zlib bpf FEATURE_DISPLAY = libelf zlib bpf INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi @@ -102,10 +102,6 @@ ifeq ($(feature-libelf-mmap), 1) override CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT endif -ifeq ($(feature-reallocarray), 0) - override CFLAGS += -DCOMPAT_NEED_REALLOCARRAY -endif - # Append required CFLAGS override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum override CFLAGS += -Werror -Wall diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 1deedbd19c6c..1b7d85d94a07 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -61,7 +61,7 @@ static int btf_add_type(struct btf *btf, struct btf_type *t) expand_by = max(btf->types_size >> 2, 16U); new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by); - new_types = realloc(btf->types, sizeof(*new_types) * new_size); + new_types = libbpf_reallocarray(btf->types, new_size, sizeof(*new_types)); if (!new_types) return -ENOMEM; @@ -1574,7 +1574,7 @@ static int btf_dedup_hypot_map_add(struct btf_dedup *d, __u32 *new_list; d->hypot_cap += max((size_t)16, d->hypot_cap / 2); - new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap); + new_list = libbpf_reallocarray(d->hypot_list, d->hypot_cap, sizeof(__u32)); if (!new_list) return -ENOMEM; d->hypot_list = new_list; @@ -1870,8 +1870,7 @@ static int btf_dedup_strings(struct btf_dedup *d) struct btf_str_ptr *new_ptrs; strs.cap += max(strs.cnt / 2, 16U); - new_ptrs = realloc(strs.ptrs, - sizeof(strs.ptrs[0]) * strs.cap); + new_ptrs = libbpf_reallocarray(strs.ptrs, strs.cap, sizeof(strs.ptrs[0])); if (!new_ptrs) { err = -ENOMEM; goto done; @@ -2956,8 +2955,8 @@ static int btf_dedup_compact_types(struct btf_dedup *d) d->btf->nr_types = next_type_id - 1; d->btf->types_size = d->btf->nr_types; d->btf->hdr->type_len = p - types_start; - new_types = realloc(d->btf->types, - (1 + d->btf->nr_types) * sizeof(struct btf_type *)); + new_types = libbpf_reallocarray(d->btf->types, (1 + d->btf->nr_types), + sizeof(struct btf_type *)); if (!new_types) return -ENOMEM; d->btf->types = new_types; diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index fe39bd774697..1ad852ad0a86 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -323,8 +323,7 @@ static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id) if (d->emit_queue_cnt >= d->emit_queue_cap) { new_cap = max(16, d->emit_queue_cap * 3 / 2); - new_queue = realloc(d->emit_queue, - new_cap * sizeof(new_queue[0])); + new_queue = libbpf_reallocarray(d->emit_queue, new_cap, sizeof(new_queue[0])); if (!new_queue) return -ENOMEM; d->emit_queue = new_queue; @@ -1003,8 +1002,7 @@ static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id) if (d->decl_stack_cnt >= d->decl_stack_cap) { new_cap = max(16, d->decl_stack_cap * 3 / 2); - new_stack = realloc(d->decl_stack, - new_cap * sizeof(new_stack[0])); + new_stack = libbpf_reallocarray(d->decl_stack, new_cap, sizeof(new_stack[0])); if (!new_stack) return -ENOMEM; d->decl_stack = new_stack; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 0276cf85d763..2653bcee73b7 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -44,7 +44,6 @@ #include #include #include -#include #include #include #include @@ -567,7 +566,7 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, progs = obj->programs; nr_progs = obj->nr_programs; - progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0])); + progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(progs[0])); if (!progs) { /* * In this case the original obj->programs @@ -1292,7 +1291,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) return &obj->maps[obj->nr_maps++]; new_cap = max((size_t)4, obj->maps_cap * 3 / 2); - new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps)); + new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps)); if (!new_maps) { pr_warn("alloc maps for object failed\n"); return ERR_PTR(-ENOMEM); @@ -2721,8 +2720,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) continue; } - sects = reallocarray(sects, nr_sects + 1, - sizeof(*obj->efile.reloc_sects)); + sects = libbpf_reallocarray(sects, nr_sects + 1, + sizeof(*obj->efile.reloc_sects)); if (!sects) { pr_warn("reloc_sects realloc failed\n"); return -ENOMEM; @@ -2925,7 +2924,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) continue; ext = obj->externs; - ext = reallocarray(ext, obj->nr_extern + 1, sizeof(*ext)); + ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext)); if (!ext) return -ENOMEM; obj->externs = ext; @@ -4362,9 +4361,9 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s\n", local_type_id, btf_kind_str(local_t), local_name, i, targ_kind, targ_name); - new_ids = reallocarray(cand_ids->data, - cand_ids->len + 1, - sizeof(*cand_ids->data)); + new_ids = libbpf_reallocarray(cand_ids->data, + cand_ids->len + 1, + sizeof(*cand_ids->data)); if (!new_ids) { err = -ENOMEM; goto err_out; @@ -5231,7 +5230,7 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, return -LIBBPF_ERRNO__RELOC; } new_cnt = prog->insns_cnt + text->insns_cnt; - new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn)); + new_insn = libbpf_reallocarray(prog->insns, new_cnt, sizeof(*insn)); if (!new_insn) { pr_warn("oom in prog realloc\n"); return -ENOMEM; @@ -5473,7 +5472,7 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, moff /= bpf_ptr_sz; if (moff >= map->init_slots_sz) { new_sz = moff + 1; - tmp = realloc(map->init_slots, new_sz * host_ptr_sz); + tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz); if (!tmp) return -ENOMEM; map->init_slots = tmp; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index b776a7125c92..954bc2bd040c 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -9,6 +9,7 @@ #ifndef __LIBBPF_LIBBPF_INTERNAL_H #define __LIBBPF_LIBBPF_INTERNAL_H +#include #include "libbpf.h" #define BTF_INFO_ENC(kind, kind_flag, vlen) \ @@ -23,6 +24,12 @@ #define BTF_PARAM_ENC(name, type) (name), (type) #define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size) +#ifndef likely +#define likely(x) __builtin_expect(!!(x), 1) +#endif +#ifndef unlikely +#define unlikely(x) __builtin_expect(!!(x), 0) +#endif #ifndef min # define min(x, y) ((x) < (y) ? (x) : (y)) #endif @@ -63,6 +70,24 @@ do { \ #define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) #define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) +/* + * Re-implement glibc's reallocarray() for libbpf internal-only use. + * reallocarray(), unfortunately, is not available in all versions of glibc, + * so requires extra feature detection and using reallocarray() stub from + * and COMPAT_NEED_REALLOCARRAY. All this complicates + * build of libbpf unnecessarily and is just a maintenance burden. Instead, + * it's trivial to implement libbpf-specific internal version and use it + * throughout libbpf. + */ +static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size) +{ + size_t total; + + if (unlikely(__builtin_mul_overflow(nmemb, size, &total))) + return NULL; + return realloc(ptr, total); +} + static inline bool libbpf_validate_opts(const char *opts, size_t opts_sz, size_t user_sz, const char *type_name) diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 4fc6c6cbb4eb..5bd234be8a14 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -16,7 +16,6 @@ #include #include #include -#include #include "libbpf.h" #include "libbpf_internal.h" @@ -82,12 +81,12 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, return -EINVAL; } - tmp = reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings)); + tmp = libbpf_reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings)); if (!tmp) return -ENOMEM; rb->rings = tmp; - tmp = reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events)); + tmp = libbpf_reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events)); if (!tmp) return -ENOMEM; rb->events = tmp; -- cgit v1.3.1 From 7084566a236fbc98beb11430d8d67dd08b2ac151 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 18:36:05 -0700 Subject: tools/bpftool: Remove libbpf_internal.h usage in bpftool Most netlink-related functions were unique to bpftool usage, so I moved them into net.c. Few functions are still used by both bpftool and libbpf itself internally, so I've copy-pasted them (libbpf_nl_get_link, libbpf_netlink_open). It's a bit of duplication of code, but better separation of libbpf as a library with public API and bpftool, relying on unexposed functions in libbpf. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200819013607.3607269-3-andriin@fb.com --- tools/bpf/bpftool/gen.c | 2 - tools/bpf/bpftool/net.c | 299 +++++++++++++++++++++++++++++++++++++--- tools/lib/bpf/libbpf_internal.h | 12 -- tools/lib/bpf/netlink.c | 125 +---------------- 4 files changed, 288 insertions(+), 150 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index f61184653633..4033c46d83e7 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -19,11 +19,9 @@ #include #include -#include "bpf/libbpf_internal.h" #include "json_writer.h" #include "main.h" - #define MAX_OBJ_NAME_LEN 64 static void sanitize_identifier(char *name) diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 56c3a2bae3ef..910e7bac6e9e 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -6,22 +6,27 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include #include #include "bpf/nlattr.h" -#include "bpf/libbpf_internal.h" #include "main.h" #include "netlink_dumper.h" +#ifndef SOL_NETLINK +#define SOL_NETLINK 270 +#endif + struct ip_devname_ifindex { char devname[64]; int ifindex; @@ -85,6 +90,266 @@ static enum net_attach_type parse_attach_type(const char *str) return net_attach_type_size; } +typedef int (*dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); + +typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, dump_nlmsg_t, void *cookie); + +static int netlink_open(__u32 *nl_pid) +{ + struct sockaddr_nl sa; + socklen_t addrlen; + int one = 1, ret; + int sock; + + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) + return -errno; + + if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)) < 0) { + p_err("Netlink error reporting not supported"); + } + + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + ret = -errno; + goto cleanup; + } + + addrlen = sizeof(sa); + if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) { + ret = -errno; + goto cleanup; + } + + if (addrlen != sizeof(sa)) { + ret = -LIBBPF_ERRNO__INTERNAL; + goto cleanup; + } + + *nl_pid = sa.nl_pid; + return sock; + +cleanup: + close(sock); + return ret; +} + +static int netlink_recv(int sock, __u32 nl_pid, __u32 seq, + __dump_nlmsg_t _fn, dump_nlmsg_t fn, + void *cookie) +{ + bool multipart = true; + struct nlmsgerr *err; + struct nlmsghdr *nh; + char buf[4096]; + int len, ret; + + while (multipart) { + multipart = false; + len = recv(sock, buf, sizeof(buf), 0); + if (len < 0) { + ret = -errno; + goto done; + } + + if (len == 0) + break; + + for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); + nh = NLMSG_NEXT(nh, len)) { + if (nh->nlmsg_pid != nl_pid) { + ret = -LIBBPF_ERRNO__WRNGPID; + goto done; + } + if (nh->nlmsg_seq != seq) { + ret = -LIBBPF_ERRNO__INVSEQ; + goto done; + } + if (nh->nlmsg_flags & NLM_F_MULTI) + multipart = true; + switch (nh->nlmsg_type) { + case NLMSG_ERROR: + err = (struct nlmsgerr *)NLMSG_DATA(nh); + if (!err->error) + continue; + ret = err->error; + libbpf_nla_dump_errormsg(nh); + goto done; + case NLMSG_DONE: + return 0; + default: + break; + } + if (_fn) { + ret = _fn(nh, fn, cookie); + if (ret) + return ret; + } + } + } + ret = 0; +done: + return ret; +} + +static int __dump_class_nlmsg(struct nlmsghdr *nlh, + dump_nlmsg_t dump_class_nlmsg, + void *cookie) +{ + struct nlattr *tb[TCA_MAX + 1], *attr; + struct tcmsg *t = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); + attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); + if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_class_nlmsg(cookie, t, tb); +} + +static int netlink_get_class(int sock, unsigned int nl_pid, int ifindex, + dump_nlmsg_t dump_class_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct tcmsg t; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .nlh.nlmsg_type = RTM_GETTCLASS, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .t.tcm_family = AF_UNSPEC, + .t.tcm_ifindex = ifindex, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg, + dump_class_nlmsg, cookie); +} + +static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh, + dump_nlmsg_t dump_qdisc_nlmsg, + void *cookie) +{ + struct nlattr *tb[TCA_MAX + 1], *attr; + struct tcmsg *t = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); + attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); + if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_qdisc_nlmsg(cookie, t, tb); +} + +static int netlink_get_qdisc(int sock, unsigned int nl_pid, int ifindex, + dump_nlmsg_t dump_qdisc_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct tcmsg t; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .nlh.nlmsg_type = RTM_GETQDISC, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .t.tcm_family = AF_UNSPEC, + .t.tcm_ifindex = ifindex, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg, + dump_qdisc_nlmsg, cookie); +} + +static int __dump_filter_nlmsg(struct nlmsghdr *nlh, + dump_nlmsg_t dump_filter_nlmsg, + void *cookie) +{ + struct nlattr *tb[TCA_MAX + 1], *attr; + struct tcmsg *t = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); + attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); + if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_filter_nlmsg(cookie, t, tb); +} + +static int netlink_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, + dump_nlmsg_t dump_filter_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct tcmsg t; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .nlh.nlmsg_type = RTM_GETTFILTER, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .t.tcm_family = AF_UNSPEC, + .t.tcm_ifindex = ifindex, + .t.tcm_parent = handle, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg, + dump_filter_nlmsg, cookie); +} + +static int __dump_link_nlmsg(struct nlmsghdr *nlh, + dump_nlmsg_t dump_link_nlmsg, void *cookie) +{ + struct nlattr *tb[IFLA_MAX + 1], *attr; + struct ifinfomsg *ifi = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi)); + attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi))); + if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_link_nlmsg(cookie, ifi, tb); +} + +static int netlink_get_link(int sock, unsigned int nl_pid, + dump_nlmsg_t dump_link_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct ifinfomsg ifm; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + .nlh.nlmsg_type = RTM_GETLINK, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .ifm.ifi_family = AF_PACKET, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg, + dump_link_nlmsg, cookie); +} + static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb) { struct bpf_netdev_t *netinfo = cookie; @@ -168,14 +433,14 @@ static int show_dev_tc_bpf(int sock, unsigned int nl_pid, tcinfo.array_len = 0; tcinfo.is_qdisc = false; - ret = libbpf_nl_get_class(sock, nl_pid, dev->ifindex, - dump_class_qdisc_nlmsg, &tcinfo); + ret = netlink_get_class(sock, nl_pid, dev->ifindex, + dump_class_qdisc_nlmsg, &tcinfo); if (ret) goto out; tcinfo.is_qdisc = true; - ret = libbpf_nl_get_qdisc(sock, nl_pid, dev->ifindex, - dump_class_qdisc_nlmsg, &tcinfo); + ret = netlink_get_qdisc(sock, nl_pid, dev->ifindex, + dump_class_qdisc_nlmsg, &tcinfo); if (ret) goto out; @@ -183,9 +448,9 @@ static int show_dev_tc_bpf(int sock, unsigned int nl_pid, filter_info.ifindex = dev->ifindex; for (i = 0; i < tcinfo.used_len; i++) { filter_info.kind = tcinfo.handle_array[i].kind; - ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, - tcinfo.handle_array[i].handle, - dump_filter_nlmsg, &filter_info); + ret = netlink_get_filter(sock, nl_pid, dev->ifindex, + tcinfo.handle_array[i].handle, + dump_filter_nlmsg, &filter_info); if (ret) goto out; } @@ -193,22 +458,22 @@ static int show_dev_tc_bpf(int sock, unsigned int nl_pid, /* root, ingress and egress handle */ handle = TC_H_ROOT; filter_info.kind = "root"; - ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle, - dump_filter_nlmsg, &filter_info); + ret = netlink_get_filter(sock, nl_pid, dev->ifindex, handle, + dump_filter_nlmsg, &filter_info); if (ret) goto out; handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS); filter_info.kind = "clsact/ingress"; - ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle, - dump_filter_nlmsg, &filter_info); + ret = netlink_get_filter(sock, nl_pid, dev->ifindex, handle, + dump_filter_nlmsg, &filter_info); if (ret) goto out; handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS); filter_info.kind = "clsact/egress"; - ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle, - dump_filter_nlmsg, &filter_info); + ret = netlink_get_filter(sock, nl_pid, dev->ifindex, handle, + dump_filter_nlmsg, &filter_info); if (ret) goto out; @@ -386,7 +651,7 @@ static int do_show(int argc, char **argv) struct bpf_attach_info attach_info = {}; int i, sock, ret, filter_idx = -1; struct bpf_netdev_t dev_array; - unsigned int nl_pid; + unsigned int nl_pid = 0; char err_buf[256]; if (argc == 2) { @@ -401,7 +666,7 @@ static int do_show(int argc, char **argv) if (ret) return -1; - sock = libbpf_netlink_open(&nl_pid); + sock = netlink_open(&nl_pid); if (sock < 0) { fprintf(stderr, "failed to open netlink sock\n"); return -1; @@ -416,7 +681,7 @@ static int do_show(int argc, char **argv) jsonw_start_array(json_wtr); NET_START_OBJECT; NET_START_ARRAY("xdp", "%s:\n"); - ret = libbpf_nl_get_link(sock, nl_pid, dump_link_nlmsg, &dev_array); + ret = netlink_get_link(sock, nl_pid, dump_link_nlmsg, &dev_array); NET_END_ARRAY("\n"); if (!ret) { diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 954bc2bd040c..65931e989eea 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -130,18 +130,6 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name, int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, __u32 *off); -struct nlattr; -typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); -int libbpf_netlink_open(unsigned int *nl_pid); -int libbpf_nl_get_link(int sock, unsigned int nl_pid, - libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie); -int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex, - libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie); -int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, - libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie); -int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, - libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie); - struct btf_ext_info { /* * info points to the individual info section (e.g. func_info and diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 312f887570b2..2465538a5ba9 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -22,6 +22,8 @@ #define SOL_NETLINK 270 #endif +typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); + typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t, void *cookie); @@ -31,7 +33,7 @@ struct xdp_id_md { struct xdp_link_info info; }; -int libbpf_netlink_open(__u32 *nl_pid) +static int libbpf_netlink_open(__u32 *nl_pid) { struct sockaddr_nl sa; socklen_t addrlen; @@ -283,6 +285,9 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb) return 0; } +static int libbpf_nl_get_link(int sock, unsigned int nl_pid, + libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie); + int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, size_t info_size, __u32 flags) { @@ -368,121 +373,3 @@ int libbpf_nl_get_link(int sock, unsigned int nl_pid, return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg, dump_link_nlmsg, cookie); } - -static int __dump_class_nlmsg(struct nlmsghdr *nlh, - libbpf_dump_nlmsg_t dump_class_nlmsg, - void *cookie) -{ - struct nlattr *tb[TCA_MAX + 1], *attr; - struct tcmsg *t = NLMSG_DATA(nlh); - int len; - - len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); - attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); - if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) - return -LIBBPF_ERRNO__NLPARSE; - - return dump_class_nlmsg(cookie, t, tb); -} - -int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex, - libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie) -{ - struct { - struct nlmsghdr nlh; - struct tcmsg t; - } req = { - .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), - .nlh.nlmsg_type = RTM_GETTCLASS, - .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, - .t.tcm_family = AF_UNSPEC, - .t.tcm_ifindex = ifindex, - }; - int seq = time(NULL); - - req.nlh.nlmsg_seq = seq; - if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) - return -errno; - - return bpf_netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg, - dump_class_nlmsg, cookie); -} - -static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh, - libbpf_dump_nlmsg_t dump_qdisc_nlmsg, - void *cookie) -{ - struct nlattr *tb[TCA_MAX + 1], *attr; - struct tcmsg *t = NLMSG_DATA(nlh); - int len; - - len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); - attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); - if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) - return -LIBBPF_ERRNO__NLPARSE; - - return dump_qdisc_nlmsg(cookie, t, tb); -} - -int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, - libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie) -{ - struct { - struct nlmsghdr nlh; - struct tcmsg t; - } req = { - .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), - .nlh.nlmsg_type = RTM_GETQDISC, - .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, - .t.tcm_family = AF_UNSPEC, - .t.tcm_ifindex = ifindex, - }; - int seq = time(NULL); - - req.nlh.nlmsg_seq = seq; - if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) - return -errno; - - return bpf_netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg, - dump_qdisc_nlmsg, cookie); -} - -static int __dump_filter_nlmsg(struct nlmsghdr *nlh, - libbpf_dump_nlmsg_t dump_filter_nlmsg, - void *cookie) -{ - struct nlattr *tb[TCA_MAX + 1], *attr; - struct tcmsg *t = NLMSG_DATA(nlh); - int len; - - len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); - attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); - if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) - return -LIBBPF_ERRNO__NLPARSE; - - return dump_filter_nlmsg(cookie, t, tb); -} - -int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, - libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie) -{ - struct { - struct nlmsghdr nlh; - struct tcmsg t; - } req = { - .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), - .nlh.nlmsg_type = RTM_GETTFILTER, - .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, - .t.tcm_family = AF_UNSPEC, - .t.tcm_ifindex = ifindex, - .t.tcm_parent = handle, - }; - int seq = time(NULL); - - req.nlh.nlmsg_seq = seq; - if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) - return -errno; - - return bpf_netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg, - dump_filter_nlmsg, cookie); -} -- cgit v1.3.1 From 85367030a6c7ef3373347cf816c698995074f6f0 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 18:36:06 -0700 Subject: libbpf: Centralize poisoning and poison reallocarray() Most of libbpf source files already include libbpf_internal.h, so it's a good place to centralize identifier poisoning. So move kernel integer type poisoning there. And also add reallocarray to a poison list to prevent accidental use of it. libbpf_reallocarray() should be used universally instead. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200819013607.3607269-4-andriin@fb.com --- tools/lib/bpf/bpf.c | 3 --- tools/lib/bpf/bpf_prog_linfo.c | 3 --- tools/lib/bpf/btf.c | 3 --- tools/lib/bpf/btf_dump.c | 3 --- tools/lib/bpf/hashmap.c | 3 +++ tools/lib/bpf/libbpf.c | 3 --- tools/lib/bpf/libbpf_internal.h | 7 +++++++ tools/lib/bpf/libbpf_probes.c | 3 --- tools/lib/bpf/netlink.c | 3 --- tools/lib/bpf/nlattr.c | 9 +++------ tools/lib/bpf/ringbuf.c | 3 --- tools/lib/bpf/xsk.c | 3 --- 12 files changed, 13 insertions(+), 33 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 0750681057c2..82b983ff6569 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -32,9 +32,6 @@ #include "libbpf.h" #include "libbpf_internal.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - /* * When building perf, unistd.h is overridden. __NR_bpf is * required to be defined explicitly. diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c index bafca49cb1e6..3ed1a27b5f7c 100644 --- a/tools/lib/bpf/bpf_prog_linfo.c +++ b/tools/lib/bpf/bpf_prog_linfo.c @@ -8,9 +8,6 @@ #include "libbpf.h" #include "libbpf_internal.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - struct bpf_prog_linfo { void *raw_linfo; void *raw_jited_linfo; diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 1b7d85d94a07..a3d259e614b0 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -21,9 +21,6 @@ #include "libbpf_internal.h" #include "hashmap.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - #define BTF_MAX_NR_TYPES 0x7fffffffU #define BTF_MAX_STR_OFFSET 0x7fffffffU diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 1ad852ad0a86..0eaafd9bcfea 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -19,9 +19,6 @@ #include "libbpf.h" #include "libbpf_internal.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t"; static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1; diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c index a405dad068f5..3c20b126d60d 100644 --- a/tools/lib/bpf/hashmap.c +++ b/tools/lib/bpf/hashmap.c @@ -15,6 +15,9 @@ /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 +/* prevent accidental re-addition of reallocarray() */ +#pragma GCC poison reallocarray + /* start with 4 buckets */ #define HASHMAP_MIN_CAP_BITS 2 diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2653bcee73b7..4b96e0eefea8 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -55,9 +55,6 @@ #include "libbpf_internal.h" #include "hashmap.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - #ifndef EM_BPF #define EM_BPF 247 #endif diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 65931e989eea..c8ed352671d5 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -10,6 +10,13 @@ #define __LIBBPF_LIBBPF_INTERNAL_H #include + +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + +/* prevent accidental re-addition of reallocarray() */ +#pragma GCC poison reallocarray + #include "libbpf.h" #define BTF_INFO_ENC(kind, kind_flag, vlen) \ diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 5a3d3f078408..010c9a76fd2b 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -17,9 +17,6 @@ #include "libbpf.h" #include "libbpf_internal.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - static bool grep(const char *buffer, const char *pattern) { return !!strstr(buffer, pattern); diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 2465538a5ba9..4dd73de00b6f 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -15,9 +15,6 @@ #include "libbpf_internal.h" #include "nlattr.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - #ifndef SOL_NETLINK #define SOL_NETLINK 270 #endif diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index 0ad41dfea8eb..b607fa9852b1 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -7,14 +7,11 @@ */ #include -#include "nlattr.h" -#include "libbpf_internal.h" -#include #include #include - -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 +#include +#include "nlattr.h" +#include "libbpf_internal.h" static uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = { [LIBBPF_NLA_U8] = sizeof(uint8_t), diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 5bd234be8a14..5c6522c89af1 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -21,9 +21,6 @@ #include "libbpf_internal.h" #include "bpf.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - struct ring { ring_buffer_sample_fn sample_cb; void *ctx; diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index f7f4efb70a4c..a9b02103767b 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -32,9 +32,6 @@ #include "libbpf_internal.h" #include "xsk.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - #ifndef SOL_XDP #define SOL_XDP 283 #endif -- cgit v1.3.1 From 22dd1ac91a77675238d10059c479ab6fa6160c60 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 18:36:07 -0700 Subject: tools: Remove feature-libelf-mmap feature detection It's trivial to handle missing ELF_C_MMAP_READ support in libelf the way that objtool has solved it in ("774bec3fddcc objtool: Add fallback from ELF_C_READ_MMAP to ELF_C_READ"). So instead of having an entire feature detector for that, just do what objtool does for perf and libbpf. And keep their Makefiles a bit simpler. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200819013607.3607269-5-andriin@fb.com --- tools/build/Makefile.feature | 1 - tools/build/feature/Makefile | 4 ---- tools/build/feature/test-all.c | 4 ---- tools/build/feature/test-libelf-mmap.c | 9 --------- tools/lib/bpf/Makefile | 6 +----- tools/lib/bpf/libbpf.c | 14 ++++++-------- tools/perf/Makefile.config | 4 ---- tools/perf/util/symbol.h | 2 +- 8 files changed, 8 insertions(+), 36 deletions(-) delete mode 100644 tools/build/feature/test-libelf-mmap.c (limited to 'tools') diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index c1daf4d57518..38415d251075 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -46,7 +46,6 @@ FEATURE_TESTS_BASIC := \ libelf-getphdrnum \ libelf-gelf_getnote \ libelf-getshdrstrndx \ - libelf-mmap \ libnuma \ numa_num_possible_cpus \ libperl \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index d220fe952747..b2a2347c67ed 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -25,7 +25,6 @@ FILES= \ test-libelf-getphdrnum.bin \ test-libelf-gelf_getnote.bin \ test-libelf-getshdrstrndx.bin \ - test-libelf-mmap.bin \ test-libdebuginfod.bin \ test-libnuma.bin \ test-numa_num_possible_cpus.bin \ @@ -146,9 +145,6 @@ $(OUTPUT)test-dwarf.bin: $(OUTPUT)test-dwarf_getlocations.bin: $(BUILD) $(DWARFLIBS) -$(OUTPUT)test-libelf-mmap.bin: - $(BUILD) -lelf - $(OUTPUT)test-libelf-getphdrnum.bin: $(BUILD) -lelf diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 5479e543b194..5284e6e9c756 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -30,10 +30,6 @@ # include "test-libelf.c" #undef main -#define main main_test_libelf_mmap -# include "test-libelf-mmap.c" -#undef main - #define main main_test_get_current_dir_name # include "test-get_current_dir_name.c" #undef main diff --git a/tools/build/feature/test-libelf-mmap.c b/tools/build/feature/test-libelf-mmap.c deleted file mode 100644 index 2c3ef81affe2..000000000000 --- a/tools/build/feature/test-libelf-mmap.c +++ /dev/null @@ -1,9 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include - -int main(void) -{ - Elf *elf = elf_begin(0, ELF_C_READ_MMAP, 0); - - return (long)elf; -} diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 621ad96d06fd..c5dbfafdf889 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -56,7 +56,7 @@ ifndef VERBOSE endif FEATURE_USER = .libbpf -FEATURE_TESTS = libelf libelf-mmap zlib bpf +FEATURE_TESTS = libelf zlib bpf FEATURE_DISPLAY = libelf zlib bpf INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi @@ -98,10 +98,6 @@ else CFLAGS := -g -Wall endif -ifeq ($(feature-libelf-mmap), 1) - override CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT -endif - # Append required CFLAGS override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum override CFLAGS += -Werror -Wall diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4b96e0eefea8..4a81c6b2d21b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -150,12 +150,6 @@ static void pr_perm_msg(int err) ___err; }) #endif -#ifdef HAVE_LIBELF_MMAP_SUPPORT -# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ_MMAP -#else -# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ -#endif - static inline __u64 ptr_to_u64(const void *ptr) { return (__u64) (unsigned long) ptr; @@ -1064,6 +1058,11 @@ static void bpf_object__elf_finish(struct bpf_object *obj) obj->efile.obj_buf_sz = 0; } +/* if libelf is old and doesn't support mmap(), fall back to read() */ +#ifndef ELF_C_READ_MMAP +#define ELF_C_READ_MMAP ELF_C_READ +#endif + static int bpf_object__elf_init(struct bpf_object *obj) { int err = 0; @@ -1092,8 +1091,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) return err; } - obj->efile.elf = elf_begin(obj->efile.fd, - LIBBPF_ELF_C_READ_MMAP, NULL); + obj->efile.elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL); } if (!obj->efile.elf) { diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 190be4fa5c21..81bb099f6f06 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -483,10 +483,6 @@ ifndef NO_LIBELF EXTLIBS += -lelf $(call detected,CONFIG_LIBELF) - ifeq ($(feature-libelf-mmap), 1) - CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT - endif - ifeq ($(feature-libelf-getphdrnum), 1) CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT endif diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index ff4f4c47e148..03e264a27cd3 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -28,7 +28,7 @@ struct option; * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP; * for newer versions we can use mmap to reduce memory usage: */ -#ifdef HAVE_LIBELF_MMAP_SUPPORT +#ifdef ELF_C_READ_MMAP # define PERF_ELF_C_READ_MMAP ELF_C_READ_MMAP #else # define PERF_ELF_C_READ_MMAP ELF_C_READ -- cgit v1.3.1 From 3fc32f40c40207bf85ce1b007f18981c4673df96 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Aug 2020 12:45:15 -0700 Subject: libbpf: Implement type-based CO-RE relocations support Implement support for TYPE_EXISTS/TYPE_SIZE/TYPE_ID_LOCAL/TYPE_ID_REMOTE relocations. These are examples of type-based relocations, as opposed to field-based relocations supported already. The difference is that they are calculating relocation values based on the type itself, not a field within a struct/union. Type-based relos have slightly different semantics when matching local types to kernel target types, see comments in bpf_core_types_are_compat() for details. Their behavior on failure to find target type in kernel BTF also differs. Instead of "poisoning" relocatable instruction and failing load subsequently in kernel, they return 0 (which is rarely a valid return result, so user BPF code can use that to detect success/failure of the relocation and deal with it without extra "guarding" relocations). Also, it's always possible to check existence of the type in target kernel with TYPE_EXISTS relocation, similarly to a field-based FIELD_EXISTS. TYPE_ID_LOCAL relocation is a bit special in that it always succeeds (barring any libbpf/Clang bugs) and resolved to BTF ID using **local** BTF info of BPF program itself. Tests in subsequent patches demonstrate the usage and semantics of new relocations. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200819194519.3375898-2-andriin@fb.com --- tools/lib/bpf/bpf_core_read.h | 52 ++++++++- tools/lib/bpf/libbpf.c | 231 ++++++++++++++++++++++++++++++++++++---- tools/lib/bpf/libbpf_internal.h | 4 + 3 files changed, 263 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 03152cb143b7..684bfb86361a 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -19,6 +19,18 @@ enum bpf_field_info_kind { BPF_FIELD_RSHIFT_U64 = 5, }; +/* second argument to __builtin_btf_type_id() built-in */ +enum bpf_type_id_kind { + BPF_TYPE_ID_LOCAL = 0, /* BTF type ID in local program */ + BPF_TYPE_ID_TARGET = 1, /* BTF type ID in target kernel */ +}; + +/* second argument to __builtin_preserve_type_info() built-in */ +enum bpf_type_info_kind { + BPF_TYPE_EXISTS = 0, /* type existence in target kernel */ + BPF_TYPE_SIZE = 1, /* type size in target kernel */ +}; + #define __CORE_RELO(src, field, info) \ __builtin_preserve_field_info((src)->field, BPF_FIELD_##info) @@ -94,12 +106,50 @@ enum bpf_field_info_kind { __builtin_preserve_field_info(field, BPF_FIELD_EXISTS) /* - * Convenience macro to get byte size of a field. Works for integers, + * Convenience macro to get the byte size of a field. Works for integers, * struct/unions, pointers, arrays, and enums. */ #define bpf_core_field_size(field) \ __builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE) +/* + * Convenience macro to get BTF type ID of a specified type, using a local BTF + * information. Return 32-bit unsigned integer with type ID from program's own + * BTF. Always succeeds. + */ +#define bpf_core_type_id_local(type) \ + __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_LOCAL) + +/* + * Convenience macro to get BTF type ID of a target kernel's type that matches + * specified local type. + * Returns: + * - valid 32-bit unsigned type ID in kernel BTF; + * - 0, if no matching type was found in a target kernel BTF. + */ +#define bpf_core_type_id_kernel(type) \ + __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_TARGET) + +/* + * Convenience macro to check that provided named type + * (struct/union/enum/typedef) exists in a target kernel. + * Returns: + * 1, if such type is present in target kernel's BTF; + * 0, if no matching type is found. + */ +#define bpf_core_type_exists(type) \ + __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS) + +/* + * Convenience macro to get the byte size of a provided named type + * (struct/union/enum/typedef) in a target kernel. + * Returns: + * >= 0 size (in bytes), if type is present in target kernel's BTF; + * 0, if no matching type is found. + */ +#define bpf_core_type_size(type) \ + __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_SIZE) + /* * bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures * offset relocation for source address using __builtin_preserve_access_index() diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4a81c6b2d21b..882759dfc33e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4111,6 +4111,10 @@ static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) case BPF_FIELD_SIGNED: return "signed"; case BPF_FIELD_LSHIFT_U64: return "lshift_u64"; case BPF_FIELD_RSHIFT_U64: return "rshift_u64"; + case BPF_TYPE_ID_LOCAL: return "local_type_id"; + case BPF_TYPE_ID_TARGET: return "target_type_id"; + case BPF_TYPE_EXISTS: return "type_exists"; + case BPF_TYPE_SIZE: return "type_size"; default: return "unknown"; } } @@ -4130,6 +4134,19 @@ static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) } } +static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) +{ + switch (kind) { + case BPF_TYPE_ID_LOCAL: + case BPF_TYPE_ID_TARGET: + case BPF_TYPE_EXISTS: + case BPF_TYPE_SIZE: + return true; + default: + return false; + } +} + /* * Turn bpf_core_relo into a low- and high-level spec representation, * validating correctness along the way, as well as calculating resulting @@ -4160,6 +4177,9 @@ static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) * - field 'a' access (corresponds to '2' in low-level spec); * - array element #3 access (corresponds to '3' in low-level spec). * + * Type-based relocations (TYPE_EXISTS/TYPE_SIZE, + * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their + * spec and raw_spec are kept empty. */ static int bpf_core_parse_spec(const struct btf *btf, __u32 type_id, @@ -4182,6 +4202,13 @@ static int bpf_core_parse_spec(const struct btf *btf, spec->root_type_id = type_id; spec->relo_kind = relo_kind; + /* type-based relocations don't have a field access string */ + if (core_relo_is_type_based(relo_kind)) { + if (strcmp(spec_str, "0")) + return -EINVAL; + return 0; + } + /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */ while (*spec_str) { if (*spec_str == ':') @@ -4317,7 +4344,7 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, const struct btf *targ_btf) { size_t local_essent_len, targ_essent_len; - const char *local_name, *targ_name, *targ_kind; + const char *local_name, *targ_name; const struct btf_type *t, *local_t; struct ids_vec *cand_ids; __u32 *new_ids; @@ -4339,13 +4366,11 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, n = btf__get_nr_types(targ_btf); for (i = 1; i <= n; i++) { t = btf__type_by_id(targ_btf, i); - targ_name = btf__name_by_offset(targ_btf, t->name_off); - if (str_is_empty(targ_name)) + if (btf_kind(t) != btf_kind(local_t)) continue; - targ_kind = btf_kind_str(t); - t = skip_mods_and_typedefs(targ_btf, i, NULL); - if (!btf_is_composite(t) && !btf_is_array(t)) + targ_name = btf__name_by_offset(targ_btf, t->name_off); + if (str_is_empty(targ_name)) continue; targ_essent_len = bpf_core_essential_name_len(targ_name); @@ -4355,7 +4380,7 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, if (strncmp(local_name, targ_name, local_essent_len) == 0) { pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s\n", local_type_id, btf_kind_str(local_t), - local_name, i, targ_kind, targ_name); + local_name, i, btf_kind_str(t), targ_name); new_ids = libbpf_reallocarray(cand_ids->data, cand_ids->len + 1, sizeof(*cand_ids->data)); @@ -4373,8 +4398,9 @@ err_out: return ERR_PTR(err); } -/* Check two types for compatibility, skipping const/volatile/restrict and - * typedefs, to ensure we are relocating compatible entities: +/* Check two types for compatibility for the purpose of field access + * relocation. const/volatile/restrict and typedefs are skipped to ensure we + * are relocating semantically compatible entities: * - any two STRUCTs/UNIONs are compatible and can be mixed; * - any two FWDs are compatible, if their names match (modulo flavor suffix); * - any two PTRs are always compatible; @@ -4529,6 +4555,100 @@ static int bpf_core_match_member(const struct btf *local_btf, return 0; } +/* Check local and target types for compatibility. This check is used for + * type-based CO-RE relocations and follow slightly different rules than + * field-based relocations. This function assumes that root types were already + * checked for name match. Beyond that initial root-level name check, names + * are completely ignored. Compatibility rules are as follows: + * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but + * kind should match for local and target types (i.e., STRUCT is not + * compatible with UNION); + * - for ENUMs, the size is ignored; + * - for INT, size and signedness are ignored; + * - for ARRAY, dimensionality is ignored, element types are checked for + * compatibility recursively; + * - CONST/VOLATILE/RESTRICT modifiers are ignored; + * - TYPEDEFs/PTRs are compatible if types they pointing to are compatible; + * - FUNC_PROTOs are compatible if they have compatible signature: same + * number of input args and compatible return and argument types. + * These rules are not set in stone and probably will be adjusted as we get + * more experience with using BPF CO-RE relocations. + */ +static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, + const struct btf *targ_btf, __u32 targ_id) +{ + const struct btf_type *local_type, *targ_type; + int depth = 32; /* max recursion depth */ + + /* caller made sure that names match (ignoring flavor suffix) */ + local_type = btf__type_by_id(local_btf, local_id); + targ_type = btf__type_by_id(local_btf, local_id); + if (btf_kind(local_type) != btf_kind(targ_type)) + return 0; + +recur: + depth--; + if (depth < 0) + return -EINVAL; + + local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!local_type || !targ_type) + return -EINVAL; + + if (btf_kind(local_type) != btf_kind(targ_type)) + return 0; + + switch (btf_kind(local_type)) { + case BTF_KIND_UNKN: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_ENUM: + case BTF_KIND_FWD: + return 1; + case BTF_KIND_INT: + /* just reject deprecated bitfield-like integers; all other + * integers are by default compatible between each other + */ + return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0; + case BTF_KIND_PTR: + local_id = local_type->type; + targ_id = targ_type->type; + goto recur; + case BTF_KIND_ARRAY: + local_id = btf_array(local_type)->type; + targ_id = btf_array(targ_type)->type; + goto recur; + case BTF_KIND_FUNC_PROTO: { + struct btf_param *local_p = btf_params(local_type); + struct btf_param *targ_p = btf_params(targ_type); + __u16 local_vlen = btf_vlen(local_type); + __u16 targ_vlen = btf_vlen(targ_type); + int i, err; + + if (local_vlen != targ_vlen) + return 0; + + for (i = 0; i < local_vlen; i++, local_p++, targ_p++) { + skip_mods_and_typedefs(local_btf, local_p->type, &local_id); + skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id); + err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id); + if (err <= 0) + return err; + } + + /* tail recurse for return type check */ + skip_mods_and_typedefs(local_btf, local_type->type, &local_id); + skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id); + goto recur; + } + default: + pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n", + btf_kind_str(local_type), local_id, targ_id); + return 0; + } +} + /* * Try to match local spec to a target type and, if successful, produce full * target spec (high-level, low-level + bit offset). @@ -4547,6 +4667,12 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, targ_spec->root_type_id = targ_id; targ_spec->relo_kind = local_spec->relo_kind; + if (core_relo_is_type_based(local_spec->relo_kind)) { + return bpf_core_types_are_compat(local_spec->btf, + local_spec->root_type_id, + targ_btf, targ_id); + } + local_acc = &local_spec->spec[0]; targ_acc = &targ_spec->spec[0]; @@ -4720,6 +4846,40 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, return 0; } +static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, + const struct bpf_core_spec *spec, + __u32 *val) +{ + __s64 sz; + + /* type-based relos return zero when target type is not found */ + if (!spec) { + *val = 0; + return 0; + } + + switch (relo->kind) { + case BPF_TYPE_ID_TARGET: + *val = spec->root_type_id; + break; + case BPF_TYPE_EXISTS: + *val = 1; + break; + case BPF_TYPE_SIZE: + sz = btf__resolve_size(spec->btf, spec->root_type_id); + if (sz < 0) + return -EINVAL; + *val = sz; + break; + case BPF_TYPE_ID_LOCAL: + /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */ + default: + return -EOPNOTSUPP; + } + + return 0; +} + struct bpf_core_relo_res { /* expected value in the instruction, unless validate == false */ @@ -4755,6 +4915,9 @@ static int bpf_core_calc_relo(const struct bpf_program *prog, if (core_relo_is_field_based(relo->kind)) { err = bpf_core_calc_field_relo(prog, relo, local_spec, &res->orig_val, &res->validate); err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec, &res->new_val, NULL); + } else if (core_relo_is_type_based(relo->kind)) { + err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); + err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); } if (err == -EUCLEAN) { @@ -4894,6 +5057,9 @@ static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "" : s); + if (core_relo_is_type_based(spec->relo_kind)) + return; + if (core_relo_is_field_based(spec->relo_kind)) { for (i = 0; i < spec->len; i++) { if (spec->spec[i].name) @@ -4911,6 +5077,7 @@ static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) spec->bit_offset / 8, spec->bit_offset % 8); else libbpf_print(level, " @ offset %u)", spec->bit_offset / 8); + return; } } @@ -4979,12 +5146,12 @@ static void *u32_as_hash_key(__u32 x) * between multiple relocations for the same type ID and is updated as some * of the candidates are pruned due to structural incompatibility. */ -static int bpf_core_reloc_field(struct bpf_program *prog, - const struct bpf_core_relo *relo, - int relo_idx, - const struct btf *local_btf, - const struct btf *targ_btf, - struct hashmap *cand_cache) +static int bpf_core_apply_relo(struct bpf_program *prog, + const struct bpf_core_relo *relo, + int relo_idx, + const struct btf *local_btf, + const struct btf *targ_btf, + struct hashmap *cand_cache) { const char *prog_name = bpf_program__title(prog, false); struct bpf_core_spec local_spec, cand_spec, targ_spec; @@ -5003,7 +5170,7 @@ static int bpf_core_reloc_field(struct bpf_program *prog, return -EINVAL; local_name = btf__name_by_offset(local_btf, local_type->name_off); - if (str_is_empty(local_name)) + if (!local_name) return -EINVAL; spec_str = btf__name_by_offset(local_btf, relo->access_str_off); @@ -5014,7 +5181,8 @@ static int bpf_core_reloc_field(struct bpf_program *prog, if (err) { pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", prog_name, relo_idx, local_id, btf_kind_str(local_type), - local_name, spec_str, err); + str_is_empty(local_name) ? "" : local_name, + spec_str, err); return -EINVAL; } @@ -5023,12 +5191,28 @@ static int bpf_core_reloc_field(struct bpf_program *prog, bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); libbpf_print(LIBBPF_DEBUG, "\n"); + /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ + if (relo->kind == BPF_TYPE_ID_LOCAL) { + targ_res.validate = true; + targ_res.poison = false; + targ_res.orig_val = local_spec.root_type_id; + targ_res.new_val = local_spec.root_type_id; + goto patch_insn; + } + + /* libbpf doesn't support candidate search for anonymous types */ + if (str_is_empty(spec_str)) { + pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n", + prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); + return -EOPNOTSUPP; + } + if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) { cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf); if (IS_ERR(cand_ids)) { pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld", - prog_name, relo_idx, local_id, btf_kind_str(local_type), local_name, - PTR_ERR(cand_ids)); + prog_name, relo_idx, local_id, btf_kind_str(local_type), + local_name, PTR_ERR(cand_ids)); return PTR_ERR(cand_ids); } err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL); @@ -5084,7 +5268,7 @@ static int bpf_core_reloc_field(struct bpf_program *prog, return -EINVAL; } - cand_ids->data[j++] = cand_spec.spec[0].type_id; + cand_ids->data[j++] = cand_spec.root_type_id; } /* @@ -5103,7 +5287,7 @@ static int bpf_core_reloc_field(struct bpf_program *prog, * as well as expected case, depending whether instruction w/ * relocation is guarded in some way that makes it unreachable (dead * code) if relocation can't be resolved. This is handled in - * bpf_core_reloc_insn() uniformly by replacing that instruction with + * bpf_core_patch_insn() uniformly by replacing that instruction with * BPF helper call insn (using invalid helper ID). If that instruction * is indeed unreachable, then it will be ignored and eliminated by * verifier. If it was an error, then verifier will complain and point @@ -5119,6 +5303,7 @@ static int bpf_core_reloc_field(struct bpf_program *prog, return err; } +patch_insn: /* bpf_core_patch_insn() should know how to handle missing targ_spec */ err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res); if (err) { @@ -5186,8 +5371,8 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) sec_name, sec->num_info); for_each_btf_ext_rec(seg, sec, i, rec) { - err = bpf_core_reloc_field(prog, rec, i, obj->btf, - targ_btf, cand_cache); + err = bpf_core_apply_relo(prog, rec, i, obj->btf, + targ_btf, cand_cache); if (err) { pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", sec_name, i, err); diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index c8ed352671d5..edd3511aa242 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -238,6 +238,10 @@ enum bpf_core_relo_kind { BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ + BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ + BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */ + BPF_TYPE_EXISTS = 8, /* type existence in target kernel */ + BPF_TYPE_SIZE = 9, /* type size in bytes */ }; /* The minimum bpf_core_relo checked by the loader -- cgit v1.3.1 From 124a892d1c416dc4f639031afb79d47857d9b0b0 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Aug 2020 12:45:16 -0700 Subject: selftests/bpf: Test TYPE_EXISTS and TYPE_SIZE CO-RE relocations Add selftests for TYPE_EXISTS and TYPE_SIZE relocations, testing correctness of relocations and handling of type compatiblity/incompatibility. If __builtin_preserve_type_info() is not supported by compiler, skip tests. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200819194519.3375898-3-andriin@fb.com --- .../testing/selftests/bpf/prog_tests/core_reloc.c | 125 ++++++++++--- .../bpf/progs/btf__core_reloc_type_based.c | 3 + .../btf__core_reloc_type_based___all_missing.c | 3 + .../progs/btf__core_reloc_type_based___diff_sz.c | 3 + .../btf__core_reloc_type_based___fn_wrong_args.c | 3 + .../progs/btf__core_reloc_type_based___incompat.c | 3 + .../testing/selftests/bpf/progs/core_reloc_types.h | 202 ++++++++++++++++++++- .../selftests/bpf/progs/test_core_reloc_kernel.c | 2 + .../bpf/progs/test_core_reloc_type_based.c | 124 +++++++++++++ 9 files changed, 446 insertions(+), 22 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 4d650e99be28..b775ce0ede41 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -177,14 +177,13 @@ .fails = true, \ } -#define EXISTENCE_CASE_COMMON(name) \ +#define FIELD_EXISTS_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_existence.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o", \ - .relaxed_core_relocs = true + .btf_src_file = "btf__core_reloc_" #name ".o" \ -#define EXISTENCE_ERR_CASE(name) { \ - EXISTENCE_CASE_COMMON(name), \ +#define FIELD_EXISTS_ERR_CASE(name) { \ + FIELD_EXISTS_CASE_COMMON(name), \ .fails = true, \ } @@ -253,6 +252,23 @@ .fails = true, \ } +#define TYPE_BASED_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_type_based.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" \ + +#define TYPE_BASED_CASE(name, ...) { \ + TYPE_BASED_CASE_COMMON(name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_type_based_output) \ + __VA_ARGS__, \ + .output_len = sizeof(struct core_reloc_type_based_output), \ +} + +#define TYPE_BASED_ERR_CASE(name) { \ + TYPE_BASED_CASE_COMMON(name), \ + .fails = true, \ +} + struct core_reloc_test_case { const char *case_name; const char *bpf_obj_file; @@ -364,7 +380,7 @@ static struct core_reloc_test_case test_cases[] = { /* validate field existence checks */ { - EXISTENCE_CASE_COMMON(existence), + FIELD_EXISTS_CASE_COMMON(existence), .input = STRUCT_TO_CHAR_PTR(core_reloc_existence) { .a = 1, .b = 2, @@ -388,7 +404,7 @@ static struct core_reloc_test_case test_cases[] = { .output_len = sizeof(struct core_reloc_existence_output), }, { - EXISTENCE_CASE_COMMON(existence___minimal), + FIELD_EXISTS_CASE_COMMON(existence___minimal), .input = STRUCT_TO_CHAR_PTR(core_reloc_existence___minimal) { .a = 42, }, @@ -408,12 +424,12 @@ static struct core_reloc_test_case test_cases[] = { .output_len = sizeof(struct core_reloc_existence_output), }, - EXISTENCE_ERR_CASE(existence__err_int_sz), - EXISTENCE_ERR_CASE(existence__err_int_type), - EXISTENCE_ERR_CASE(existence__err_int_kind), - EXISTENCE_ERR_CASE(existence__err_arr_kind), - EXISTENCE_ERR_CASE(existence__err_arr_value_type), - EXISTENCE_ERR_CASE(existence__err_struct_type), + FIELD_EXISTS_ERR_CASE(existence__err_int_sz), + FIELD_EXISTS_ERR_CASE(existence__err_int_type), + FIELD_EXISTS_ERR_CASE(existence__err_int_kind), + FIELD_EXISTS_ERR_CASE(existence__err_arr_kind), + FIELD_EXISTS_ERR_CASE(existence__err_arr_value_type), + FIELD_EXISTS_ERR_CASE(existence__err_struct_type), /* bitfield relocation checks */ BITFIELDS_CASE(bitfields, { @@ -453,11 +469,73 @@ static struct core_reloc_test_case test_cases[] = { SIZE_CASE(size), SIZE_CASE(size___diff_sz), SIZE_ERR_CASE(size___err_ambiguous), + + /* validate type existence and size relocations */ + TYPE_BASED_CASE(type_based, { + .struct_exists = 1, + .union_exists = 1, + .enum_exists = 1, + .typedef_named_struct_exists = 1, + .typedef_anon_struct_exists = 1, + .typedef_struct_ptr_exists = 1, + .typedef_int_exists = 1, + .typedef_enum_exists = 1, + .typedef_void_ptr_exists = 1, + .typedef_func_proto_exists = 1, + .typedef_arr_exists = 1, + .struct_sz = sizeof(struct a_struct), + .union_sz = sizeof(union a_union), + .enum_sz = sizeof(enum an_enum), + .typedef_named_struct_sz = sizeof(named_struct_typedef), + .typedef_anon_struct_sz = sizeof(anon_struct_typedef), + .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef), + .typedef_int_sz = sizeof(int_typedef), + .typedef_enum_sz = sizeof(enum_typedef), + .typedef_void_ptr_sz = sizeof(void_ptr_typedef), + .typedef_func_proto_sz = sizeof(func_proto_typedef), + .typedef_arr_sz = sizeof(arr_typedef), + }), + TYPE_BASED_CASE(type_based___all_missing, { + /* all zeros */ + }), + TYPE_BASED_CASE(type_based___diff_sz, { + .struct_exists = 1, + .union_exists = 1, + .enum_exists = 1, + .typedef_named_struct_exists = 1, + .typedef_anon_struct_exists = 1, + .typedef_struct_ptr_exists = 1, + .typedef_int_exists = 1, + .typedef_enum_exists = 1, + .typedef_void_ptr_exists = 1, + .typedef_func_proto_exists = 1, + .typedef_arr_exists = 1, + .struct_sz = sizeof(struct a_struct___diff_sz), + .union_sz = sizeof(union a_union___diff_sz), + .enum_sz = sizeof(enum an_enum___diff_sz), + .typedef_named_struct_sz = sizeof(named_struct_typedef___diff_sz), + .typedef_anon_struct_sz = sizeof(anon_struct_typedef___diff_sz), + .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef___diff_sz), + .typedef_int_sz = sizeof(int_typedef___diff_sz), + .typedef_enum_sz = sizeof(enum_typedef___diff_sz), + .typedef_void_ptr_sz = sizeof(void_ptr_typedef___diff_sz), + .typedef_func_proto_sz = sizeof(func_proto_typedef___diff_sz), + .typedef_arr_sz = sizeof(arr_typedef___diff_sz), + }), + TYPE_BASED_CASE(type_based___incompat, { + .enum_exists = 1, + .enum_sz = sizeof(enum an_enum), + }), + TYPE_BASED_CASE(type_based___fn_wrong_args, { + .struct_exists = 1, + .struct_sz = sizeof(struct a_struct), + }), }; struct data { char in[256]; char out[256]; + bool skip; uint64_t my_pid_tgid; }; @@ -516,15 +594,10 @@ void test_core_reloc(void) load_attr.log_level = 0; load_attr.target_btf_path = test_case->btf_src_file; err = bpf_object__load_xattr(&load_attr); - if (test_case->fails) { - CHECK(!err, "obj_load_fail", - "should fail to load prog '%s'\n", probe_name); + if (err) { + if (!test_case->fails) + CHECK(false, "obj_load", "failed to load prog '%s': %d\n", probe_name, err); goto cleanup; - } else { - if (CHECK(err, "obj_load", - "failed to load prog '%s': %d\n", - probe_name, err)) - goto cleanup; } data_map = bpf_object__find_map_by_name(obj, "test_cor.bss"); @@ -552,6 +625,16 @@ void test_core_reloc(void) /* trigger test run */ usleep(1); + if (data->skip) { + test__skip(); + goto cleanup; + } + + if (test_case->fails) { + CHECK(false, "obj_load_fail", "should fail to load prog '%s'\n", probe_name); + goto cleanup; + } + equal = memcmp(data->out, test_case->output, test_case->output_len) == 0; if (CHECK(!equal, "check_result", diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c new file mode 100644 index 000000000000..fc3f69e58c71 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c new file mode 100644 index 000000000000..51511648b4ec --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based___all_missing x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c new file mode 100644 index 000000000000..67db3dceb279 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based___diff_sz x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c new file mode 100644 index 000000000000..b357fc65431d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based___fn_wrong_args x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c new file mode 100644 index 000000000000..8ddf20d33d9e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based___incompat x) {} diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index 3b1126c0bc8f..d998537867a2 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -652,7 +652,7 @@ struct core_reloc_misc_extensible { }; /* - * EXISTENCE + * FIELD EXISTENCE */ struct core_reloc_existence_output { int a_exists; @@ -834,3 +834,203 @@ struct core_reloc_size___err_ambiguous2 { void *ptr_field; enum { VALUE___2 = 123 } enum_field; }; + +/* + * TYPE EXISTENCE & SIZE + */ +struct core_reloc_type_based_output { + bool struct_exists; + bool union_exists; + bool enum_exists; + bool typedef_named_struct_exists; + bool typedef_anon_struct_exists; + bool typedef_struct_ptr_exists; + bool typedef_int_exists; + bool typedef_enum_exists; + bool typedef_void_ptr_exists; + bool typedef_func_proto_exists; + bool typedef_arr_exists; + + int struct_sz; + int union_sz; + int enum_sz; + int typedef_named_struct_sz; + int typedef_anon_struct_sz; + int typedef_struct_ptr_sz; + int typedef_int_sz; + int typedef_enum_sz; + int typedef_void_ptr_sz; + int typedef_func_proto_sz; + int typedef_arr_sz; +}; + +struct a_struct { + int x; +}; + +union a_union { + int y; + int z; +}; + +typedef struct a_struct named_struct_typedef; + +typedef struct { int x, y, z; } anon_struct_typedef; + +typedef struct { + int a, b, c; +} *struct_ptr_typedef; + +enum an_enum { + AN_ENUM_VAL1 = 1, + AN_ENUM_VAL2 = 2, + AN_ENUM_VAL3 = 3, +}; + +typedef int int_typedef; + +typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef; + +typedef void *void_ptr_typedef; + +typedef int (*func_proto_typedef)(long); + +typedef char arr_typedef[20]; + +struct core_reloc_type_based { + struct a_struct f1; + union a_union f2; + enum an_enum f3; + named_struct_typedef f4; + anon_struct_typedef f5; + struct_ptr_typedef f6; + int_typedef f7; + enum_typedef f8; + void_ptr_typedef f9; + func_proto_typedef f10; + arr_typedef f11; +}; + +/* no types in target */ +struct core_reloc_type_based___all_missing { +}; + +/* different type sizes, extra modifiers, anon vs named enums, etc */ +struct a_struct___diff_sz { + long x; + int y; + char z; +}; + +union a_union___diff_sz { + char yy; + char zz; +}; + +typedef struct a_struct___diff_sz named_struct_typedef___diff_sz; + +typedef struct { long xx, yy, zzz; } anon_struct_typedef___diff_sz; + +typedef struct { + char aa[1], bb[2], cc[3]; +} *struct_ptr_typedef___diff_sz; + +enum an_enum___diff_sz { + AN_ENUM_VAL1___diff_sz = 0x123412341234, + AN_ENUM_VAL2___diff_sz = 2, +}; + +typedef unsigned long int_typedef___diff_sz; + +typedef enum an_enum___diff_sz enum_typedef___diff_sz; + +typedef const void * const void_ptr_typedef___diff_sz; + +typedef int_typedef___diff_sz (*func_proto_typedef___diff_sz)(char); + +typedef int arr_typedef___diff_sz[2]; + +struct core_reloc_type_based___diff_sz { + struct a_struct___diff_sz f1; + union a_union___diff_sz f2; + enum an_enum___diff_sz f3; + named_struct_typedef___diff_sz f4; + anon_struct_typedef___diff_sz f5; + struct_ptr_typedef___diff_sz f6; + int_typedef___diff_sz f7; + enum_typedef___diff_sz f8; + void_ptr_typedef___diff_sz f9; + func_proto_typedef___diff_sz f10; + arr_typedef___diff_sz f11; +}; + +/* incompatibilities between target and local types */ +union a_struct___incompat { /* union instead of struct */ + int x; +}; + +struct a_union___incompat { /* struct instead of union */ + int y; + int z; +}; + +/* typedef to union, not to struct */ +typedef union a_struct___incompat named_struct_typedef___incompat; + +/* typedef to void pointer, instead of struct */ +typedef void *anon_struct_typedef___incompat; + +/* extra pointer indirection */ +typedef struct { + int a, b, c; +} **struct_ptr_typedef___incompat; + +/* typedef of a struct with int, instead of int */ +typedef struct { int x; } int_typedef___incompat; + +/* typedef to func_proto, instead of enum */ +typedef int (*enum_typedef___incompat)(void); + +/* pointer to char instead of void */ +typedef char *void_ptr_typedef___incompat; + +/* void return type instead of int */ +typedef void (*func_proto_typedef___incompat)(long); + +/* multi-dimensional array instead of a single-dimensional */ +typedef int arr_typedef___incompat[20][2]; + +struct core_reloc_type_based___incompat { + union a_struct___incompat f1; + struct a_union___incompat f2; + /* the only valid one is enum, to check that something still succeeds */ + enum an_enum f3; + named_struct_typedef___incompat f4; + anon_struct_typedef___incompat f5; + struct_ptr_typedef___incompat f6; + int_typedef___incompat f7; + enum_typedef___incompat f8; + void_ptr_typedef___incompat f9; + func_proto_typedef___incompat f10; + arr_typedef___incompat f11; +}; + +/* func_proto with incompatible signature */ +typedef void (*func_proto_typedef___fn_wrong_ret1)(long); +typedef int * (*func_proto_typedef___fn_wrong_ret2)(long); +typedef struct { int x; } int_struct_typedef; +typedef int_struct_typedef (*func_proto_typedef___fn_wrong_ret3)(long); +typedef int (*func_proto_typedef___fn_wrong_arg)(void *); +typedef int (*func_proto_typedef___fn_wrong_arg_cnt1)(long, long); +typedef int (*func_proto_typedef___fn_wrong_arg_cnt2)(void); + +struct core_reloc_type_based___fn_wrong_args { + /* one valid type to make sure relos still work */ + struct a_struct f1; + func_proto_typedef___fn_wrong_ret1 f2; + func_proto_typedef___fn_wrong_ret2 f3; + func_proto_typedef___fn_wrong_ret3 f4; + func_proto_typedef___fn_wrong_arg f5; + func_proto_typedef___fn_wrong_arg_cnt1 f6; + func_proto_typedef___fn_wrong_arg_cnt2 f7; +}; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c index aba928fd60d3..145028b52ad8 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -11,6 +12,7 @@ char _license[] SEC("license") = "GPL"; struct { char in[256]; char out[256]; + bool skip; uint64_t my_pid_tgid; } data = {}; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c new file mode 100644 index 000000000000..6ab259d02dc0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include +#include +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; + bool skip; +} data = {}; + +struct a_struct { + int x; +}; + +union a_union { + int y; + int z; +}; + +typedef struct a_struct named_struct_typedef; + +typedef struct { int x, y, z; } anon_struct_typedef; + +typedef struct { + int a, b, c; +} *struct_ptr_typedef; + +enum an_enum { + AN_ENUM_VAL1 = 1, + AN_ENUM_VAL2 = 2, + AN_ENUM_VAL3 = 3, +}; + +typedef int int_typedef; + +typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef; + +typedef void *void_ptr_typedef; + +typedef int (*func_proto_typedef)(long); + +typedef char arr_typedef[20]; + +struct core_reloc_type_based { + struct a_struct f1; + union a_union f2; + enum an_enum f3; + named_struct_typedef f4; + anon_struct_typedef f5; + struct_ptr_typedef f6; + int_typedef f7; + enum_typedef f8; + void_ptr_typedef f9; + func_proto_typedef f10; + arr_typedef f11; +}; + +struct core_reloc_type_based_output { + bool struct_exists; + bool union_exists; + bool enum_exists; + bool typedef_named_struct_exists; + bool typedef_anon_struct_exists; + bool typedef_struct_ptr_exists; + bool typedef_int_exists; + bool typedef_enum_exists; + bool typedef_void_ptr_exists; + bool typedef_func_proto_exists; + bool typedef_arr_exists; + + int struct_sz; + int union_sz; + int enum_sz; + int typedef_named_struct_sz; + int typedef_anon_struct_sz; + int typedef_struct_ptr_sz; + int typedef_int_sz; + int typedef_enum_sz; + int typedef_void_ptr_sz; + int typedef_func_proto_sz; + int typedef_arr_sz; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_type_based(void *ctx) +{ +#if __has_builtin(__builtin_preserve_type_info) + struct core_reloc_type_based_output *out = (void *)&data.out; + + out->struct_exists = bpf_core_type_exists(struct a_struct); + out->union_exists = bpf_core_type_exists(union a_union); + out->enum_exists = bpf_core_type_exists(enum an_enum); + out->typedef_named_struct_exists = bpf_core_type_exists(named_struct_typedef); + out->typedef_anon_struct_exists = bpf_core_type_exists(anon_struct_typedef); + out->typedef_struct_ptr_exists = bpf_core_type_exists(struct_ptr_typedef); + out->typedef_int_exists = bpf_core_type_exists(int_typedef); + out->typedef_enum_exists = bpf_core_type_exists(enum_typedef); + out->typedef_void_ptr_exists = bpf_core_type_exists(void_ptr_typedef); + out->typedef_func_proto_exists = bpf_core_type_exists(func_proto_typedef); + out->typedef_arr_exists = bpf_core_type_exists(arr_typedef); + + out->struct_sz = bpf_core_type_size(struct a_struct); + out->union_sz = bpf_core_type_size(union a_union); + out->enum_sz = bpf_core_type_size(enum an_enum); + out->typedef_named_struct_sz = bpf_core_type_size(named_struct_typedef); + out->typedef_anon_struct_sz = bpf_core_type_size(anon_struct_typedef); + out->typedef_struct_ptr_sz = bpf_core_type_size(struct_ptr_typedef); + out->typedef_int_sz = bpf_core_type_size(int_typedef); + out->typedef_enum_sz = bpf_core_type_size(enum_typedef); + out->typedef_void_ptr_sz = bpf_core_type_size(void_ptr_typedef); + out->typedef_func_proto_sz = bpf_core_type_size(func_proto_typedef); + out->typedef_arr_sz = bpf_core_type_size(arr_typedef); +#else + data.skip = true; +#endif + return 0; +} -- cgit v1.3.1 From 4836bf5e2ec6e410c7f0c044402951484901a676 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Aug 2020 12:45:17 -0700 Subject: selftests/bpf: Add CO-RE relo test for TYPE_ID_LOCAL/TYPE_ID_TARGET Add tests for BTF type ID relocations. To allow testing this, enhance core_relo.c test runner to allow dynamic initialization of test inputs. If Clang doesn't have necessary support for new functionality, test is skipped. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200819194519.3375898-4-andriin@fb.com --- .../testing/selftests/bpf/prog_tests/core_reloc.c | 168 ++++++++++++++++++++- .../selftests/bpf/progs/btf__core_reloc_type_id.c | 3 + .../btf__core_reloc_type_id___missing_targets.c | 3 + .../testing/selftests/bpf/progs/core_reloc_types.h | 41 +++++ .../bpf/progs/test_core_reloc_type_based.c | 14 -- .../selftests/bpf/progs/test_core_reloc_type_id.c | 113 ++++++++++++++ 6 files changed, 323 insertions(+), 19 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index b775ce0ede41..ad550510ef69 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -3,6 +3,9 @@ #include "progs/core_reloc_types.h" #include #include +#include + +static int duration = 0; #define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name) @@ -269,6 +272,27 @@ .fails = true, \ } +#define TYPE_ID_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_type_id.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" \ + +#define TYPE_ID_CASE(name, setup_fn) { \ + TYPE_ID_CASE_COMMON(name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_type_id_output) {}, \ + .output_len = sizeof(struct core_reloc_type_id_output), \ + .setup = setup_fn, \ +} + +#define TYPE_ID_ERR_CASE(name) { \ + TYPE_ID_CASE_COMMON(name), \ + .fails = true, \ +} + +struct core_reloc_test_case; + +typedef int (*setup_test_fn)(struct core_reloc_test_case *test); + struct core_reloc_test_case { const char *case_name; const char *bpf_obj_file; @@ -280,8 +304,136 @@ struct core_reloc_test_case { bool fails; bool relaxed_core_relocs; bool direct_raw_tp; + setup_test_fn setup; }; +static int find_btf_type(const struct btf *btf, const char *name, __u32 kind) +{ + int id; + + id = btf__find_by_name_kind(btf, name, kind); + if (CHECK(id <= 0, "find_type_id", "failed to find '%s', kind %d: %d\n", name, kind, id)) + return -1; + + return id; +} + +static int setup_type_id_case_local(struct core_reloc_test_case *test) +{ + struct core_reloc_type_id_output *exp = (void *)test->output; + struct btf *local_btf = btf__parse(test->bpf_obj_file, NULL); + struct btf *targ_btf = btf__parse(test->btf_src_file, NULL); + const struct btf_type *t; + const char *name; + int i; + + if (CHECK(IS_ERR(local_btf), "local_btf", "failed: %ld\n", PTR_ERR(local_btf)) || + CHECK(IS_ERR(targ_btf), "targ_btf", "failed: %ld\n", PTR_ERR(targ_btf))) { + btf__free(local_btf); + btf__free(targ_btf); + return -EINVAL; + } + + exp->local_anon_struct = -1; + exp->local_anon_union = -1; + exp->local_anon_enum = -1; + exp->local_anon_func_proto_ptr = -1; + exp->local_anon_void_ptr = -1; + exp->local_anon_arr = -1; + + for (i = 1; i <= btf__get_nr_types(local_btf); i++) + { + t = btf__type_by_id(local_btf, i); + /* we are interested only in anonymous types */ + if (t->name_off) + continue; + + if (btf_is_struct(t) && btf_vlen(t) && + (name = btf__name_by_offset(local_btf, btf_members(t)[0].name_off)) && + strcmp(name, "marker_field") == 0) { + exp->local_anon_struct = i; + } else if (btf_is_union(t) && btf_vlen(t) && + (name = btf__name_by_offset(local_btf, btf_members(t)[0].name_off)) && + strcmp(name, "marker_field") == 0) { + exp->local_anon_union = i; + } else if (btf_is_enum(t) && btf_vlen(t) && + (name = btf__name_by_offset(local_btf, btf_enum(t)[0].name_off)) && + strcmp(name, "MARKER_ENUM_VAL") == 0) { + exp->local_anon_enum = i; + } else if (btf_is_ptr(t) && (t = btf__type_by_id(local_btf, t->type))) { + if (btf_is_func_proto(t) && (t = btf__type_by_id(local_btf, t->type)) && + btf_is_int(t) && (name = btf__name_by_offset(local_btf, t->name_off)) && + strcmp(name, "_Bool") == 0) { + /* ptr -> func_proto -> _Bool */ + exp->local_anon_func_proto_ptr = i; + } else if (btf_is_void(t)) { + /* ptr -> void */ + exp->local_anon_void_ptr = i; + } + } else if (btf_is_array(t) && (t = btf__type_by_id(local_btf, btf_array(t)->type)) && + btf_is_int(t) && (name = btf__name_by_offset(local_btf, t->name_off)) && + strcmp(name, "_Bool") == 0) { + /* _Bool[] */ + exp->local_anon_arr = i; + } + } + + exp->local_struct = find_btf_type(local_btf, "a_struct", BTF_KIND_STRUCT); + exp->local_union = find_btf_type(local_btf, "a_union", BTF_KIND_UNION); + exp->local_enum = find_btf_type(local_btf, "an_enum", BTF_KIND_ENUM); + exp->local_int = find_btf_type(local_btf, "int", BTF_KIND_INT); + exp->local_struct_typedef = find_btf_type(local_btf, "named_struct_typedef", BTF_KIND_TYPEDEF); + exp->local_func_proto_typedef = find_btf_type(local_btf, "func_proto_typedef", BTF_KIND_TYPEDEF); + exp->local_arr_typedef = find_btf_type(local_btf, "arr_typedef", BTF_KIND_TYPEDEF); + + btf__free(local_btf); + btf__free(targ_btf); + return 0; +} + +static int setup_type_id_case_success(struct core_reloc_test_case *test) { + struct core_reloc_type_id_output *exp = (void *)test->output; + struct btf *targ_btf = btf__parse(test->btf_src_file, NULL); + int err; + + err = setup_type_id_case_local(test); + if (err) + return err; + + targ_btf = btf__parse(test->btf_src_file, NULL); + + exp->targ_struct = find_btf_type(targ_btf, "a_struct", BTF_KIND_STRUCT); + exp->targ_union = find_btf_type(targ_btf, "a_union", BTF_KIND_UNION); + exp->targ_enum = find_btf_type(targ_btf, "an_enum", BTF_KIND_ENUM); + exp->targ_int = find_btf_type(targ_btf, "int", BTF_KIND_INT); + exp->targ_struct_typedef = find_btf_type(targ_btf, "named_struct_typedef", BTF_KIND_TYPEDEF); + exp->targ_func_proto_typedef = find_btf_type(targ_btf, "func_proto_typedef", BTF_KIND_TYPEDEF); + exp->targ_arr_typedef = find_btf_type(targ_btf, "arr_typedef", BTF_KIND_TYPEDEF); + + btf__free(targ_btf); + return 0; +} + +static int setup_type_id_case_failure(struct core_reloc_test_case *test) +{ + struct core_reloc_type_id_output *exp = (void *)test->output; + int err; + + err = setup_type_id_case_local(test); + if (err) + return err; + + exp->targ_struct = 0; + exp->targ_union = 0; + exp->targ_enum = 0; + exp->targ_int = 0; + exp->targ_struct_typedef = 0; + exp->targ_func_proto_typedef = 0; + exp->targ_arr_typedef = 0; + + return 0; +} + static struct core_reloc_test_case test_cases[] = { /* validate we can find kernel image and use its BTF for relocs */ { @@ -530,6 +682,10 @@ static struct core_reloc_test_case test_cases[] = { .struct_exists = 1, .struct_sz = sizeof(struct a_struct), }), + + /* BTF_TYPE_ID_LOCAL/BTF_TYPE_ID_TARGET tests */ + TYPE_ID_CASE(type_id, setup_type_id_case_success), + TYPE_ID_CASE(type_id___missing_targets, setup_type_id_case_failure), }; struct data { @@ -551,7 +707,7 @@ void test_core_reloc(void) struct bpf_object_load_attr load_attr = {}; struct core_reloc_test_case *test_case; const char *tp_name, *probe_name; - int err, duration = 0, i, equal; + int err, i, equal; struct bpf_link *link = NULL; struct bpf_map *data_map; struct bpf_program *prog; @@ -567,11 +723,13 @@ void test_core_reloc(void) if (!test__start_subtest(test_case->case_name)) continue; - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, - .relaxed_core_relocs = test_case->relaxed_core_relocs, - ); + if (test_case->setup) { + err = test_case->setup(test_case); + if (CHECK(err, "test_setup", "test #%d setup failed: %d\n", i, err)) + continue; + } - obj = bpf_object__open_file(test_case->bpf_obj_file, &opts); + obj = bpf_object__open_file(test_case->bpf_obj_file, NULL); if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n", test_case->bpf_obj_file, PTR_ERR(obj))) continue; diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c new file mode 100644 index 000000000000..abbe5bddcefd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_id x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c new file mode 100644 index 000000000000..24e7caf4f013 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_id___missing_targets x) {} diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index d998537867a2..10afcc5f219f 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -1034,3 +1034,44 @@ struct core_reloc_type_based___fn_wrong_args { func_proto_typedef___fn_wrong_arg_cnt1 f6; func_proto_typedef___fn_wrong_arg_cnt2 f7; }; + +/* + * TYPE ID MAPPING (LOCAL AND TARGET) + */ +struct core_reloc_type_id_output { + int local_anon_struct; + int local_anon_union; + int local_anon_enum; + int local_anon_func_proto_ptr; + int local_anon_void_ptr; + int local_anon_arr; + + int local_struct; + int local_union; + int local_enum; + int local_int; + int local_struct_typedef; + int local_func_proto_typedef; + int local_arr_typedef; + + int targ_struct; + int targ_union; + int targ_enum; + int targ_int; + int targ_struct_typedef; + int targ_func_proto_typedef; + int targ_arr_typedef; +}; + +struct core_reloc_type_id { + struct a_struct f1; + union a_union f2; + enum an_enum f3; + named_struct_typedef f4; + func_proto_typedef f5; + arr_typedef f6; +}; + +struct core_reloc_type_id___missing_targets { + /* nothing */ +}; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c index 6ab259d02dc0..fb60f8195c53 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c @@ -48,20 +48,6 @@ typedef int (*func_proto_typedef)(long); typedef char arr_typedef[20]; -struct core_reloc_type_based { - struct a_struct f1; - union a_union f2; - enum an_enum f3; - named_struct_typedef f4; - anon_struct_typedef f5; - struct_ptr_typedef f6; - int_typedef f7; - enum_typedef f8; - void_ptr_typedef f9; - func_proto_typedef f10; - arr_typedef f11; -}; - struct core_reloc_type_based_output { bool struct_exists; bool union_exists; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c new file mode 100644 index 000000000000..23e6e6bf276c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include +#include +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; + bool skip; +} data = {}; + +/* some types are shared with test_core_reloc_type_based.c */ +struct a_struct { + int x; +}; + +union a_union { + int y; + int z; +}; + +enum an_enum { + AN_ENUM_VAL1 = 1, + AN_ENUM_VAL2 = 2, + AN_ENUM_VAL3 = 3, +}; + +typedef struct a_struct named_struct_typedef; + +typedef int (*func_proto_typedef)(long); + +typedef char arr_typedef[20]; + +struct core_reloc_type_id_output { + int local_anon_struct; + int local_anon_union; + int local_anon_enum; + int local_anon_func_proto_ptr; + int local_anon_void_ptr; + int local_anon_arr; + + int local_struct; + int local_union; + int local_enum; + int local_int; + int local_struct_typedef; + int local_func_proto_typedef; + int local_arr_typedef; + + int targ_struct; + int targ_union; + int targ_enum; + int targ_int; + int targ_struct_typedef; + int targ_func_proto_typedef; + int targ_arr_typedef; +}; + +/* preserve types even if Clang doesn't support built-in */ +struct a_struct t1 = {}; +union a_union t2 = {}; +enum an_enum t3 = 0; +named_struct_typedef t4 = {}; +func_proto_typedef t5 = 0; +arr_typedef t6 = {}; + +SEC("raw_tracepoint/sys_enter") +int test_core_type_id(void *ctx) +{ + /* We use __builtin_btf_type_id() in this tests, but up until the time + * __builtin_preserve_type_info() was added it contained a bug that + * would make this test fail. The bug was fixed with addition of + * __builtin_preserve_type_info(), though, so that's what we are using + * to detect whether this test has to be executed, however strange + * that might look like. + */ +#if __has_builtin(__builtin_preserve_type_info) + struct core_reloc_type_id_output *out = (void *)&data.out; + + out->local_anon_struct = bpf_core_type_id_local(struct { int marker_field; }); + out->local_anon_union = bpf_core_type_id_local(union { int marker_field; }); + out->local_anon_enum = bpf_core_type_id_local(enum { MARKER_ENUM_VAL = 123 }); + out->local_anon_func_proto_ptr = bpf_core_type_id_local(_Bool(*)(int)); + out->local_anon_void_ptr = bpf_core_type_id_local(void *); + out->local_anon_arr = bpf_core_type_id_local(_Bool[47]); + + out->local_struct = bpf_core_type_id_local(struct a_struct); + out->local_union = bpf_core_type_id_local(union a_union); + out->local_enum = bpf_core_type_id_local(enum an_enum); + out->local_int = bpf_core_type_id_local(int); + out->local_struct_typedef = bpf_core_type_id_local(named_struct_typedef); + out->local_func_proto_typedef = bpf_core_type_id_local(func_proto_typedef); + out->local_arr_typedef = bpf_core_type_id_local(arr_typedef); + + out->targ_struct = bpf_core_type_id_kernel(struct a_struct); + out->targ_union = bpf_core_type_id_kernel(union a_union); + out->targ_enum = bpf_core_type_id_kernel(enum an_enum); + out->targ_int = bpf_core_type_id_kernel(int); + out->targ_struct_typedef = bpf_core_type_id_kernel(named_struct_typedef); + out->targ_func_proto_typedef = bpf_core_type_id_kernel(func_proto_typedef); + out->targ_arr_typedef = bpf_core_type_id_kernel(arr_typedef); +#else + data.skip = true; +#endif + + return 0; +} -- cgit v1.3.1 From eacaaed784e2c9da69dea3030c81062c1fd66a37 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Aug 2020 12:45:18 -0700 Subject: libbpf: Implement enum value-based CO-RE relocations Implement two relocations of a new enumerator value-based CO-RE relocation kind: ENUMVAL_EXISTS and ENUMVAL_VALUE. First, ENUMVAL_EXISTS, allows to detect the presence of a named enumerator value in the target (kernel) BTF. This is useful to do BPF helper/map/program type support detection from BPF program side. bpf_core_enum_value_exists() macro helper is provided to simplify built-in usage. Second, ENUMVAL_VALUE, allows to capture enumerator integer value and relocate it according to the target BTF, if it changes. This is useful to have a guarantee against intentional or accidental re-ordering/re-numbering of some of the internal (non-UAPI) enumerations, where kernel developers don't care about UAPI backwards compatiblity concerns. bpf_core_enum_value() allows to capture this succinctly and use correct enum values in code. LLVM uses ldimm64 instruction to capture enumerator value-based relocations, so add support for ldimm64 instruction patching as well. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200819194519.3375898-5-andriin@fb.com --- tools/lib/bpf/bpf_core_read.h | 28 ++++++++ tools/lib/bpf/libbpf.c | 145 ++++++++++++++++++++++++++++++++++++++-- tools/lib/bpf/libbpf_internal.h | 2 + 3 files changed, 170 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 684bfb86361a..bbcefb3ff5a5 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -31,6 +31,12 @@ enum bpf_type_info_kind { BPF_TYPE_SIZE = 1, /* type size in target kernel */ }; +/* second argument to __builtin_preserve_enum_value() built-in */ +enum bpf_enum_value_kind { + BPF_ENUMVAL_EXISTS = 0, /* enum value existence in kernel */ + BPF_ENUMVAL_VALUE = 1, /* enum value value relocation */ +}; + #define __CORE_RELO(src, field, info) \ __builtin_preserve_field_info((src)->field, BPF_FIELD_##info) @@ -150,6 +156,28 @@ enum bpf_type_info_kind { #define bpf_core_type_size(type) \ __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_SIZE) +/* + * Convenience macro to check that provided enumerator value is defined in + * a target kernel. + * Returns: + * 1, if specified enum type and its enumerator value are present in target + * kernel's BTF; + * 0, if no matching enum and/or enum value within that enum is found. + */ +#define bpf_core_enum_value_exists(enum_type, enum_value) \ + __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_EXISTS) + +/* + * Convenience macro to get the integer value of an enumerator value in + * a target kernel. + * Returns: + * 64-bit value, if specified enum type and its enumerator value are + * present in target kernel's BTF; + * 0, if no matching enum and/or enum value within that enum is found. + */ +#define bpf_core_enum_value(enum_type, enum_value) \ + __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_VALUE) + /* * bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures * offset relocation for source address using __builtin_preserve_access_index() diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 882759dfc33e..77d420c02094 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4115,6 +4115,8 @@ static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) case BPF_TYPE_ID_TARGET: return "target_type_id"; case BPF_TYPE_EXISTS: return "type_exists"; case BPF_TYPE_SIZE: return "type_size"; + case BPF_ENUMVAL_EXISTS: return "enumval_exists"; + case BPF_ENUMVAL_VALUE: return "enumval_value"; default: return "unknown"; } } @@ -4147,6 +4149,17 @@ static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) } } +static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) +{ + switch (kind) { + case BPF_ENUMVAL_EXISTS: + case BPF_ENUMVAL_VALUE: + return true; + default: + return false; + } +} + /* * Turn bpf_core_relo into a low- and high-level spec representation, * validating correctness along the way, as well as calculating resulting @@ -4180,6 +4193,9 @@ static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) * Type-based relocations (TYPE_EXISTS/TYPE_SIZE, * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their * spec and raw_spec are kept empty. + * + * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access + * string to specify enumerator's value index that need to be relocated. */ static int bpf_core_parse_spec(const struct btf *btf, __u32 type_id, @@ -4224,16 +4240,25 @@ static int bpf_core_parse_spec(const struct btf *btf, if (spec->raw_len == 0) return -EINVAL; - /* first spec value is always reloc type array index */ t = skip_mods_and_typedefs(btf, type_id, &id); if (!t) return -EINVAL; access_idx = spec->raw_spec[0]; - spec->spec[0].type_id = id; - spec->spec[0].idx = access_idx; + acc = &spec->spec[0]; + acc->type_id = id; + acc->idx = access_idx; spec->len++; + if (core_relo_is_enumval_based(relo_kind)) { + if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) + return -EINVAL; + + /* record enumerator name in a first accessor */ + acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off); + return 0; + } + if (!core_relo_is_field_based(relo_kind)) return -EINVAL; @@ -4676,6 +4701,39 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, local_acc = &local_spec->spec[0]; targ_acc = &targ_spec->spec[0]; + if (core_relo_is_enumval_based(local_spec->relo_kind)) { + size_t local_essent_len, targ_essent_len; + const struct btf_enum *e; + const char *targ_name; + + /* has to resolve to an enum */ + targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id); + if (!btf_is_enum(targ_type)) + return 0; + + local_essent_len = bpf_core_essential_name_len(local_acc->name); + + for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) { + targ_name = btf__name_by_offset(targ_spec->btf, e->name_off); + targ_essent_len = bpf_core_essential_name_len(targ_name); + if (targ_essent_len != local_essent_len) + continue; + if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) { + targ_acc->type_id = targ_id; + targ_acc->idx = i; + targ_acc->name = targ_name; + targ_spec->len++; + targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; + targ_spec->raw_len++; + return 1; + } + } + return 0; + } + + if (!core_relo_is_field_based(local_spec->relo_kind)) + return -EINVAL; + for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) { targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id); @@ -4880,6 +4938,31 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, return 0; } +static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, + const struct bpf_core_spec *spec, + __u32 *val) +{ + const struct btf_type *t; + const struct btf_enum *e; + + switch (relo->kind) { + case BPF_ENUMVAL_EXISTS: + *val = spec ? 1 : 0; + break; + case BPF_ENUMVAL_VALUE: + if (!spec) + return -EUCLEAN; /* request instruction poisoning */ + t = btf__type_by_id(spec->btf, spec->spec[0].type_id); + e = btf_enum(t) + spec->spec[0].idx; + *val = e->val; + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + struct bpf_core_relo_res { /* expected value in the instruction, unless validate == false */ @@ -4918,6 +5001,9 @@ static int bpf_core_calc_relo(const struct bpf_program *prog, } else if (core_relo_is_type_based(relo->kind)) { err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); + } else if (core_relo_is_enumval_based(relo->kind)) { + err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val); + err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val); } if (err == -EUCLEAN) { @@ -4954,6 +5040,11 @@ static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx, insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ } +static bool is_ldimm64(struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW); +} + /* * Patch relocatable BPF instruction. * @@ -4966,6 +5057,7 @@ static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx, * Currently three kinds of BPF instructions are supported: * 1. rX = (assignment with immediate operand); * 2. rX += (arithmetic operations with immediate operand); + * 3. rX = (load with 64-bit immediate value). */ static int bpf_core_patch_insn(struct bpf_program *prog, const struct bpf_core_relo *relo, @@ -4984,6 +5076,11 @@ static int bpf_core_patch_insn(struct bpf_program *prog, class = BPF_CLASS(insn->code); if (res->poison) { + /* poison second part of ldimm64 to avoid confusing error from + * verifier about "unknown opcode 00" + */ + if (is_ldimm64(insn)) + bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1); bpf_core_poison_insn(prog, relo_idx, insn_idx, insn); return 0; } @@ -5012,7 +5109,7 @@ static int bpf_core_patch_insn(struct bpf_program *prog, case BPF_ST: case BPF_STX: if (res->validate && insn->off != orig_val) { - pr_warn("prog '%s': relo #%d: unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n", + pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n", bpf_program__title(prog, false), relo_idx, insn_idx, insn->off, orig_val, new_val); return -EINVAL; @@ -5029,8 +5126,36 @@ static int bpf_core_patch_insn(struct bpf_program *prog, bpf_program__title(prog, false), relo_idx, insn_idx, orig_val, new_val); break; + case BPF_LD: { + __u64 imm; + + if (!is_ldimm64(insn) || + insn[0].src_reg != 0 || insn[0].off != 0 || + insn_idx + 1 >= prog->insns_cnt || + insn[1].code != 0 || insn[1].dst_reg != 0 || + insn[1].src_reg != 0 || insn[1].off != 0) { + pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n", + bpf_program__title(prog, false), relo_idx, insn_idx); + return -EINVAL; + } + + imm = insn[0].imm + ((__u64)insn[1].imm << 32); + if (res->validate && imm != orig_val) { + pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n", + bpf_program__title(prog, false), relo_idx, + insn_idx, imm, orig_val, new_val); + return -EINVAL; + } + + insn[0].imm = new_val; + insn[1].imm = 0; /* currently only 32-bit values are supported */ + pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n", + bpf_program__title(prog, false), relo_idx, insn_idx, + imm, new_val); + break; + } default: - pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", + pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n", bpf_program__title(prog, false), relo_idx, insn_idx, insn->code, insn->src_reg, insn->dst_reg, insn->off, insn->imm); @@ -5047,6 +5172,7 @@ static int bpf_core_patch_insn(struct bpf_program *prog, static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) { const struct btf_type *t; + const struct btf_enum *e; const char *s; __u32 type_id; int i; @@ -5060,6 +5186,15 @@ static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) if (core_relo_is_type_based(spec->relo_kind)) return; + if (core_relo_is_enumval_based(spec->relo_kind)) { + t = skip_mods_and_typedefs(spec->btf, type_id, NULL); + e = btf_enum(t) + spec->raw_spec[0]; + s = btf__name_by_offset(spec->btf, e->name_off); + + libbpf_print(level, "::%s = %u", s, e->val); + return; + } + if (core_relo_is_field_based(spec->relo_kind)) { for (i = 0; i < spec->len; i++) { if (spec->spec[i].name) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index edd3511aa242..61dff515a2f0 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -242,6 +242,8 @@ enum bpf_core_relo_kind { BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */ BPF_TYPE_EXISTS = 8, /* type existence in target kernel */ BPF_TYPE_SIZE = 9, /* type size in bytes */ + BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ + BPF_ENUMVAL_VALUE = 11, /* enum value integer value */ }; /* The minimum bpf_core_relo checked by the loader -- cgit v1.3.1 From 3357490555eacd39c2bf3e355a0e435f658d806f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Aug 2020 12:45:19 -0700 Subject: selftests/bpf: Add tests for ENUMVAL_EXISTS/ENUMVAL_VALUE relocations Add tests validating existence and value relocations for enum value-based relocations. If __builtin_preserve_enum_value() built-in is not supported, skip tests. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200819194519.3375898-6-andriin@fb.com --- .../testing/selftests/bpf/prog_tests/core_reloc.c | 56 +++++++++++++++ .../selftests/bpf/progs/btf__core_reloc_enumval.c | 3 + .../bpf/progs/btf__core_reloc_enumval___diff.c | 3 + .../progs/btf__core_reloc_enumval___err_missing.c | 3 + .../progs/btf__core_reloc_enumval___val3_missing.c | 3 + .../testing/selftests/bpf/progs/core_reloc_types.h | 84 ++++++++++++++++++++++ .../selftests/bpf/progs/test_core_reloc_enumval.c | 72 +++++++++++++++++++ 7 files changed, 224 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index ad550510ef69..30e40ff4b0d8 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -289,6 +289,23 @@ static int duration = 0; .fails = true, \ } +#define ENUMVAL_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_enumval.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" \ + +#define ENUMVAL_CASE(name, ...) { \ + ENUMVAL_CASE_COMMON(name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_enumval_output) \ + __VA_ARGS__, \ + .output_len = sizeof(struct core_reloc_enumval_output), \ +} + +#define ENUMVAL_ERR_CASE(name) { \ + ENUMVAL_CASE_COMMON(name), \ + .fails = true, \ +} + struct core_reloc_test_case; typedef int (*setup_test_fn)(struct core_reloc_test_case *test); @@ -686,6 +703,45 @@ static struct core_reloc_test_case test_cases[] = { /* BTF_TYPE_ID_LOCAL/BTF_TYPE_ID_TARGET tests */ TYPE_ID_CASE(type_id, setup_type_id_case_success), TYPE_ID_CASE(type_id___missing_targets, setup_type_id_case_failure), + + /* Enumerator value existence and value relocations */ + ENUMVAL_CASE(enumval, { + .named_val1_exists = true, + .named_val2_exists = true, + .named_val3_exists = true, + .anon_val1_exists = true, + .anon_val2_exists = true, + .anon_val3_exists = true, + .named_val1 = 1, + .named_val2 = 2, + .anon_val1 = 0x10, + .anon_val2 = 0x20, + }), + ENUMVAL_CASE(enumval___diff, { + .named_val1_exists = true, + .named_val2_exists = true, + .named_val3_exists = true, + .anon_val1_exists = true, + .anon_val2_exists = true, + .anon_val3_exists = true, + .named_val1 = 101, + .named_val2 = 202, + .anon_val1 = 0x11, + .anon_val2 = 0x22, + }), + ENUMVAL_CASE(enumval___val3_missing, { + .named_val1_exists = true, + .named_val2_exists = true, + .named_val3_exists = false, + .anon_val1_exists = true, + .anon_val2_exists = true, + .anon_val3_exists = false, + .named_val1 = 111, + .named_val2 = 222, + .anon_val1 = 0x111, + .anon_val2 = 0x222, + }), + ENUMVAL_ERR_CASE(enumval___err_missing), }; struct data { diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c new file mode 100644 index 000000000000..48e62f3f074f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enumval x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c new file mode 100644 index 000000000000..53e5e5a76888 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enumval___diff x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c new file mode 100644 index 000000000000..d024fb2ac06e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enumval___err_missing x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c new file mode 100644 index 000000000000..9de6595d250c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enumval___val3_missing x) {} diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index 10afcc5f219f..e6e616cb7bc9 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -1075,3 +1075,87 @@ struct core_reloc_type_id { struct core_reloc_type_id___missing_targets { /* nothing */ }; + +/* + * ENUMERATOR VALUE EXISTENCE AND VALUE RELOCATION + */ +struct core_reloc_enumval_output { + bool named_val1_exists; + bool named_val2_exists; + bool named_val3_exists; + bool anon_val1_exists; + bool anon_val2_exists; + bool anon_val3_exists; + + int named_val1; + int named_val2; + int anon_val1; + int anon_val2; +}; + +enum named_enum { + NAMED_ENUM_VAL1 = 1, + NAMED_ENUM_VAL2 = 2, + NAMED_ENUM_VAL3 = 3, +}; + +typedef enum { + ANON_ENUM_VAL1 = 0x10, + ANON_ENUM_VAL2 = 0x20, + ANON_ENUM_VAL3 = 0x30, +} anon_enum; + +struct core_reloc_enumval { + enum named_enum f1; + anon_enum f2; +}; + +/* differing enumerator values */ +enum named_enum___diff { + NAMED_ENUM_VAL1___diff = 101, + NAMED_ENUM_VAL2___diff = 202, + NAMED_ENUM_VAL3___diff = 303, +}; + +typedef enum { + ANON_ENUM_VAL1___diff = 0x11, + ANON_ENUM_VAL2___diff = 0x22, + ANON_ENUM_VAL3___diff = 0x33, +} anon_enum___diff; + +struct core_reloc_enumval___diff { + enum named_enum___diff f1; + anon_enum___diff f2; +}; + +/* missing (optional) third enum value */ +enum named_enum___val3_missing { + NAMED_ENUM_VAL1___val3_missing = 111, + NAMED_ENUM_VAL2___val3_missing = 222, +}; + +typedef enum { + ANON_ENUM_VAL1___val3_missing = 0x111, + ANON_ENUM_VAL2___val3_missing = 0x222, +} anon_enum___val3_missing; + +struct core_reloc_enumval___val3_missing { + enum named_enum___val3_missing f1; + anon_enum___val3_missing f2; +}; + +/* missing (mandatory) second enum value, should fail */ +enum named_enum___err_missing { + NAMED_ENUM_VAL1___err_missing = 1, + NAMED_ENUM_VAL3___err_missing = 3, +}; + +typedef enum { + ANON_ENUM_VAL1___err_missing = 0x111, + ANON_ENUM_VAL3___err_missing = 0x222, +} anon_enum___err_missing; + +struct core_reloc_enumval___err_missing { + enum named_enum___err_missing f1; + anon_enum___err_missing f2; +}; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c b/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c new file mode 100644 index 000000000000..e7ef3dada2bf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include +#include +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; + bool skip; +} data = {}; + +enum named_enum { + NAMED_ENUM_VAL1 = 1, + NAMED_ENUM_VAL2 = 2, + NAMED_ENUM_VAL3 = 3, +}; + +typedef enum { + ANON_ENUM_VAL1 = 0x10, + ANON_ENUM_VAL2 = 0x20, + ANON_ENUM_VAL3 = 0x30, +} anon_enum; + +struct core_reloc_enumval_output { + bool named_val1_exists; + bool named_val2_exists; + bool named_val3_exists; + bool anon_val1_exists; + bool anon_val2_exists; + bool anon_val3_exists; + + int named_val1; + int named_val2; + int anon_val1; + int anon_val2; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_enumval(void *ctx) +{ +#if __has_builtin(__builtin_preserve_enum_value) + struct core_reloc_enumval_output *out = (void *)&data.out; + enum named_enum named = 0; + anon_enum anon = 0; + + out->named_val1_exists = bpf_core_enum_value_exists(named, NAMED_ENUM_VAL1); + out->named_val2_exists = bpf_core_enum_value_exists(enum named_enum, NAMED_ENUM_VAL2); + out->named_val3_exists = bpf_core_enum_value_exists(enum named_enum, NAMED_ENUM_VAL3); + + out->anon_val1_exists = bpf_core_enum_value_exists(anon, ANON_ENUM_VAL1); + out->anon_val2_exists = bpf_core_enum_value_exists(anon_enum, ANON_ENUM_VAL2); + out->anon_val3_exists = bpf_core_enum_value_exists(anon_enum, ANON_ENUM_VAL3); + + out->named_val1 = bpf_core_enum_value(named, NAMED_ENUM_VAL1); + out->named_val2 = bpf_core_enum_value(named, NAMED_ENUM_VAL2); + /* NAMED_ENUM_VAL3 value is optional */ + + out->anon_val1 = bpf_core_enum_value(anon, ANON_ENUM_VAL1); + out->anon_val2 = bpf_core_enum_value(anon, ANON_ENUM_VAL2); + /* ANON_ENUM_VAL3 value is optional */ +#else + data.skip = true; +#endif + + return 0; +} -- cgit v1.3.1 From eea11285dab3c961f8e9ce92318ccbc85fb1d5f1 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 19 Aug 2020 12:46:54 +0200 Subject: tracing: switch to kernel_clone() The old _do_fork() helper is removed in favor of the new kernel_clone() helper. The latter adheres to naming conventions for kernel internal syscall helpers. Signed-off-by: Christian Brauner Cc: Mauro Carvalho Chehab Cc: Alexandre Chartre Cc: Jonathan Corbet Cc: Thomas Gleixner Cc: Masami Hiramatsu Cc: Shuah Khan Cc: Ingo Molnar Cc: Steven Rostedt Cc: Xiao Yang Cc: Tom Zanussi Cc: linux-doc@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Link: https://lore.kernel.org/r/20200819104655.436656-11-christian.brauner@ubuntu.com --- Documentation/trace/histogram.rst | 4 ++-- samples/kprobes/kretprobe_example.c | 4 ++-- .../selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc | 2 +- .../ftrace/test.d/dynevent/clear_select_events.tc | 2 +- .../ftrace/test.d/dynevent/generic_clear_event.tc | 2 +- .../ftrace/test.d/ftrace/func-filter-stacktrace.tc | 4 ++-- .../selftests/ftrace/test.d/kprobe/add_and_remove.tc | 2 +- tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc | 2 +- .../testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc | 4 ++-- .../selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc | 2 +- .../selftests/ftrace/test.d/kprobe/kprobe_args_string.tc | 4 ++-- .../selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc | 10 +++++----- .../selftests/ftrace/test.d/kprobe/kprobe_args_type.tc | 2 +- .../selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc | 14 +++++++------- .../selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc | 2 +- .../selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc | 12 ++++++------ .../selftests/ftrace/test.d/kprobe/kretprobe_args.tc | 4 ++-- tools/testing/selftests/ftrace/test.d/kprobe/profile.tc | 2 +- 18 files changed, 39 insertions(+), 39 deletions(-) (limited to 'tools') diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst index 8408670d0328..f93333524a44 100644 --- a/Documentation/trace/histogram.rst +++ b/Documentation/trace/histogram.rst @@ -1495,7 +1495,7 @@ Extended error information # { stacktrace: - _do_fork+0x18e/0x330 + kernel_clone+0x18e/0x330 kernel_thread+0x29/0x30 kthreadd+0x154/0x1b0 ret_from_fork+0x3f/0x70 @@ -1588,7 +1588,7 @@ Extended error information SYSC_sendto+0xef/0x170 } hitcount: 88 { stacktrace: - _do_fork+0x18e/0x330 + kernel_clone+0x18e/0x330 SyS_clone+0x19/0x20 entry_SYSCALL_64_fastpath+0x12/0x6a } hitcount: 244 diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c index 78a2da6fb3cd..0c40f7236989 100644 --- a/samples/kprobes/kretprobe_example.c +++ b/samples/kprobes/kretprobe_example.c @@ -8,7 +8,7 @@ * * usage: insmod kretprobe_example.ko func= * - * If no func_name is specified, _do_fork is instrumented + * If no func_name is specified, kernel_clone is instrumented * * For more information on theory of operation of kretprobes, see * Documentation/staging/kprobes.rst @@ -26,7 +26,7 @@ #include #include -static char func_name[NAME_MAX] = "_do_fork"; +static char func_name[NAME_MAX] = "kernel_clone"; module_param_string(func, func_name, NAME_MAX, S_IRUGO); MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the" " function's execution time"); diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc index 68550f97d3c3..3bcd4c3624ee 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=_do_fork +PLACE=kernel_clone echo "p:myevent1 $PLACE" >> dynamic_events echo "r:myevent2 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc index c969be9eb7de..438961971b7e 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=_do_fork +PLACE=kernel_clone setup_events() { echo "p:myevent1 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc index 16d543eaac88..a8603bd23e0d 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=_do_fork +PLACE=kernel_clone setup_events() { echo "p:myevent1 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc index 0f41e441c203..98305d76bd04 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc @@ -4,9 +4,9 @@ # requires: set_ftrace_filter # flags: instance -echo _do_fork:stacktrace >> set_ftrace_filter +echo kernel_clone:stacktrace >> set_ftrace_filter -grep -q "_do_fork:stacktrace:unlimited" set_ftrace_filter +grep -q "kernel_clone:stacktrace:unlimited" set_ftrace_filter (echo "forked"; sleep 1) diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc index eba858c21815..9737cd0578a7 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc @@ -3,7 +3,7 @@ # description: Kprobe dynamic event - adding and removing # requires: kprobe_events -echo p:myevent _do_fork > kprobe_events +echo p:myevent kernel_clone > kprobe_events grep myevent kprobe_events test -d events/kprobes/myevent echo > kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc index d10bf4f05bc8..f9a40af76888 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc @@ -3,7 +3,7 @@ # description: Kprobe dynamic event - busy event check # requires: kprobe_events -echo p:myevent _do_fork > kprobe_events +echo p:myevent kernel_clone > kprobe_events test -d events/kprobes/myevent echo 1 > events/kprobes/myevent/enable echo > kprobe_events && exit_fail # this must fail diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc index 61f2ac441aec..eb543d3cfe5f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc @@ -3,13 +3,13 @@ # description: Kprobe dynamic event with arguments # requires: kprobe_events -echo 'p:testprobe _do_fork $stack $stack0 +0($stack)' > kprobe_events +echo 'p:testprobe kernel_clone $stack $stack0 +0($stack)' > kprobe_events grep testprobe kprobe_events | grep -q 'arg1=\$stack arg2=\$stack0 arg3=+0(\$stack)' test -d events/kprobes/testprobe echo 1 > events/kprobes/testprobe/enable ( echo "forked") -grep testprobe trace | grep '_do_fork' | \ +grep testprobe trace | grep 'kernel_clone' | \ grep -q 'arg1=0x[[:xdigit:]]* arg2=0x[[:xdigit:]]* arg3=0x[[:xdigit:]]*$' echo 0 > events/kprobes/testprobe/enable diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc index 05aaeed6987f..4e5b63be51c9 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc @@ -5,7 +5,7 @@ grep -A1 "fetcharg:" README | grep -q "\$comm" || exit_unsupported # this is too old -echo 'p:testprobe _do_fork comm=$comm ' > kprobe_events +echo 'p:testprobe kernel_clone comm=$comm ' > kprobe_events grep testprobe kprobe_events | grep -q 'comm=$comm' test -d events/kprobes/testprobe diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc index b5fa05443b39..a1d70588ab21 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -30,13 +30,13 @@ esac : "Test get argument (1)" echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events echo 1 > events/kprobes/testprobe/enable -echo "p:test _do_fork" >> kprobe_events +echo "p:test kernel_clone" >> kprobe_events grep -qe "testprobe.* arg1=\"test\"" trace echo 0 > events/kprobes/testprobe/enable : "Test get argument (2)" echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events echo 1 > events/kprobes/testprobe/enable -echo "p:test _do_fork" >> kprobe_events +echo "p:test kernel_clone" >> kprobe_events grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc index b8c75a3d003c..bd25dd0ba0d0 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc @@ -14,12 +14,12 @@ elif ! grep "$SYMBOL\$" /proc/kallsyms; then fi : "Test get basic types symbol argument" -echo "p:testprobe_u _do_fork arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events -echo "p:testprobe_s _do_fork arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events +echo "p:testprobe_u kernel_clone arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events +echo "p:testprobe_s kernel_clone arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events if grep -q "x8/16/32/64" README; then - echo "p:testprobe_x _do_fork arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events + echo "p:testprobe_x kernel_clone arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events fi -echo "p:testprobe_bf _do_fork arg1=@linux_proc_banner:b8@4/32" >> kprobe_events +echo "p:testprobe_bf kernel_clone arg1=@linux_proc_banner:b8@4/32" >> kprobe_events echo 1 > events/kprobes/enable (echo "forked") echo 0 > events/kprobes/enable @@ -27,7 +27,7 @@ grep "testprobe_[usx]:.* arg1=.* arg2=.* arg3=.* arg4=.*" trace grep "testprobe_bf:.* arg1=.*" trace : "Test get string symbol argument" -echo "p:testprobe_str _do_fork arg1=@linux_proc_banner:string" > kprobe_events +echo "p:testprobe_str kernel_clone arg1=@linux_proc_banner:string" > kprobe_events echo 1 > events/kprobes/enable (echo "forked") echo 0 > events/kprobes/enable diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc index 0610e0b5587c..91fcce1c241c 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc @@ -4,7 +4,7 @@ # requires: kprobe_events "x8/16/32/64":README gen_event() { # Bitsize - echo "p:testprobe _do_fork \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1" + echo "p:testprobe kernel_clone \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1" } check_types() { # s-type u-type x-type bf-type width diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc index 81d8b58c03bc..0d179094191f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc @@ -5,29 +5,29 @@ # prepare echo nop > current_tracer -echo _do_fork > set_ftrace_filter -echo 'p:testprobe _do_fork' > kprobe_events +echo kernel_clone > set_ftrace_filter +echo 'p:testprobe kernel_clone' > kprobe_events # kprobe on / ftrace off echo 1 > events/kprobes/testprobe/enable echo > trace ( echo "forked") grep testprobe trace -! grep '_do_fork <-' trace +! grep 'kernel_clone <-' trace # kprobe on / ftrace on echo function > current_tracer echo > trace ( echo "forked") grep testprobe trace -grep '_do_fork <-' trace +grep 'kernel_clone <-' trace # kprobe off / ftrace on echo 0 > events/kprobes/testprobe/enable echo > trace ( echo "forked") ! grep testprobe trace -grep '_do_fork <-' trace +grep 'kernel_clone <-' trace # kprobe on / ftrace on echo 1 > events/kprobes/testprobe/enable @@ -35,11 +35,11 @@ echo function > current_tracer echo > trace ( echo "forked") grep testprobe trace -grep '_do_fork <-' trace +grep 'kernel_clone <-' trace # kprobe on / ftrace off echo nop > current_tracer echo > trace ( echo "forked") grep testprobe trace -! grep '_do_fork <-' trace +! grep 'kernel_clone <-' trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc index 366b7e1b6718..45d90b6c763d 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc @@ -4,7 +4,7 @@ # requires: kprobe_events "Create/append/":README # Choose 2 symbols for target -SYM1=_do_fork +SYM1=kernel_clone SYM2=do_exit EVENT_NAME=kprobes/testevent diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index b4d834675e59..c02ea50d63ea 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -86,15 +86,15 @@ esac # multiprobe errors if grep -q "Create/append/" README && grep -q "imm-value" README; then -echo 'p:kprobes/testevent _do_fork' > kprobe_events +echo 'p:kprobes/testevent kernel_clone' > kprobe_events check_error '^r:kprobes/testevent do_exit' # DIFF_PROBE_TYPE # Explicitly use printf "%s" to not interpret \1 -printf "%s" 'p:kprobes/testevent _do_fork abcd=\1' > kprobe_events -check_error 'p:kprobes/testevent _do_fork ^bcd=\1' # DIFF_ARG_TYPE -check_error 'p:kprobes/testevent _do_fork ^abcd=\1:u8' # DIFF_ARG_TYPE -check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"' # DIFF_ARG_TYPE -check_error '^p:kprobes/testevent _do_fork abcd=\1' # SAME_PROBE +printf "%s" 'p:kprobes/testevent kernel_clone abcd=\1' > kprobe_events +check_error 'p:kprobes/testevent kernel_clone ^bcd=\1' # DIFF_ARG_TYPE +check_error 'p:kprobes/testevent kernel_clone ^abcd=\1:u8' # DIFF_ARG_TYPE +check_error 'p:kprobes/testevent kernel_clone ^abcd=\"foo"' # DIFF_ARG_TYPE +check_error '^p:kprobes/testevent kernel_clone abcd=\1' # SAME_PROBE fi exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc index 523fde6d1aa5..7ae492c204a4 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc @@ -4,14 +4,14 @@ # requires: kprobe_events # Add new kretprobe event -echo 'r:testprobe2 _do_fork $retval' > kprobe_events +echo 'r:testprobe2 kernel_clone $retval' > kprobe_events grep testprobe2 kprobe_events | grep -q 'arg1=\$retval' test -d events/kprobes/testprobe2 echo 1 > events/kprobes/testprobe2/enable ( echo "forked") -cat trace | grep testprobe2 | grep -q '<- _do_fork' +cat trace | grep testprobe2 | grep -q '<- kernel_clone' echo 0 > events/kprobes/testprobe2/enable echo '-:testprobe2' >> kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc index ff6c44adc8a0..c4093fc1a773 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc @@ -4,7 +4,7 @@ # requires: kprobe_events ! grep -q 'myevent' kprobe_profile -echo p:myevent _do_fork > kprobe_events +echo p:myevent kernel_clone > kprobe_events grep -q 'myevent[[:space:]]*0[[:space:]]*0$' kprobe_profile echo 1 > events/kprobes/myevent/enable ( echo "forked" ) -- cgit v1.3.1 From 6e9cab2e3fa597419ec8a117a9f3e3ebc251c120 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Wed, 19 Aug 2020 02:53:24 +0000 Subject: libbpf: Simplify the return expression of build_map_pin_path() Simplify the return expression. Signed-off-by: Xu Wang Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200819025324.14680-1-vulab@iscas.ac.cn --- tools/lib/bpf/libbpf.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 77d420c02094..829d62a3ad5f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1943,7 +1943,7 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf, static int build_map_pin_path(struct bpf_map *map, const char *path) { char buf[PATH_MAX]; - int err, len; + int len; if (!path) path = "/sys/fs/bpf"; @@ -1954,11 +1954,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path) else if (len >= PATH_MAX) return -ENAMETOOLONG; - err = bpf_map__set_pin_path(map, buf); - if (err) - return err; - - return 0; + return bpf_map__set_pin_path(map, buf); } -- cgit v1.3.1 From d71fa5c9763c24dd997a2fa4feb7a13a95bab42c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 18 Aug 2020 21:27:58 -0700 Subject: bpf: Add kernel module with user mode driver that populates bpffs. Add kernel module with user mode driver that populates bpffs with BPF iterators. $ mount bpffs /my/bpffs/ -t bpf $ ls -la /my/bpffs/ total 4 drwxrwxrwt 2 root root 0 Jul 2 00:27 . drwxr-xr-x 19 root root 4096 Jul 2 00:09 .. -rw------- 1 root root 0 Jul 2 00:27 maps.debug -rw------- 1 root root 0 Jul 2 00:27 progs.debug The user mode driver will load BPF Type Formats, create BPF maps, populate BPF maps, load two BPF programs, attach them to BPF iterators, and finally send two bpf_link IDs back to the kernel. The kernel will pin two bpf_links into newly mounted bpffs instance under names "progs.debug" and "maps.debug". These two files become human readable. $ cat /my/bpffs/progs.debug id name attached 11 dump_bpf_map bpf_iter_bpf_map 12 dump_bpf_prog bpf_iter_bpf_prog 27 test_pkt_access 32 test_main test_pkt_access test_pkt_access 33 test_subprog1 test_pkt_access_subprog1 test_pkt_access 34 test_subprog2 test_pkt_access_subprog2 test_pkt_access 35 test_subprog3 test_pkt_access_subprog3 test_pkt_access 36 new_get_skb_len get_skb_len test_pkt_access 37 new_get_skb_ifindex get_skb_ifindex test_pkt_access 38 new_get_constant get_constant test_pkt_access The BPF program dump_bpf_prog() in iterators.bpf.c is printing this data about all BPF programs currently loaded in the system. This information is unstable and will change from kernel to kernel as ".debug" suffix conveys. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200819042759.51280-4-alexei.starovoitov@gmail.com --- init/Kconfig | 2 + kernel/Makefile | 2 +- kernel/bpf/Makefile | 1 + kernel/bpf/inode.c | 116 +++++++++++++++++++++- kernel/bpf/preload/Kconfig | 23 +++++ kernel/bpf/preload/Makefile | 23 +++++ kernel/bpf/preload/bpf_preload.h | 16 +++ kernel/bpf/preload/bpf_preload_kern.c | 91 +++++++++++++++++ kernel/bpf/preload/bpf_preload_umd_blob.S | 7 ++ kernel/bpf/preload/iterators/bpf_preload_common.h | 13 +++ kernel/bpf/preload/iterators/iterators.c | 94 ++++++++++++++++++ net/bpfilter/Kconfig | 1 + tools/lib/bpf/Makefile | 7 +- 13 files changed, 390 insertions(+), 6 deletions(-) create mode 100644 kernel/bpf/preload/Kconfig create mode 100644 kernel/bpf/preload/Makefile create mode 100644 kernel/bpf/preload/bpf_preload.h create mode 100644 kernel/bpf/preload/bpf_preload_kern.c create mode 100644 kernel/bpf/preload/bpf_preload_umd_blob.S create mode 100644 kernel/bpf/preload/iterators/bpf_preload_common.h create mode 100644 kernel/bpf/preload/iterators/iterators.c (limited to 'tools') diff --git a/init/Kconfig b/init/Kconfig index d6a0b31b13dc..fc10f7ede5f6 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1710,6 +1710,8 @@ config BPF_JIT_DEFAULT_ON def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON depends on HAVE_EBPF_JIT && BPF_JIT +source "kernel/bpf/preload/Kconfig" + config USERFAULTFD bool "Enable userfaultfd() system call" depends on MMU diff --git a/kernel/Makefile b/kernel/Makefile index 9a20016d4900..22b0760660fc 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -12,7 +12,7 @@ obj-y = fork.o exec_domain.o panic.o \ notifier.o ksysfs.o cred.o reboot.o \ async.o range.o smpboot.o ucount.o regset.o -obj-$(CONFIG_BPFILTER) += usermode_driver.o +obj-$(CONFIG_USERMODE_DRIVER) += usermode_driver.o obj-$(CONFIG_MODULES) += kmod.o obj-$(CONFIG_MULTIUSER) += groups.o diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index e6eb9c0402da..19e137aae40e 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -29,3 +29,4 @@ ifeq ($(CONFIG_BPF_JIT),y) obj-$(CONFIG_BPF_SYSCALL) += bpf_struct_ops.o obj-${CONFIG_BPF_LSM} += bpf_lsm.o endif +obj-$(CONFIG_BPF_PRELOAD) += preload/ diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index fb878ba3f22f..b48a56f53495 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -20,6 +20,7 @@ #include #include #include +#include "preload/bpf_preload.h" enum bpf_type { BPF_TYPE_UNSPEC = 0, @@ -369,9 +370,10 @@ static struct dentry * bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) { /* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future - * extensions. + * extensions. That allows popoulate_bpffs() create special files. */ - if (strchr(dentry->d_name.name, '.')) + if ((dir->i_mode & S_IALLUGO) && + strchr(dentry->d_name.name, '.')) return ERR_PTR(-EPERM); return simple_lookup(dir, dentry, flags); @@ -409,6 +411,27 @@ static const struct inode_operations bpf_dir_iops = { .unlink = simple_unlink, }; +/* pin iterator link into bpffs */ +static int bpf_iter_link_pin_kernel(struct dentry *parent, + const char *name, struct bpf_link *link) +{ + umode_t mode = S_IFREG | S_IRUSR; + struct dentry *dentry; + int ret; + + inode_lock(parent->d_inode); + dentry = lookup_one_len(name, parent, strlen(name)); + if (IS_ERR(dentry)) { + inode_unlock(parent->d_inode); + return PTR_ERR(dentry); + } + ret = bpf_mkobj_ops(dentry, mode, link, &bpf_link_iops, + &bpf_iter_fops); + dput(dentry); + inode_unlock(parent->d_inode); + return ret; +} + static int bpf_obj_do_pin(const char __user *pathname, void *raw, enum bpf_type type) { @@ -638,6 +661,91 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) return 0; } +struct bpf_preload_ops *bpf_preload_ops; +EXPORT_SYMBOL_GPL(bpf_preload_ops); + +static bool bpf_preload_mod_get(void) +{ + /* If bpf_preload.ko wasn't loaded earlier then load it now. + * When bpf_preload is built into vmlinux the module's __init + * function will populate it. + */ + if (!bpf_preload_ops) { + request_module("bpf_preload"); + if (!bpf_preload_ops) + return false; + } + /* And grab the reference, so the module doesn't disappear while the + * kernel is interacting with the kernel module and its UMD. + */ + if (!try_module_get(bpf_preload_ops->owner)) { + pr_err("bpf_preload module get failed.\n"); + return false; + } + return true; +} + +static void bpf_preload_mod_put(void) +{ + if (bpf_preload_ops) + /* now user can "rmmod bpf_preload" if necessary */ + module_put(bpf_preload_ops->owner); +} + +static DEFINE_MUTEX(bpf_preload_lock); + +static int populate_bpffs(struct dentry *parent) +{ + struct bpf_preload_info objs[BPF_PRELOAD_LINKS] = {}; + struct bpf_link *links[BPF_PRELOAD_LINKS] = {}; + int err = 0, i; + + /* grab the mutex to make sure the kernel interactions with bpf_preload + * UMD are serialized + */ + mutex_lock(&bpf_preload_lock); + + /* if bpf_preload.ko wasn't built into vmlinux then load it */ + if (!bpf_preload_mod_get()) + goto out; + + if (!bpf_preload_ops->info.tgid) { + /* preload() will start UMD that will load BPF iterator programs */ + err = bpf_preload_ops->preload(objs); + if (err) + goto out_put; + for (i = 0; i < BPF_PRELOAD_LINKS; i++) { + links[i] = bpf_link_by_id(objs[i].link_id); + if (IS_ERR(links[i])) { + err = PTR_ERR(links[i]); + goto out_put; + } + } + for (i = 0; i < BPF_PRELOAD_LINKS; i++) { + err = bpf_iter_link_pin_kernel(parent, + objs[i].link_name, links[i]); + if (err) + goto out_put; + /* do not unlink successfully pinned links even + * if later link fails to pin + */ + links[i] = NULL; + } + /* finish() will tell UMD process to exit */ + err = bpf_preload_ops->finish(); + if (err) + goto out_put; + } +out_put: + bpf_preload_mod_put(); +out: + mutex_unlock(&bpf_preload_lock); + for (i = 0; i < BPF_PRELOAD_LINKS && err; i++) + if (!IS_ERR_OR_NULL(links[i])) + bpf_link_put(links[i]); + return err; +} + static int bpf_fill_super(struct super_block *sb, struct fs_context *fc) { static const struct tree_descr bpf_rfiles[] = { { "" } }; @@ -654,8 +762,8 @@ static int bpf_fill_super(struct super_block *sb, struct fs_context *fc) inode = sb->s_root->d_inode; inode->i_op = &bpf_dir_iops; inode->i_mode &= ~S_IALLUGO; + populate_bpffs(sb->s_root); inode->i_mode |= S_ISVTX | opts->mode; - return 0; } @@ -705,6 +813,8 @@ static int __init bpf_init(void) { int ret; + mutex_init(&bpf_preload_lock); + ret = sysfs_create_mount_point(fs_kobj, "bpf"); if (ret) return ret; diff --git a/kernel/bpf/preload/Kconfig b/kernel/bpf/preload/Kconfig new file mode 100644 index 000000000000..7144e2d01ee4 --- /dev/null +++ b/kernel/bpf/preload/Kconfig @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0-only +config USERMODE_DRIVER + bool + default n + +menuconfig BPF_PRELOAD + bool "Preload BPF file system with kernel specific program and map iterators" + depends on BPF + select USERMODE_DRIVER + help + This builds kernel module with several embedded BPF programs that are + pinned into BPF FS mount point as human readable files that are + useful in debugging and introspection of BPF programs and maps. + +if BPF_PRELOAD +config BPF_PRELOAD_UMD + tristate "bpf_preload kernel module with user mode driver" + depends on CC_CAN_LINK + depends on m || CC_CAN_LINK_STATIC + default m + help + This builds bpf_preload kernel module with embedded user mode driver. +endif diff --git a/kernel/bpf/preload/Makefile b/kernel/bpf/preload/Makefile new file mode 100644 index 000000000000..12c7b62b9b6e --- /dev/null +++ b/kernel/bpf/preload/Makefile @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0 + +LIBBPF_SRCS = $(srctree)/tools/lib/bpf/ +LIBBPF_A = $(obj)/libbpf.a +LIBBPF_OUT = $(abspath $(obj)) + +$(LIBBPF_A): + $(Q)$(MAKE) -C $(LIBBPF_SRCS) OUTPUT=$(LIBBPF_OUT)/ $(LIBBPF_OUT)/libbpf.a + +userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \ + -I $(srctree)/tools/lib/ -Wno-unused-result + +userprogs := bpf_preload_umd + +bpf_preload_umd-objs := iterators/iterators.o +bpf_preload_umd-userldlibs := $(LIBBPF_A) -lelf -lz + +$(obj)/bpf_preload_umd: $(LIBBPF_A) + +$(obj)/bpf_preload_umd_blob.o: $(obj)/bpf_preload_umd + +obj-$(CONFIG_BPF_PRELOAD_UMD) += bpf_preload.o +bpf_preload-objs += bpf_preload_kern.o bpf_preload_umd_blob.o diff --git a/kernel/bpf/preload/bpf_preload.h b/kernel/bpf/preload/bpf_preload.h new file mode 100644 index 000000000000..2f9932276f2e --- /dev/null +++ b/kernel/bpf/preload/bpf_preload.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BPF_PRELOAD_H +#define _BPF_PRELOAD_H + +#include +#include "iterators/bpf_preload_common.h" + +struct bpf_preload_ops { + struct umd_info info; + int (*preload)(struct bpf_preload_info *); + int (*finish)(void); + struct module *owner; +}; +extern struct bpf_preload_ops *bpf_preload_ops; +#define BPF_PRELOAD_LINKS 2 +#endif diff --git a/kernel/bpf/preload/bpf_preload_kern.c b/kernel/bpf/preload/bpf_preload_kern.c new file mode 100644 index 000000000000..79c5772465f1 --- /dev/null +++ b/kernel/bpf/preload/bpf_preload_kern.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include +#include +#include "bpf_preload.h" + +extern char bpf_preload_umd_start; +extern char bpf_preload_umd_end; + +static int preload(struct bpf_preload_info *obj); +static int finish(void); + +static struct bpf_preload_ops umd_ops = { + .info.driver_name = "bpf_preload", + .preload = preload, + .finish = finish, + .owner = THIS_MODULE, +}; + +static int preload(struct bpf_preload_info *obj) +{ + int magic = BPF_PRELOAD_START; + loff_t pos = 0; + int i, err; + ssize_t n; + + err = fork_usermode_driver(&umd_ops.info); + if (err) + return err; + + /* send the start magic to let UMD proceed with loading BPF progs */ + n = kernel_write(umd_ops.info.pipe_to_umh, + &magic, sizeof(magic), &pos); + if (n != sizeof(magic)) + return -EPIPE; + + /* receive bpf_link IDs and names from UMD */ + pos = 0; + for (i = 0; i < BPF_PRELOAD_LINKS; i++) { + n = kernel_read(umd_ops.info.pipe_from_umh, + &obj[i], sizeof(*obj), &pos); + if (n != sizeof(*obj)) + return -EPIPE; + } + return 0; +} + +static int finish(void) +{ + int magic = BPF_PRELOAD_END; + struct pid *tgid; + loff_t pos = 0; + ssize_t n; + + /* send the last magic to UMD. It will do a normal exit. */ + n = kernel_write(umd_ops.info.pipe_to_umh, + &magic, sizeof(magic), &pos); + if (n != sizeof(magic)) + return -EPIPE; + tgid = umd_ops.info.tgid; + wait_event(tgid->wait_pidfd, thread_group_exited(tgid)); + umd_ops.info.tgid = NULL; + return 0; +} + +static int __init load_umd(void) +{ + int err; + + err = umd_load_blob(&umd_ops.info, &bpf_preload_umd_start, + &bpf_preload_umd_end - &bpf_preload_umd_start); + if (err) + return err; + bpf_preload_ops = &umd_ops; + return err; +} + +static void __exit fini_umd(void) +{ + bpf_preload_ops = NULL; + /* kill UMD in case it's still there due to earlier error */ + kill_pid(umd_ops.info.tgid, SIGKILL, 1); + umd_ops.info.tgid = NULL; + umd_unload_blob(&umd_ops.info); +} +late_initcall(load_umd); +module_exit(fini_umd); +MODULE_LICENSE("GPL"); diff --git a/kernel/bpf/preload/bpf_preload_umd_blob.S b/kernel/bpf/preload/bpf_preload_umd_blob.S new file mode 100644 index 000000000000..f1f40223b5c3 --- /dev/null +++ b/kernel/bpf/preload/bpf_preload_umd_blob.S @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + .section .init.rodata, "a" + .global bpf_preload_umd_start +bpf_preload_umd_start: + .incbin "kernel/bpf/preload/bpf_preload_umd" + .global bpf_preload_umd_end +bpf_preload_umd_end: diff --git a/kernel/bpf/preload/iterators/bpf_preload_common.h b/kernel/bpf/preload/iterators/bpf_preload_common.h new file mode 100644 index 000000000000..8464d1a48c05 --- /dev/null +++ b/kernel/bpf/preload/iterators/bpf_preload_common.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BPF_PRELOAD_COMMON_H +#define _BPF_PRELOAD_COMMON_H + +#define BPF_PRELOAD_START 0x5555 +#define BPF_PRELOAD_END 0xAAAA + +struct bpf_preload_info { + char link_name[16]; + int link_id; +}; + +#endif diff --git a/kernel/bpf/preload/iterators/iterators.c b/kernel/bpf/preload/iterators/iterators.c new file mode 100644 index 000000000000..b7ff87939172 --- /dev/null +++ b/kernel/bpf/preload/iterators/iterators.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iterators.skel.h" +#include "bpf_preload_common.h" + +int to_kernel = -1; +int from_kernel = 0; + +static int send_link_to_kernel(struct bpf_link *link, const char *link_name) +{ + struct bpf_preload_info obj = {}; + struct bpf_link_info info = {}; + __u32 info_len = sizeof(info); + int err; + + err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &info, &info_len); + if (err) + return err; + obj.link_id = info.id; + if (strlen(link_name) >= sizeof(obj.link_name)) + return -E2BIG; + strcpy(obj.link_name, link_name); + if (write(to_kernel, &obj, sizeof(obj)) != sizeof(obj)) + return -EPIPE; + return 0; +} + +int main(int argc, char **argv) +{ + struct rlimit rlim = { RLIM_INFINITY, RLIM_INFINITY }; + struct iterators_bpf *skel; + int err, magic; + int debug_fd; + + debug_fd = open("/dev/console", O_WRONLY | O_NOCTTY | O_CLOEXEC); + if (debug_fd < 0) + return 1; + to_kernel = dup(1); + close(1); + dup(debug_fd); + /* now stdin and stderr point to /dev/console */ + + read(from_kernel, &magic, sizeof(magic)); + if (magic != BPF_PRELOAD_START) { + printf("bad start magic %d\n", magic); + return 1; + } + setrlimit(RLIMIT_MEMLOCK, &rlim); + /* libbpf opens BPF object and loads it into the kernel */ + skel = iterators_bpf__open_and_load(); + if (!skel) { + /* iterators.skel.h is little endian. + * libbpf doesn't support automatic little->big conversion + * of BPF bytecode yet. + * The program load will fail in such case. + */ + printf("Failed load could be due to wrong endianness\n"); + return 1; + } + err = iterators_bpf__attach(skel); + if (err) + goto cleanup; + + /* send two bpf_link IDs with names to the kernel */ + err = send_link_to_kernel(skel->links.dump_bpf_map, "maps.debug"); + if (err) + goto cleanup; + err = send_link_to_kernel(skel->links.dump_bpf_prog, "progs.debug"); + if (err) + goto cleanup; + + /* The kernel will proceed with pinnging the links in bpffs. + * UMD will wait on read from pipe. + */ + read(from_kernel, &magic, sizeof(magic)); + if (magic != BPF_PRELOAD_END) { + printf("bad final magic %d\n", magic); + err = -EINVAL; + } +cleanup: + iterators_bpf__destroy(skel); + + return err != 0; +} diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig index 73d0b12789f1..8ad0233ce497 100644 --- a/net/bpfilter/Kconfig +++ b/net/bpfilter/Kconfig @@ -2,6 +2,7 @@ menuconfig BPFILTER bool "BPF based packet filtering framework (BPFILTER)" depends on NET && BPF && INET + select USERMODE_DRIVER help This builds experimental bpfilter framework that is aiming to provide netfilter compatible functionality via BPF diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index c5dbfafdf889..66b2cfadf262 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -1,6 +1,9 @@ # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) # Most of this file is copied from tools/lib/traceevent/Makefile +RM ?= rm +srctree = $(abs_srctree) + LIBBPF_VERSION := $(shell \ grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | \ sort -rV | head -n1 | cut -d'_' -f2) @@ -188,7 +191,7 @@ $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) @ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION) $(OUTPUT)libbpf.a: $(BPF_IN_STATIC) - $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ + $(QUIET_LINK)$(RM) -f $@; $(AR) rcs $@ $^ $(OUTPUT)libbpf.pc: $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \ @@ -291,7 +294,7 @@ cscope: cscope -b -q -I $(srctree)/include -f cscope.out tags: - rm -f TAGS tags + $(RM) -f TAGS tags ls *.c *.h | xargs $(TAGS_PROG) -a # Declare the contents of the .PHONY variable as phony. We keep that -- cgit v1.3.1 From edb65ee5aa2550f6035c8fb169a359bc18acb84a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 18 Aug 2020 21:27:59 -0700 Subject: selftests/bpf: Add bpffs preload test. Add a test that mounts two bpffs instances and checks progs.debug and maps.debug for sanity data. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200819042759.51280-5-alexei.starovoitov@gmail.com --- .../testing/selftests/bpf/prog_tests/test_bpffs.c | 94 ++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/test_bpffs.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c new file mode 100644 index 000000000000..172c999e523c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +#define TDIR "/sys/kernel/debug" + +static int read_iter(char *file) +{ + /* 1024 should be enough to get contiguous 4 "iter" letters at some point */ + char buf[1024]; + int fd, len; + + fd = open(file, 0); + if (fd < 0) + return -1; + while ((len = read(fd, buf, sizeof(buf))) > 0) + if (strstr(buf, "iter")) { + close(fd); + return 0; + } + close(fd); + return -1; +} + +static int fn(void) +{ + int err, duration = 0; + + err = unshare(CLONE_NEWNS); + if (CHECK(err, "unshare", "failed: %d\n", errno)) + goto out; + + err = mount("", "/", "", MS_REC | MS_PRIVATE, NULL); + if (CHECK(err, "mount /", "failed: %d\n", errno)) + goto out; + + err = umount(TDIR); + if (CHECK(err, "umount " TDIR, "failed: %d\n", errno)) + goto out; + + err = mount("none", TDIR, "tmpfs", 0, NULL); + if (CHECK(err, "mount", "mount root failed: %d\n", errno)) + goto out; + + err = mkdir(TDIR "/fs1", 0777); + if (CHECK(err, "mkdir "TDIR"/fs1", "failed: %d\n", errno)) + goto out; + err = mkdir(TDIR "/fs2", 0777); + if (CHECK(err, "mkdir "TDIR"/fs2", "failed: %d\n", errno)) + goto out; + + err = mount("bpf", TDIR "/fs1", "bpf", 0, NULL); + if (CHECK(err, "mount bpffs "TDIR"/fs1", "failed: %d\n", errno)) + goto out; + err = mount("bpf", TDIR "/fs2", "bpf", 0, NULL); + if (CHECK(err, "mount bpffs " TDIR "/fs2", "failed: %d\n", errno)) + goto out; + + err = read_iter(TDIR "/fs1/maps.debug"); + if (CHECK(err, "reading " TDIR "/fs1/maps.debug", "failed\n")) + goto out; + err = read_iter(TDIR "/fs2/progs.debug"); + if (CHECK(err, "reading " TDIR "/fs2/progs.debug", "failed\n")) + goto out; +out: + umount(TDIR "/fs1"); + umount(TDIR "/fs2"); + rmdir(TDIR "/fs1"); + rmdir(TDIR "/fs2"); + umount(TDIR); + exit(err); +} + +void test_test_bpffs(void) +{ + int err, duration = 0, status = 0; + pid_t pid; + + pid = fork(); + if (CHECK(pid == -1, "clone", "clone failed %d", errno)) + return; + if (pid == 0) + fn(); + err = waitpid(pid, &status, 0); + if (CHECK(err == -1 && errno != ECHILD, "waitpid", "failed %d", errno)) + return; + if (CHECK(WEXITSTATUS(status), "bpffs test ", "failed %d", WEXITSTATUS(status))) + return; +} -- cgit v1.3.1 From 9b2f6fecf3b77d2457a13f77a563c07304f15775 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Aug 2020 23:14:08 -0700 Subject: libbpf: Fix detection of BPF helper call instruction BPF_CALL | BPF_JMP32 is explicitly not allowed by verifier for BPF helper calls, so don't detect it as a valid call. Also drop the check on func_id pointer, as it's currently always non-null. Fixes: 109cea5a594f ("libbpf: Sanitize BPF program code for bpf_probe_read_{kernel, user}[_str]") Reported-by: Yonghong Song Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200820061411.1755905-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 829d62a3ad5f..0bc1fd813408 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5840,14 +5840,12 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id) { - __u8 class = BPF_CLASS(insn->code); - - if ((class == BPF_JMP || class == BPF_JMP32) && + if (BPF_CLASS(insn->code) == BPF_JMP && BPF_OP(insn->code) == BPF_CALL && BPF_SRC(insn->code) == BPF_K && - insn->src_reg == 0 && insn->dst_reg == 0) { - if (func_id) - *func_id = insn->imm; + insn->src_reg == 0 && + insn->dst_reg == 0) { + *func_id = insn->imm; return true; } return false; -- cgit v1.3.1 From dda1ec9fc7f8383cb469a82614dbce61f357f3f8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Aug 2020 23:14:09 -0700 Subject: libbpf: Fix libbpf build on compilers missing __builtin_mul_overflow GCC compilers older than version 5 don't support __builtin_mul_overflow yet. Given GCC 4.9 is the minimal supported compiler for building kernel and the fact that libbpf is a dependency of resolve_btfids, which is dependency of CONFIG_DEBUG_INFO_BTF=y, this needs to be handled. This patch fixes the issue by falling back to slower detection of integer overflow in such cases. Fixes: 029258d7b228 ("libbpf: Remove any use of reallocarray() in libbpf") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200820061411.1755905-2-andriin@fb.com --- tools/lib/bpf/libbpf_internal.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 61dff515a2f0..4d1c366fca2c 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -10,6 +10,7 @@ #define __LIBBPF_LIBBPF_INTERNAL_H #include +#include /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 @@ -77,6 +78,9 @@ do { \ #define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) #define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif /* * Re-implement glibc's reallocarray() for libbpf internal-only use. * reallocarray(), unfortunately, is not available in all versions of glibc, @@ -90,8 +94,14 @@ static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size) { size_t total; +#if __has_builtin(__builtin_mul_overflow) if (unlikely(__builtin_mul_overflow(nmemb, size, &total))) return NULL; +#else + if (size == 0 || nmemb > ULONG_MAX / size) + return NULL; + total = nmemb * size; +#endif return realloc(ptr, total); } -- cgit v1.3.1 From 37a6a9e7676381cdf6868d2c013893074a1ee7b6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Aug 2020 23:14:10 -0700 Subject: selftests/bpf: Fix two minor compilation warnings reported by GCC 4.9 GCC 4.9 seems to be more strict in some regards. Fix two minor issue it reported. Fixes: 1c1052e0140a ("tools/testing/selftests/bpf: Add self-tests for new helper bpf_get_ns_current_pid_tgid.") Fixes: 2d7824ffd25c ("selftests: bpf: Add test for sk_assign") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200820061411.1755905-3-andriin@fb.com --- tools/testing/selftests/bpf/prog_tests/sk_assign.c | 3 ++- tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c index 47fa04adc147..d43038d2b9e1 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c @@ -268,6 +268,7 @@ void test_sk_assign(void) int server = -1; int server_map; int self_net; + int i; self_net = open(NS_SELF, O_RDONLY); if (CHECK_FAIL(self_net < 0)) { @@ -286,7 +287,7 @@ void test_sk_assign(void) goto cleanup; } - for (int i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) { + for (i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) { struct test_sk_cfg *test = &tests[i]; const struct sockaddr *addr; const int zero = 0; diff --git a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c index ed253f252cd0..ec53b1ef90d2 100644 --- a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c +++ b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c @@ -156,4 +156,5 @@ cleanup: bpf_object__close(obj); } } + return 0; } -- cgit v1.3.1 From 149cb3395504736485df1b93133e710dcf215c56 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Aug 2020 23:14:11 -0700 Subject: selftests/bpf: List newest Clang built-ins needed for some CO-RE selftests Record which built-ins are optional and needed for some of recent BPF CO-RE subtests. Document Clang diff that fixed corner-case issue with __builtin_btf_type_id(). Suggested-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200820061411.1755905-4-andriin@fb.com --- tools/testing/selftests/bpf/README.rst | 21 +++++++++++++++++++++ .../selftests/bpf/progs/test_core_reloc_type_id.c | 4 +++- 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index e885d351595f..66acfcf15ff2 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -43,3 +43,24 @@ This is due to a llvm BPF backend bug. The fix https://reviews.llvm.org/D78466 has been pushed to llvm 10.x release branch and will be available in 10.0.1. The fix is available in llvm 11.0.0 trunk. + +BPF CO-RE-based tests and Clang version +======================================= + +A set of selftests use BPF target-specific built-ins, which might require +bleeding-edge Clang versions (Clang 12 nightly at this time). + +Few sub-tests of core_reloc test suit (part of test_progs test runner) require +the following built-ins, listed with corresponding Clang diffs introducing +them to Clang/LLVM. These sub-tests are going to be skipped if Clang is too +old to support them, they shouldn't cause build failures or runtime test +failures: + + - __builtin_btf_type_id() ([0], [1], [2]); + - __builtin_preserve_type_info(), __builtin_preserve_enum_value() ([3], [4]). + + [0] https://reviews.llvm.org/D74572 + [1] https://reviews.llvm.org/D74668 + [2] https://reviews.llvm.org/D85174 + [3] https://reviews.llvm.org/D83878 + [4] https://reviews.llvm.org/D83242 diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c index 23e6e6bf276c..22aba3f6e344 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c @@ -75,10 +75,12 @@ int test_core_type_id(void *ctx) { /* We use __builtin_btf_type_id() in this tests, but up until the time * __builtin_preserve_type_info() was added it contained a bug that - * would make this test fail. The bug was fixed with addition of + * would make this test fail. The bug was fixed ([0]) with addition of * __builtin_preserve_type_info(), though, so that's what we are using * to detect whether this test has to be executed, however strange * that might look like. + * + * [0] https://reviews.llvm.org/D85174 */ #if __has_builtin(__builtin_preserve_type_info) struct core_reloc_type_id_output *out = (void *)&data.out; -- cgit v1.3.1 From d1876f3596a57b114ae8615e28a935d996ac5464 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 10 Aug 2020 11:21:15 +0100 Subject: cpupowerutils: fix spelling mistake "dependant" -> "dependent" There is a spelling mistake in a message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Shuah Khan --- tools/power/cpupower/debug/i386/intel_gsic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/cpupower/debug/i386/intel_gsic.c b/tools/power/cpupower/debug/i386/intel_gsic.c index e5e926f46d6b..befd837f07f8 100644 --- a/tools/power/cpupower/debug/i386/intel_gsic.c +++ b/tools/power/cpupower/debug/i386/intel_gsic.c @@ -71,7 +71,7 @@ int main (void) printf("\tsmi_cmd=0x?? smi_port=0x?? smi_sig=1\n"); printf("\nUnfortunately, you have to know what exactly are " "smi_cmd and smi_port, and this\nis system " - "dependant.\n"); + "dependent.\n"); } return 1; } -- cgit v1.3.1 From 527b7779e5ecabb057089b760140309bdcacc16a Mon Sep 17 00:00:00 2001 From: Martin Kaistra Date: Wed, 12 Aug 2020 11:49:12 +0200 Subject: cpupower: speed up generating git version string The variable VERSION is expanded for every use of CFLAGS. This causes "git describe" to get called multiple times on the kernel tree, which can be quite slow. The git revision does not change during build, so we can use simple variable expansion to set VERSION. Signed-off-by: Martin Kaistra Acked-by: Thomas Renninger Signed-off-by: Shuah Khan --- tools/power/cpupower/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index c8622497ef23..c7bcddbd486d 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -51,7 +51,7 @@ DESTDIR ?= # Package-related definitions. Distributions can modify the version # and _should_ modify the PACKAGE_BUGREPORT definition -VERSION= $(shell ./utils/version-gen.sh) +VERSION:= $(shell ./utils/version-gen.sh) LIB_MAJ= 0.0.1 LIB_MIN= 0 -- cgit v1.3.1 From 0d89419319ef68f8acc1b78377ad3e7523fead4a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Aug 2020 10:11:16 -0700 Subject: selftests: net: tcp_mmap: use madvise(MADV_DONTNEED) When TCP_ZEROCOPY_RECEIVE operation has been added, I made the mistake of automatically un-mapping prior content before mapping new pages. This has the unfortunate effect of adding potentially long MMU operations (like TLB flushes) while socket lock is held. Using madvise(MADV_DONTNEED) right after pages has been used has two benefits : 1) This releases pages sooner, allowing pages to be recycled if they were part of a page pool in a NIC driver. 2) No more long unmap operations while preventing immediate processing of incoming packets. The cost of the added system call is small enough. Arjun will submit a kernel patch allowing to opt out from the unmap attempt in tcp_zerocopy_receive() Signed-off-by: Eric Dumazet Cc: Arjun Roy Cc: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_mmap.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c index a61b7b3da549..59ec0b59f7b7 100644 --- a/tools/testing/selftests/net/tcp_mmap.c +++ b/tools/testing/selftests/net/tcp_mmap.c @@ -179,6 +179,10 @@ void *child_thread(void *arg) total_mmap += zc.length; if (xflg) hash_zone(addr, zc.length); + /* It is more efficient to unmap the pages right now, + * instead of doing this in next TCP_ZEROCOPY_RECEIVE. + */ + madvise(addr, zc.length, MADV_DONTNEED); total += zc.length; } if (zc.recv_skip_hint) { -- cgit v1.3.1 From 72653ae5303c626ca29fcbcbb8165a894a104adf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Aug 2020 10:11:17 -0700 Subject: selftests: net: tcp_mmap: Use huge pages in send path There are significant gains using huge pages when available, as shown in [1]. This patch adds mmap_large_buffer() and uses it in client side (tx path of this reference tool) Following patch will use the feature for server side. [1] https://patchwork.ozlabs.org/project/netdev/patch/20200820154359.1806305-1-edumazet@google.com/ Signed-off-by: Eric Dumazet Cc: Arjun Roy Cc: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_mmap.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c index 59ec0b59f7b7..ca2618f3e7a1 100644 --- a/tools/testing/selftests/net/tcp_mmap.c +++ b/tools/testing/selftests/net/tcp_mmap.c @@ -123,6 +123,28 @@ void hash_zone(void *zone, unsigned int length) #define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) #define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) + +static void *mmap_large_buffer(size_t need, size_t *allocated) +{ + void *buffer; + size_t sz; + + /* Attempt to use huge pages if possible. */ + sz = ALIGN_UP(need, map_align); + buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); + + if (buffer == (void *)-1) { + sz = need; + buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (buffer != (void *)-1) + fprintf(stderr, "MAP_HUGETLB attempt failed, look at /sys/kernel/mm/hugepages for optimal performance\n"); + } + *allocated = sz; + return buffer; +} + void *child_thread(void *arg) { unsigned long total_mmap = 0, total = 0; @@ -351,6 +373,7 @@ int main(int argc, char *argv[]) uint64_t total = 0; char *host = NULL; int fd, c, on = 1; + size_t buffer_sz; char *buffer; int sflg = 0; int mss = 0; @@ -441,8 +464,8 @@ int main(int argc, char *argv[]) } do_accept(fdlisten); } - buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + buffer = mmap_large_buffer(chunk_size, &buffer_sz); if (buffer == (char *)-1) { perror("mmap"); exit(1); @@ -488,6 +511,6 @@ int main(int argc, char *argv[]) total += wr; } close(fd); - munmap(buffer, chunk_size); + munmap(buffer, buffer_sz); return 0; } -- cgit v1.3.1 From 59c0d31988fb366189502a8ac66b7fe1486b7e40 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Aug 2020 10:11:18 -0700 Subject: selftests: net: tcp_mmap: Use huge pages in receive path One down side of using TCP rx zerocopy is one extra TLB miss per page after the mapping operation. While if the application is using hugepages, the non zerocopy recvmsg() will not have to pay these TLB costs. This patch allows server side to use huge pages for the non zero copy case, to allow fair comparisons when both solutions use optimal conditions. Signed-off-by: Eric Dumazet Cc: Arjun Roy Cc: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_mmap.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c index ca2618f3e7a1..00f837c9bc6c 100644 --- a/tools/testing/selftests/net/tcp_mmap.c +++ b/tools/testing/selftests/net/tcp_mmap.c @@ -157,6 +157,7 @@ void *child_thread(void *arg) void *addr = NULL; double throughput; struct rusage ru; + size_t buffer_sz; int lu, fd; fd = (int)(unsigned long)arg; @@ -164,9 +165,9 @@ void *child_thread(void *arg) gettimeofday(&t0, NULL); fcntl(fd, F_SETFL, O_NDELAY); - buffer = malloc(chunk_size); - if (!buffer) { - perror("malloc"); + buffer = mmap_large_buffer(chunk_size, &buffer_sz); + if (buffer == (void *)-1) { + perror("mmap"); goto error; } if (zflg) { @@ -256,7 +257,7 @@ end: ru.ru_nvcsw); } error: - free(buffer); + munmap(buffer, buffer_sz); close(fd); if (zflg) munmap(raddr, chunk_size + map_align); -- cgit v1.3.1 From 6b0a249a301e2af9adda84adbced3a2988248b95 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 21 Aug 2020 11:44:18 -0700 Subject: bpf: Implement link_query for bpf iterators This patch implemented bpf_link callback functions show_fdinfo and fill_link_info to support link_query interface. The general interface for show_fdinfo and fill_link_info will print/fill the target_name. Each targets can register show_fdinfo and fill_link_info callbacks to print/fill more target specific information. For example, the below is a fdinfo result for a bpf task iterator. $ cat /proc/1749/fdinfo/7 pos: 0 flags: 02000000 mnt_id: 14 link_type: iter link_id: 11 prog_tag: 990e1f8152f7e54f prog_id: 59 target_name: task Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200821184418.574122-1-yhs@fb.com --- include/linux/bpf.h | 6 +++++ include/uapi/linux/bpf.h | 7 +++++ kernel/bpf/bpf_iter.c | 58 ++++++++++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 7 +++++ 4 files changed, 78 insertions(+) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a9b7185a6b37..529e9b183eeb 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1218,12 +1218,18 @@ typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog, union bpf_iter_link_info *linfo, struct bpf_iter_aux_info *aux); typedef void (*bpf_iter_detach_target_t)(struct bpf_iter_aux_info *aux); +typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux, + struct seq_file *seq); +typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux, + struct bpf_link_info *info); #define BPF_ITER_CTX_ARG_MAX 2 struct bpf_iter_reg { const char *target; bpf_iter_attach_target_t attach_target; bpf_iter_detach_target_t detach_target; + bpf_iter_show_fdinfo_t show_fdinfo; + bpf_iter_fill_link_info_t fill_link_info; u32 ctx_arg_info_size; struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; const struct bpf_iter_seq_info *seq_info; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0480f893facd..a1bbaff7a0af 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4071,6 +4071,13 @@ struct bpf_link_info { __u64 cgroup_id; __u32 attach_type; } cgroup; + struct { + __aligned_u64 target_name; /* in/out: target_name buffer ptr */ + __u32 target_name_len; /* in/out: target_name buffer len */ + union { + __u32 map_id; + } map; + } iter; struct { __u32 netns_ino; __u32 attach_type; diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index b6715964b685..aeec7e174188 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -377,10 +377,68 @@ out_unlock: return ret; } +static void bpf_iter_link_show_fdinfo(const struct bpf_link *link, + struct seq_file *seq) +{ + struct bpf_iter_link *iter_link = + container_of(link, struct bpf_iter_link, link); + bpf_iter_show_fdinfo_t show_fdinfo; + + seq_printf(seq, + "target_name:\t%s\n", + iter_link->tinfo->reg_info->target); + + show_fdinfo = iter_link->tinfo->reg_info->show_fdinfo; + if (show_fdinfo) + show_fdinfo(&iter_link->aux, seq); +} + +static int bpf_iter_link_fill_link_info(const struct bpf_link *link, + struct bpf_link_info *info) +{ + struct bpf_iter_link *iter_link = + container_of(link, struct bpf_iter_link, link); + char __user *ubuf = u64_to_user_ptr(info->iter.target_name); + bpf_iter_fill_link_info_t fill_link_info; + u32 ulen = info->iter.target_name_len; + const char *target_name; + u32 target_len; + + if (!ulen ^ !ubuf) + return -EINVAL; + + target_name = iter_link->tinfo->reg_info->target; + target_len = strlen(target_name); + info->iter.target_name_len = target_len + 1; + + if (ubuf) { + if (ulen >= target_len + 1) { + if (copy_to_user(ubuf, target_name, target_len + 1)) + return -EFAULT; + } else { + char zero = '\0'; + + if (copy_to_user(ubuf, target_name, ulen - 1)) + return -EFAULT; + if (put_user(zero, ubuf + ulen - 1)) + return -EFAULT; + return -ENOSPC; + } + } + + fill_link_info = iter_link->tinfo->reg_info->fill_link_info; + if (fill_link_info) + return fill_link_info(&iter_link->aux, info); + + return 0; +} + static const struct bpf_link_ops bpf_iter_link_lops = { .release = bpf_iter_link_release, .dealloc = bpf_iter_link_dealloc, .update_prog = bpf_iter_link_replace, + .show_fdinfo = bpf_iter_link_show_fdinfo, + .fill_link_info = bpf_iter_link_fill_link_info, }; bool bpf_link_is_iter(struct bpf_link *link) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0480f893facd..a1bbaff7a0af 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4071,6 +4071,13 @@ struct bpf_link_info { __u64 cgroup_id; __u32 attach_type; } cgroup; + struct { + __aligned_u64 target_name; /* in/out: target_name buffer ptr */ + __u32 target_name_len; /* in/out: target_name buffer len */ + union { + __u32 map_id; + } map; + } iter; struct { __u32 netns_ino; __u32 attach_type; -- cgit v1.3.1 From e60495eafdba24a466a6ae62da86245a958954bc Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 21 Aug 2020 11:44:20 -0700 Subject: bpftool: Implement link_query for bpf iterators The link query for bpf iterators is implemented. Besides being shown to the user what bpf iterator the link represents, the target_name is also used to filter out what additional information should be printed out, e.g., whether map_id should be shown or not. The following is an example of bpf_iter link dump, plain output or pretty output. $ bpftool link show 11: iter prog 59 target_name task pids test_progs(1749) 34: iter prog 173 target_name bpf_map_elem map_id 127 pids test_progs_1(1753) $ bpftool -p link show [{ "id": 11, "type": "iter", "prog_id": 59, "target_name": "task", "pids": [{ "pid": 1749, "comm": "test_progs" } ] },{ "id": 34, "type": "iter", "prog_id": 173, "target_name": "bpf_map_elem", "map_id": 127, "pids": [{ "pid": 1753, "comm": "test_progs_1" } ] } ] Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200821184420.574430-1-yhs@fb.com --- tools/bpf/bpftool/link.c | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index a89f09e3c848..e77e1525d20a 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -77,6 +77,22 @@ static void show_link_attach_type_json(__u32 attach_type, json_writer_t *wtr) jsonw_uint_field(wtr, "attach_type", attach_type); } +static bool is_iter_map_target(const char *target_name) +{ + return strcmp(target_name, "bpf_map_elem") == 0 || + strcmp(target_name, "bpf_sk_storage_map") == 0; +} + +static void show_iter_json(struct bpf_link_info *info, json_writer_t *wtr) +{ + const char *target_name = u64_to_ptr(info->iter.target_name); + + jsonw_string_field(wtr, "target_name", target_name); + + if (is_iter_map_target(target_name)) + jsonw_uint_field(wtr, "map_id", info->iter.map.map_id); +} + static int get_prog_info(int prog_id, struct bpf_prog_info *info) { __u32 len = sizeof(*info); @@ -128,6 +144,9 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) info->cgroup.cgroup_id); show_link_attach_type_json(info->cgroup.attach_type, json_wtr); break; + case BPF_LINK_TYPE_ITER: + show_iter_json(info, json_wtr); + break; case BPF_LINK_TYPE_NETNS: jsonw_uint_field(json_wtr, "netns_ino", info->netns.netns_ino); @@ -175,6 +194,16 @@ static void show_link_attach_type_plain(__u32 attach_type) printf("attach_type %u ", attach_type); } +static void show_iter_plain(struct bpf_link_info *info) +{ + const char *target_name = u64_to_ptr(info->iter.target_name); + + printf("target_name %s ", target_name); + + if (is_iter_map_target(target_name)) + printf("map_id %u ", info->iter.map.map_id); +} + static int show_link_close_plain(int fd, struct bpf_link_info *info) { struct bpf_prog_info prog_info; @@ -204,6 +233,9 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) printf("\n\tcgroup_id %zu ", (size_t)info->cgroup.cgroup_id); show_link_attach_type_plain(info->cgroup.attach_type); break; + case BPF_LINK_TYPE_ITER: + show_iter_plain(info); + break; case BPF_LINK_TYPE_NETNS: printf("\n\tnetns_ino %u ", info->netns.netns_ino); show_link_attach_type_plain(info->netns.attach_type); @@ -231,7 +263,7 @@ static int do_show_link(int fd) { struct bpf_link_info info; __u32 len = sizeof(info); - char raw_tp_name[256]; + char buf[256]; int err; memset(&info, 0, sizeof(info)); @@ -245,8 +277,14 @@ again: } if (info.type == BPF_LINK_TYPE_RAW_TRACEPOINT && !info.raw_tracepoint.tp_name) { - info.raw_tracepoint.tp_name = (unsigned long)&raw_tp_name; - info.raw_tracepoint.tp_name_len = sizeof(raw_tp_name); + info.raw_tracepoint.tp_name = (unsigned long)&buf; + info.raw_tracepoint.tp_name_len = sizeof(buf); + goto again; + } + if (info.type == BPF_LINK_TYPE_ITER && + !info.iter.target_name) { + info.iter.target_name = (unsigned long)&buf; + info.iter.target_name_len = sizeof(buf); goto again; } -- cgit v1.3.1 From dca5612f8eb9d0cf1dc254eb2adff1f16a588a7d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 21 Aug 2020 09:59:27 -0700 Subject: libbpf: Add perf_buffer APIs for better integration with outside epoll loop Add a set of APIs to perf_buffer manage to allow applications to integrate perf buffer polling into existing epoll-based infrastructure. One example is applications using libevent already and wanting to plug perf_buffer polling, instead of relying on perf_buffer__poll() and waste an extra thread to do it. But perf_buffer is still extremely useful to set up and consume perf buffer rings even for such use cases. So to accomodate such new use cases, add three new APIs: - perf_buffer__buffer_cnt() returns number of per-CPU buffers maintained by given instance of perf_buffer manager; - perf_buffer__buffer_fd() returns FD of perf_event corresponding to a specified per-CPU buffer; this FD is then polled independently; - perf_buffer__consume_buffer() consumes data from single per-CPU buffer, identified by its slot index. To support a simpler, but less efficient, way to integrate perf_buffer into external polling logic, also expose underlying epoll FD through perf_buffer__epoll_fd() API. It will need to be followed by perf_buffer__poll(), wasting extra syscall, or perf_buffer__consume(), wasting CPU to iterate buffers with no data. But could be simpler and more convenient for some cases. These APIs allow for great flexiblity, but do not sacrifice general usability of perf_buffer. Also exercise and check new APIs in perf_buffer selftest. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Reviewed-by: Alan Maguire Link: https://lore.kernel.org/bpf/20200821165927.849538-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 56 ++++++++++++++++++- tools/lib/bpf/libbpf.h | 4 ++ tools/lib/bpf/libbpf.map | 8 +++ .../testing/selftests/bpf/prog_tests/perf_buffer.c | 65 ++++++++++++++++++---- 4 files changed, 121 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 0bc1fd813408..210429c5b772 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9373,6 +9373,11 @@ static int perf_buffer__process_records(struct perf_buffer *pb, return 0; } +int perf_buffer__epoll_fd(const struct perf_buffer *pb) +{ + return pb->epoll_fd; +} + int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) { int i, cnt, err; @@ -9390,6 +9395,55 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) return cnt < 0 ? -errno : cnt; } +/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer + * manager. + */ +size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb) +{ + return pb->cpu_cnt; +} + +/* + * Return perf_event FD of a ring buffer in *buf_idx* slot of + * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using + * select()/poll()/epoll() Linux syscalls. + */ +int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx) +{ + struct perf_cpu_buf *cpu_buf; + + if (buf_idx >= pb->cpu_cnt) + return -EINVAL; + + cpu_buf = pb->cpu_bufs[buf_idx]; + if (!cpu_buf) + return -ENOENT; + + return cpu_buf->fd; +} + +/* + * Consume data from perf ring buffer corresponding to slot *buf_idx* in + * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to + * consume, do nothing and return success. + * Returns: + * - 0 on success; + * - <0 on failure. + */ +int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx) +{ + struct perf_cpu_buf *cpu_buf; + + if (buf_idx >= pb->cpu_cnt) + return -EINVAL; + + cpu_buf = pb->cpu_bufs[buf_idx]; + if (!cpu_buf) + return -ENOENT; + + return perf_buffer__process_records(pb, cpu_buf); +} + int perf_buffer__consume(struct perf_buffer *pb) { int i, err; @@ -9402,7 +9456,7 @@ int perf_buffer__consume(struct perf_buffer *pb) err = perf_buffer__process_records(pb, cpu_buf); if (err) { - pr_warn("error while processing records: %d\n", err); + pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err); return err; } } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 5ecb4069a9f0..308e0ded8f14 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -588,8 +588,12 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt, const struct perf_buffer_raw_opts *opts); LIBBPF_API void perf_buffer__free(struct perf_buffer *pb); +LIBBPF_API int perf_buffer__epoll_fd(const struct perf_buffer *pb); LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms); LIBBPF_API int perf_buffer__consume(struct perf_buffer *pb); +LIBBPF_API int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx); +LIBBPF_API size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb); +LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx); typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index e35bd6cdbdbf..66a6286d0716 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -299,3 +299,11 @@ LIBBPF_0.1.0 { btf__set_fd; btf__set_pointer_size; } LIBBPF_0.0.9; + +LIBBPF_0.2.0 { + global: + perf_buffer__buffer_cnt; + perf_buffer__buffer_fd; + perf_buffer__epoll_fd; + perf_buffer__consume_buffer; +} LIBBPF_0.1.0; diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index c33ec180b3f2..ca9f0895ec84 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -7,6 +7,8 @@ #include "test_perf_buffer.skel.h" #include "bpf/libbpf_internal.h" +static int duration; + /* AddressSanitizer sometimes crashes due to data dereference below, due to * this being mmap()'ed memory. Disable instrumentation with * no_sanitize_address attribute @@ -24,13 +26,31 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size) CPU_SET(cpu, cpu_seen); } +int trigger_on_cpu(int cpu) +{ + cpu_set_t cpu_set; + int err; + + CPU_ZERO(&cpu_set); + CPU_SET(cpu, &cpu_set); + + err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set); + if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n", cpu, err)) + return err; + + usleep(1); + + return 0; +} + void test_perf_buffer(void) { - int err, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0; + int err, on_len, nr_on_cpus = 0, nr_cpus, i; struct perf_buffer_opts pb_opts = {}; struct test_perf_buffer *skel; - cpu_set_t cpu_set, cpu_seen; + cpu_set_t cpu_seen; struct perf_buffer *pb; + int last_fd = -1, fd; bool *online; nr_cpus = libbpf_num_possible_cpus(); @@ -63,6 +83,9 @@ void test_perf_buffer(void) if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) goto out_close; + CHECK(perf_buffer__epoll_fd(pb) < 0, "epoll_fd", + "bad fd: %d\n", perf_buffer__epoll_fd(pb)); + /* trigger kprobe on every CPU */ CPU_ZERO(&cpu_seen); for (i = 0; i < nr_cpus; i++) { @@ -71,16 +94,8 @@ void test_perf_buffer(void) continue; } - CPU_ZERO(&cpu_set); - CPU_SET(i, &cpu_set); - - err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), - &cpu_set); - if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n", - i, err)) + if (trigger_on_cpu(i)) goto out_close; - - usleep(1); } /* read perf buffer */ @@ -92,6 +107,34 @@ void test_perf_buffer(void) "expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen))) goto out_free_pb; + if (CHECK(perf_buffer__buffer_cnt(pb) != nr_cpus, "buf_cnt", + "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_cpus)) + goto out_close; + + for (i = 0; i < nr_cpus; i++) { + if (i >= on_len || !online[i]) + continue; + + fd = perf_buffer__buffer_fd(pb, i); + CHECK(fd < 0 || last_fd == fd, "fd_check", "last fd %d == fd %d\n", last_fd, fd); + last_fd = fd; + + err = perf_buffer__consume_buffer(pb, i); + if (CHECK(err, "drain_buf", "cpu %d, err %d\n", i, err)) + goto out_close; + + CPU_CLR(i, &cpu_seen); + if (trigger_on_cpu(i)) + goto out_close; + + err = perf_buffer__consume_buffer(pb, i); + if (CHECK(err, "consume_buf", "cpu %d, err %d\n", i, err)) + goto out_close; + + if (CHECK(!CPU_ISSET(i, &cpu_seen), "cpu_seen", "cpu %d not seen\n", i)) + goto out_close; + } + out_free_pb: perf_buffer__free(pb); out_close: -- cgit v1.3.1 From bb23c0e1c57f3b40c8d2713401c1b42df911d424 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 21 Aug 2020 11:29:48 +0100 Subject: selftests: bpf: Test sockmap update from BPF Add a test which copies a socket from a sockmap into another sockmap or sockhash. This excercises bpf_map_update_elem support from BPF context. Compare the socket cookies from source and destination to ensure that the copy succeeded. Also check that the verifier rejects map_update from unsafe contexts. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200821102948.21918-7-lmb@cloudflare.com --- .../selftests/bpf/prog_tests/sockmap_basic.c | 78 ++++++++++++++++++++++ .../bpf/progs/test_sockmap_invalid_update.c | 23 +++++++ .../selftests/bpf/progs/test_sockmap_update.c | 48 +++++++++++++ 3 files changed, 149 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c create mode 100644 tools/testing/selftests/bpf/progs/test_sockmap_update.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 96e7b7f84c65..65ce7c289534 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -4,6 +4,8 @@ #include "test_progs.h" #include "test_skmsg_load_helpers.skel.h" +#include "test_sockmap_update.skel.h" +#include "test_sockmap_invalid_update.skel.h" #define TCP_REPAIR 19 /* TCP sock is under repair right now */ @@ -101,6 +103,76 @@ out: test_skmsg_load_helpers__destroy(skel); } +static void test_sockmap_update(enum bpf_map_type map_type) +{ + struct bpf_prog_test_run_attr tattr; + int err, prog, src, dst, duration = 0; + struct test_sockmap_update *skel; + __u64 src_cookie, dst_cookie; + const __u32 zero = 0; + char dummy[14] = {0}; + __s64 sk; + + sk = connected_socket_v4(); + if (CHECK(sk == -1, "connected_socket_v4", "cannot connect\n")) + return; + + skel = test_sockmap_update__open_and_load(); + if (CHECK(!skel, "open_and_load", "cannot load skeleton\n")) { + close(sk); + return; + } + + prog = bpf_program__fd(skel->progs.copy_sock_map); + src = bpf_map__fd(skel->maps.src); + if (map_type == BPF_MAP_TYPE_SOCKMAP) + dst = bpf_map__fd(skel->maps.dst_sock_map); + else + dst = bpf_map__fd(skel->maps.dst_sock_hash); + + err = bpf_map_update_elem(src, &zero, &sk, BPF_NOEXIST); + if (CHECK(err, "update_elem(src)", "errno=%u\n", errno)) + goto out; + + err = bpf_map_lookup_elem(src, &zero, &src_cookie); + if (CHECK(err, "lookup_elem(src, cookie)", "errno=%u\n", errno)) + goto out; + + tattr = (struct bpf_prog_test_run_attr){ + .prog_fd = prog, + .repeat = 1, + .data_in = dummy, + .data_size_in = sizeof(dummy), + }; + + err = bpf_prog_test_run_xattr(&tattr); + if (CHECK_ATTR(err || !tattr.retval, "bpf_prog_test_run", + "errno=%u retval=%u\n", errno, tattr.retval)) + goto out; + + err = bpf_map_lookup_elem(dst, &zero, &dst_cookie); + if (CHECK(err, "lookup_elem(dst, cookie)", "errno=%u\n", errno)) + goto out; + + CHECK(dst_cookie != src_cookie, "cookie mismatch", "%llu != %llu\n", + dst_cookie, src_cookie); + +out: + close(sk); + test_sockmap_update__destroy(skel); +} + +static void test_sockmap_invalid_update(void) +{ + struct test_sockmap_invalid_update *skel; + int duration = 0; + + skel = test_sockmap_invalid_update__open_and_load(); + CHECK(skel, "open_and_load", "verifier accepted map_update\n"); + if (skel) + test_sockmap_invalid_update__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -111,4 +183,10 @@ void test_sockmap_basic(void) test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP); if (test__start_subtest("sockhash sk_msg load helpers")) test_skmsg_helpers(BPF_MAP_TYPE_SOCKHASH); + if (test__start_subtest("sockmap update")) + test_sockmap_update(BPF_MAP_TYPE_SOCKMAP); + if (test__start_subtest("sockhash update")) + test_sockmap_update(BPF_MAP_TYPE_SOCKHASH); + if (test__start_subtest("sockmap update in unsafe context")) + test_sockmap_invalid_update(); } diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c new file mode 100644 index 000000000000..02a59e220cbc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Cloudflare +#include "vmlinux.h" +#include + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} map SEC(".maps"); + +SEC("sockops") +int bpf_sockmap(struct bpf_sock_ops *skops) +{ + __u32 key = 0; + + if (skops->sk) + bpf_map_update_elem(&map, &key, skops->sk, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_update.c new file mode 100644 index 000000000000..9d0c9f28cab2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_update.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Cloudflare +#include "vmlinux.h" +#include + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} src SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} dst_sock_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} dst_sock_hash SEC(".maps"); + +SEC("classifier/copy_sock_map") +int copy_sock_map(void *ctx) +{ + struct bpf_sock *sk; + bool failed = false; + __u32 key = 0; + + sk = bpf_map_lookup_elem(&src, &key); + if (!sk) + return SK_DROP; + + if (bpf_map_update_elem(&dst_sock_map, &key, sk, 0)) + failed = true; + + if (bpf_map_update_elem(&dst_sock_hash, &key, sk, 0)) + failed = true; + + bpf_sk_release(sk); + return failed ? SK_DROP : SK_PASS; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.3.1 From 3ac2e20fba07e57cd229091a568ac14a5434e6bb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 20 Aug 2020 16:12:35 -0700 Subject: selftests/bpf: BPF object files should depend only on libbpf headers There is no need to re-build BPF object files if any of the sources of libbpf change. So record more precise dependency only on libbpf/bpf_*.h headers. This eliminates unnecessary re-builds. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200820231250.1293069-2-andriin@fb.com --- tools/testing/selftests/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index a83b5827532f..09657d0afb5c 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -316,7 +316,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_BPF_PROGS_DIR)/%.c \ $(TRUNNER_BPF_PROGS_DIR)/*.h \ $$(INCLUDE_DIR)/vmlinux.h \ - $$(BPFOBJ) | $(TRUNNER_OUTPUT) + $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT) $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ $(TRUNNER_BPF_CFLAGS), \ $(TRUNNER_BPF_LDFLAGS)) -- cgit v1.3.1 From 88a82120282bdef4c331e20991e3057f417beae4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 20 Aug 2020 16:12:36 -0700 Subject: libbpf: Factor out common ELF operations and improve logging Factor out common ELF operations done throughout the libbpf. This simplifies usage across multiple places in libbpf, as well as hide error reporting from higher-level functions and make error logging more consistent. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200820231250.1293069-3-andriin@fb.com --- tools/lib/bpf/libbpf.c | 367 +++++++++++++++++++++++++++---------------------- 1 file changed, 206 insertions(+), 161 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 210429c5b772..dfb18d3ee590 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -398,6 +398,7 @@ struct bpf_object { Elf_Data *rodata; Elf_Data *bss; Elf_Data *st_ops_data; + size_t shstrndx; /* section index for section name strings */ size_t strtabidx; struct { GElf_Shdr shdr; @@ -435,6 +436,14 @@ struct bpf_object { }; #define obj_elf_valid(o) ((o)->efile.elf) +static const char *elf_sym_str(const struct bpf_object *obj, size_t off); +static const char *elf_sec_str(const struct bpf_object *obj, size_t off); +static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx); +static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name); +static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr); +static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn); +static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn); + void bpf_program__unload(struct bpf_program *prog) { int i; @@ -496,7 +505,7 @@ static char *__bpf_program__pin_name(struct bpf_program *prog) } static int -bpf_program__init(void *data, size_t size, char *section_name, int idx, +bpf_program__init(void *data, size_t size, const char *section_name, int idx, struct bpf_program *prog) { const size_t bpf_insn_sz = sizeof(struct bpf_insn); @@ -545,7 +554,7 @@ errout: static int bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, - char *section_name, int idx) + const char *section_name, int idx) { struct bpf_program prog, *progs; int nr_progs, err; @@ -570,7 +579,7 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, return -ENOMEM; } - pr_debug("found program %s\n", prog.section_name); + pr_debug("elf: found program '%s'\n", prog.section_name); obj->programs = progs; obj->nr_programs = nr_progs + 1; prog.obj = obj; @@ -590,8 +599,7 @@ bpf_object__init_prog_names(struct bpf_object *obj) prog = &obj->programs[pi]; - for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; - si++) { + for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; si++) { GElf_Sym sym; if (!gelf_getsym(symbols, si, &sym)) @@ -601,11 +609,9 @@ bpf_object__init_prog_names(struct bpf_object *obj) if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL) continue; - name = elf_strptr(obj->efile.elf, - obj->efile.strtabidx, - sym.st_name); + name = elf_sym_str(obj, sym.st_name); if (!name) { - pr_warn("failed to get sym name string for prog %s\n", + pr_warn("prog '%s': failed to get symbol name\n", prog->section_name); return -LIBBPF_ERRNO__LIBELF; } @@ -615,17 +621,14 @@ bpf_object__init_prog_names(struct bpf_object *obj) name = ".text"; if (!name) { - pr_warn("failed to find sym for prog %s\n", + pr_warn("prog '%s': failed to find program symbol\n", prog->section_name); return -EINVAL; } prog->name = strdup(name); - if (!prog->name) { - pr_warn("failed to allocate memory for prog sym %s\n", - name); + if (!prog->name) return -ENOMEM; - } } return 0; @@ -1069,7 +1072,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) GElf_Ehdr *ep; if (obj_elf_valid(obj)) { - pr_warn("elf init: internal error\n"); + pr_warn("elf: init internal error\n"); return -LIBBPF_ERRNO__LIBELF; } @@ -1087,7 +1090,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) err = -errno; cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("failed to open %s: %s\n", obj->path, cp); + pr_warn("elf: failed to open %s: %s\n", obj->path, cp); return err; } @@ -1095,22 +1098,36 @@ static int bpf_object__elf_init(struct bpf_object *obj) } if (!obj->efile.elf) { - pr_warn("failed to open %s as ELF file\n", obj->path); + pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1)); err = -LIBBPF_ERRNO__LIBELF; goto errout; } if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) { - pr_warn("failed to get EHDR from %s\n", obj->path); + pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1)); err = -LIBBPF_ERRNO__FORMAT; goto errout; } ep = &obj->efile.ehdr; + if (elf_getshdrstrndx(obj->efile.elf, &obj->efile.shstrndx)) { + pr_warn("elf: failed to get section names section index for %s: %s\n", + obj->path, elf_errmsg(-1)); + err = -LIBBPF_ERRNO__FORMAT; + goto errout; + } + + /* Elf is corrupted/truncated, avoid calling elf_strptr. */ + if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) { + pr_warn("elf: failed to get section names strings from %s: %s\n", + obj->path, elf_errmsg(-1)); + return -LIBBPF_ERRNO__FORMAT; + } + /* Old LLVM set e_machine to EM_NONE */ if (ep->e_type != ET_REL || (ep->e_machine && ep->e_machine != EM_BPF)) { - pr_warn("%s is not an eBPF object file\n", obj->path); + pr_warn("elf: %s is not a valid eBPF object file\n", obj->path); err = -LIBBPF_ERRNO__FORMAT; goto errout; } @@ -1132,7 +1149,7 @@ static int bpf_object__check_endianness(struct bpf_object *obj) #else # error "Unrecognized __BYTE_ORDER__" #endif - pr_warn("endianness mismatch.\n"); + pr_warn("elf: endianness mismatch in %s.\n", obj->path); return -LIBBPF_ERRNO__ENDIAN; } @@ -1167,55 +1184,10 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) return false; } -static int bpf_object_search_section_size(const struct bpf_object *obj, - const char *name, size_t *d_size) -{ - const GElf_Ehdr *ep = &obj->efile.ehdr; - Elf *elf = obj->efile.elf; - Elf_Scn *scn = NULL; - int idx = 0; - - while ((scn = elf_nextscn(elf, scn)) != NULL) { - const char *sec_name; - Elf_Data *data; - GElf_Shdr sh; - - idx++; - if (gelf_getshdr(scn, &sh) != &sh) { - pr_warn("failed to get section(%d) header from %s\n", - idx, obj->path); - return -EIO; - } - - sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); - if (!sec_name) { - pr_warn("failed to get section(%d) name from %s\n", - idx, obj->path); - return -EIO; - } - - if (strcmp(name, sec_name)) - continue; - - data = elf_getdata(scn, 0); - if (!data) { - pr_warn("failed to get section(%d) data from %s(%s)\n", - idx, name, obj->path); - return -EIO; - } - - *d_size = data->d_size; - return 0; - } - - return -ENOENT; -} - int bpf_object__section_size(const struct bpf_object *obj, const char *name, __u32 *size) { int ret = -ENOENT; - size_t d_size; *size = 0; if (!name) { @@ -1233,9 +1205,13 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name, if (obj->efile.st_ops_data) *size = obj->efile.st_ops_data->d_size; } else { - ret = bpf_object_search_section_size(obj, name, &d_size); - if (!ret) - *size = d_size; + Elf_Scn *scn = elf_sec_by_name(obj, name); + Elf_Data *data = elf_sec_data(obj, scn); + + if (data) { + ret = 0; /* found it */ + *size = data->d_size; + } } return *size ? 0 : ret; @@ -1260,8 +1236,7 @@ int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, GELF_ST_TYPE(sym.st_info) != STT_OBJECT) continue; - sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx, - sym.st_name); + sname = elf_sym_str(obj, sym.st_name); if (!sname) { pr_warn("failed to get sym name string for var %s\n", name); @@ -1738,12 +1713,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) if (!symbols) return -EINVAL; - scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx); - if (scn) - data = elf_getdata(scn, NULL); + + scn = elf_sec_by_idx(obj, obj->efile.maps_shndx); + data = elf_sec_data(obj, scn); if (!scn || !data) { - pr_warn("failed to get Elf_Data from map section %d\n", - obj->efile.maps_shndx); + pr_warn("elf: failed to get legacy map definitions for %s\n", + obj->path); return -EINVAL; } @@ -1765,12 +1740,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) nr_maps++; } /* Assume equally sized map definitions */ - pr_debug("maps in %s: %d maps in %zd bytes\n", - obj->path, nr_maps, data->d_size); + pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n", + nr_maps, data->d_size, obj->path); if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) { - pr_warn("unable to determine map definition size section %s, %d maps in %zd bytes\n", - obj->path, nr_maps, data->d_size); + pr_warn("elf: unable to determine legacy map definition size in %s\n", + obj->path); return -EINVAL; } map_def_sz = data->d_size / nr_maps; @@ -1791,8 +1766,7 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) if (IS_ERR(map)) return PTR_ERR(map); - map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, - sym.st_name); + map_name = elf_sym_str(obj, sym.st_name); if (!map_name) { pr_warn("failed to get map #%d name sym string for obj %s\n", i, obj->path); @@ -2274,12 +2248,11 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, if (obj->efile.btf_maps_shndx < 0) return 0; - scn = elf_getscn(obj->efile.elf, obj->efile.btf_maps_shndx); - if (scn) - data = elf_getdata(scn, NULL); + scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx); + data = elf_sec_data(obj, scn); if (!scn || !data) { - pr_warn("failed to get Elf_Data from map section %d (%s)\n", - obj->efile.maps_shndx, MAPS_ELF_SEC); + pr_warn("elf: failed to get %s map definitions for %s\n", + MAPS_ELF_SEC, obj->path); return -EINVAL; } @@ -2337,20 +2310,12 @@ static int bpf_object__init_maps(struct bpf_object *obj, static bool section_have_execinstr(struct bpf_object *obj, int idx) { - Elf_Scn *scn; GElf_Shdr sh; - scn = elf_getscn(obj->efile.elf, idx); - if (!scn) - return false; - - if (gelf_getshdr(scn, &sh) != &sh) + if (elf_sec_hdr(obj, elf_sec_by_idx(obj, idx), &sh)) return false; - if (sh.sh_flags & SHF_EXECINSTR) - return true; - - return false; + return sh.sh_flags & SHF_EXECINSTR; } static bool btf_needs_sanitization(struct bpf_object *obj) @@ -2594,61 +2559,156 @@ report: return err; } +static const char *elf_sym_str(const struct bpf_object *obj, size_t off) +{ + const char *name; + + name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off); + if (!name) { + pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n", + off, obj->path, elf_errmsg(-1)); + return NULL; + } + + return name; +} + +static const char *elf_sec_str(const struct bpf_object *obj, size_t off) +{ + const char *name; + + name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off); + if (!name) { + pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n", + off, obj->path, elf_errmsg(-1)); + return NULL; + } + + return name; +} + +static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx) +{ + Elf_Scn *scn; + + scn = elf_getscn(obj->efile.elf, idx); + if (!scn) { + pr_warn("elf: failed to get section(%zu) from %s: %s\n", + idx, obj->path, elf_errmsg(-1)); + return NULL; + } + return scn; +} + +static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name) +{ + Elf_Scn *scn = NULL; + Elf *elf = obj->efile.elf; + const char *sec_name; + + while ((scn = elf_nextscn(elf, scn)) != NULL) { + sec_name = elf_sec_name(obj, scn); + if (!sec_name) + return NULL; + + if (strcmp(sec_name, name) != 0) + continue; + + return scn; + } + return NULL; +} + +static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr) +{ + if (!scn) + return -EINVAL; + + if (gelf_getshdr(scn, hdr) != hdr) { + pr_warn("elf: failed to get section(%zu) header from %s: %s\n", + elf_ndxscn(scn), obj->path, elf_errmsg(-1)); + return -EINVAL; + } + + return 0; +} + +static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn) +{ + const char *name; + GElf_Shdr sh; + + if (!scn) + return NULL; + + if (elf_sec_hdr(obj, scn, &sh)) + return NULL; + + name = elf_sec_str(obj, sh.sh_name); + if (!name) { + pr_warn("elf: failed to get section(%zu) name from %s: %s\n", + elf_ndxscn(scn), obj->path, elf_errmsg(-1)); + return NULL; + } + + return name; +} + +static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn) +{ + Elf_Data *data; + + if (!scn) + return NULL; + + data = elf_getdata(scn, 0); + if (!data) { + pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n", + elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "", + obj->path, elf_errmsg(-1)); + return NULL; + } + + return data; +} + static int bpf_object__elf_collect(struct bpf_object *obj) { Elf *elf = obj->efile.elf; - GElf_Ehdr *ep = &obj->efile.ehdr; Elf_Data *btf_ext_data = NULL; Elf_Data *btf_data = NULL; Elf_Scn *scn = NULL; int idx = 0, err = 0; - /* Elf is corrupted/truncated, avoid calling elf_strptr. */ - if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) { - pr_warn("failed to get e_shstrndx from %s\n", obj->path); - return -LIBBPF_ERRNO__FORMAT; - } - while ((scn = elf_nextscn(elf, scn)) != NULL) { - char *name; + const char *name; GElf_Shdr sh; Elf_Data *data; idx++; - if (gelf_getshdr(scn, &sh) != &sh) { - pr_warn("failed to get section(%d) header from %s\n", - idx, obj->path); + + if (elf_sec_hdr(obj, scn, &sh)) return -LIBBPF_ERRNO__FORMAT; - } - name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); - if (!name) { - pr_warn("failed to get section(%d) name from %s\n", - idx, obj->path); + name = elf_sec_str(obj, sh.sh_name); + if (!name) return -LIBBPF_ERRNO__FORMAT; - } - data = elf_getdata(scn, 0); - if (!data) { - pr_warn("failed to get section(%d) data from %s(%s)\n", - idx, name, obj->path); + data = elf_sec_data(obj, scn); + if (!data) return -LIBBPF_ERRNO__FORMAT; - } - pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", + + pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", idx, name, (unsigned long)data->d_size, (int)sh.sh_link, (unsigned long)sh.sh_flags, (int)sh.sh_type); if (strcmp(name, "license") == 0) { - err = bpf_object__init_license(obj, - data->d_buf, - data->d_size); + err = bpf_object__init_license(obj, data->d_buf, data->d_size); if (err) return err; } else if (strcmp(name, "version") == 0) { - err = bpf_object__init_kversion(obj, - data->d_buf, - data->d_size); + err = bpf_object__init_kversion(obj, data->d_buf, data->d_size); if (err) return err; } else if (strcmp(name, "maps") == 0) { @@ -2661,8 +2721,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj) btf_ext_data = data; } else if (sh.sh_type == SHT_SYMTAB) { if (obj->efile.symbols) { - pr_warn("bpf: multiple SYMTAB in %s\n", - obj->path); + pr_warn("elf: multiple symbol tables in %s\n", obj->path); return -LIBBPF_ERRNO__FORMAT; } obj->efile.symbols = data; @@ -2675,16 +2734,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) err = bpf_object__add_program(obj, data->d_buf, data->d_size, name, idx); - if (err) { - char errmsg[STRERR_BUFSIZE]; - char *cp; - - cp = libbpf_strerror_r(-err, errmsg, - sizeof(errmsg)); - pr_warn("failed to alloc program %s (%s): %s", - name, obj->path, cp); + if (err) return err; - } } else if (strcmp(name, DATA_SEC) == 0) { obj->efile.data = data; obj->efile.data_shndx = idx; @@ -2695,7 +2746,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) obj->efile.st_ops_data = data; obj->efile.st_ops_shndx = idx; } else { - pr_debug("skip section(%d) %s\n", idx, name); + pr_debug("elf: skipping unrecognized data section(%d) %s\n", + idx, name); } } else if (sh.sh_type == SHT_REL) { int nr_sects = obj->efile.nr_reloc_sects; @@ -2706,34 +2758,32 @@ static int bpf_object__elf_collect(struct bpf_object *obj) if (!section_have_execinstr(obj, sec) && strcmp(name, ".rel" STRUCT_OPS_SEC) && strcmp(name, ".rel" MAPS_ELF_SEC)) { - pr_debug("skip relo %s(%d) for section(%d)\n", - name, idx, sec); + pr_debug("elf: skipping relo section(%d) %s for section(%d) %s\n", + idx, name, sec, + elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: ""); continue; } sects = libbpf_reallocarray(sects, nr_sects + 1, sizeof(*obj->efile.reloc_sects)); - if (!sects) { - pr_warn("reloc_sects realloc failed\n"); + if (!sects) return -ENOMEM; - } obj->efile.reloc_sects = sects; obj->efile.nr_reloc_sects++; obj->efile.reloc_sects[nr_sects].shdr = sh; obj->efile.reloc_sects[nr_sects].data = data; - } else if (sh.sh_type == SHT_NOBITS && - strcmp(name, BSS_SEC) == 0) { + } else if (sh.sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) { obj->efile.bss = data; obj->efile.bss_shndx = idx; } else { - pr_debug("skip section(%d) %s\n", idx, name); + pr_debug("elf: skipping section(%d) %s\n", idx, name); } } if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) { - pr_warn("Corrupted ELF file: index of strtab invalid\n"); + pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path); return -LIBBPF_ERRNO__FORMAT; } return bpf_object__init_btf(obj, btf_data, btf_ext_data); @@ -2894,14 +2944,13 @@ static int bpf_object__collect_externs(struct bpf_object *obj) if (!obj->efile.symbols) return 0; - scn = elf_getscn(obj->efile.elf, obj->efile.symbols_shndx); - if (!scn) - return -LIBBPF_ERRNO__FORMAT; - if (gelf_getshdr(scn, &sh) != &sh) + scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx); + if (elf_sec_hdr(obj, scn, &sh)) return -LIBBPF_ERRNO__FORMAT; - n = sh.sh_size / sh.sh_entsize; + n = sh.sh_size / sh.sh_entsize; pr_debug("looking for externs among %d symbols...\n", n); + for (i = 0; i < n; i++) { GElf_Sym sym; @@ -2909,8 +2958,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return -LIBBPF_ERRNO__FORMAT; if (!sym_is_extern(&sym)) continue; - ext_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, - sym.st_name); + ext_name = elf_sym_str(obj, sym.st_name); if (!ext_name || !ext_name[0]) continue; @@ -3289,16 +3337,15 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, return -LIBBPF_ERRNO__FORMAT; } if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { - pr_warn("relocation: symbol %"PRIx64" not found\n", - GELF_R_SYM(rel.r_info)); + pr_warn("relocation: symbol %zx not found\n", + (size_t)GELF_R_SYM(rel.r_info)); return -LIBBPF_ERRNO__FORMAT; } if (rel.r_offset % sizeof(struct bpf_insn)) return -LIBBPF_ERRNO__FORMAT; insn_idx = rel.r_offset / sizeof(struct bpf_insn); - name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, - sym.st_name) ? : ""; + name = elf_sym_str(obj, sym.st_name) ?: ""; pr_debug("relo for shdr %u, symb %zu, value %zu, type %d, bind %d, name %d (\'%s\'), insn %u\n", (__u32)sym.st_shndx, (size_t)GELF_R_SYM(rel.r_info), @@ -5720,8 +5767,7 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, i, (size_t)GELF_R_SYM(rel.r_info)); return -LIBBPF_ERRNO__FORMAT; } - name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, - sym.st_name) ? : ""; + name = elf_sym_str(obj, sym.st_name) ?: ""; if (sym.st_shndx != obj->efile.btf_maps_shndx) { pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", i, name); @@ -7663,8 +7709,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, return -LIBBPF_ERRNO__FORMAT; } - name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, - sym.st_name) ? : ""; + name = elf_sym_str(obj, sym.st_name) ?: ""; map = find_struct_ops_map_by_offset(obj, rel.r_offset); if (!map) { pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n", -- cgit v1.3.1 From 819c23af561c4dc8c35faaacacdfa9ba81bacf5f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 20 Aug 2020 16:12:37 -0700 Subject: libbpf: Add __noinline macro to bpf_helpers.h __noinline is pretty frequently used, especially with BPF subprograms, so add them along the __always_inline, for user convenience and completeness. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200820231250.1293069-4-andriin@fb.com --- tools/lib/bpf/bpf_helpers.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index e9a4ecddb7a5..1106777df00b 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -32,6 +32,9 @@ #ifndef __always_inline #define __always_inline __attribute__((always_inline)) #endif +#ifndef __noinline +#define __noinline __attribute__((noinline)) +#endif #ifndef __weak #define __weak __attribute__((weak)) #endif -- cgit v1.3.1 From 50e09460d9f8dee9fbaaa3194c7921753d75cd96 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 20 Aug 2020 16:12:38 -0700 Subject: libbpf: Skip well-known ELF sections when iterating ELF Skip and don't log ELF sections that libbpf knows about and ignores during ELF processing. This allows to not unnecessarily log details about those ELF sections and cleans up libbpf debug log. Ignored sections include DWARF data, string table, empty .text section and few special (e.g., .llvm_addrsig) useless sections. With such ELF sections out of the way, log unrecognized ELF sections at pr_info level to increase visibility. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200820231250.1293069-5-andriin@fb.com --- tools/lib/bpf/libbpf.c | 55 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index dfb18d3ee590..2ae89b84a2be 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2672,6 +2672,46 @@ static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn) return data; } +static bool is_sec_name_dwarf(const char *name) +{ + /* approximation, but the actual list is too long */ + return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0; +} + +static bool ignore_elf_section(GElf_Shdr *hdr, const char *name) +{ + /* no special handling of .strtab */ + if (hdr->sh_type == SHT_STRTAB) + return true; + + /* ignore .llvm_addrsig section as well */ + if (hdr->sh_type == 0x6FFF4C03 /* SHT_LLVM_ADDRSIG */) + return true; + + /* no subprograms will lead to an empty .text section, ignore it */ + if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 && + strcmp(name, ".text") == 0) + return true; + + /* DWARF sections */ + if (is_sec_name_dwarf(name)) + return true; + + if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) { + name += sizeof(".rel") - 1; + /* DWARF section relocations */ + if (is_sec_name_dwarf(name)) + return true; + + /* .BTF and .BTF.ext don't need relocations */ + if (strcmp(name, BTF_ELF_SEC) == 0 || + strcmp(name, BTF_EXT_ELF_SEC) == 0) + return true; + } + + return false; +} + static int bpf_object__elf_collect(struct bpf_object *obj) { Elf *elf = obj->efile.elf; @@ -2694,6 +2734,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj) if (!name) return -LIBBPF_ERRNO__FORMAT; + if (ignore_elf_section(&sh, name)) + continue; + data = elf_sec_data(obj, scn); if (!data) return -LIBBPF_ERRNO__FORMAT; @@ -2746,8 +2789,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) obj->efile.st_ops_data = data; obj->efile.st_ops_shndx = idx; } else { - pr_debug("elf: skipping unrecognized data section(%d) %s\n", - idx, name); + pr_info("elf: skipping unrecognized data section(%d) %s\n", + idx, name); } } else if (sh.sh_type == SHT_REL) { int nr_sects = obj->efile.nr_reloc_sects; @@ -2758,9 +2801,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj) if (!section_have_execinstr(obj, sec) && strcmp(name, ".rel" STRUCT_OPS_SEC) && strcmp(name, ".rel" MAPS_ELF_SEC)) { - pr_debug("elf: skipping relo section(%d) %s for section(%d) %s\n", - idx, name, sec, - elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: ""); + pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n", + idx, name, sec, + elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: ""); continue; } @@ -2778,7 +2821,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj) obj->efile.bss = data; obj->efile.bss_shndx = idx; } else { - pr_debug("elf: skipping section(%d) %s\n", idx, name); + pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name, sh.sh_size); } } -- cgit v1.3.1 From 9c0f8cbdc0e9cf8a2a3a96045778b8d759f172c2 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 20 Aug 2020 16:12:39 -0700 Subject: libbpf: Normalize and improve logging across few functions Make libbpf logs follow similar pattern and provide more context like section name or program name, where appropriate. Also, add BPF_INSN_SZ constant and use it throughout to clean up code a little bit. This commit doesn't have any functional changes and just removes some code changes out of the way before bigger refactoring in libbpf internals. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200820231250.1293069-6-andriin@fb.com --- tools/lib/bpf/libbpf.c | 117 ++++++++++++++++++++++++++++--------------------- 1 file changed, 67 insertions(+), 50 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2ae89b84a2be..fb7b8fb1d3fa 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -63,6 +63,8 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif +#define BPF_INSN_SZ (sizeof(struct bpf_insn)) + /* vsprintf() in __base_pr() uses nonliteral format string. It may break * compilation if user enables corresponding warning. Disable it explicitly. */ @@ -3225,7 +3227,7 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) static int bpf_program__record_reloc(struct bpf_program *prog, struct reloc_desc *reloc_desc, - __u32 insn_idx, const char *name, + __u32 insn_idx, const char *sym_name, const GElf_Sym *sym, const GElf_Rel *rel) { struct bpf_insn *insn = &prog->insns[insn_idx]; @@ -3233,22 +3235,25 @@ static int bpf_program__record_reloc(struct bpf_program *prog, struct bpf_object *obj = prog->obj; __u32 shdr_idx = sym->st_shndx; enum libbpf_map_type type; + const char *sym_sec_name; struct bpf_map *map; /* sub-program call relocation */ if (insn->code == (BPF_JMP | BPF_CALL)) { if (insn->src_reg != BPF_PSEUDO_CALL) { - pr_warn("incorrect bpf_call opcode\n"); + pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name); return -LIBBPF_ERRNO__RELOC; } /* text_shndx can be 0, if no default "main" program exists */ if (!shdr_idx || shdr_idx != obj->efile.text_shndx) { - pr_warn("bad call relo against section %u\n", shdr_idx); + sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); + pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n", + prog->name, sym_name, sym_sec_name); return -LIBBPF_ERRNO__RELOC; } - if (sym->st_value % 8) { - pr_warn("bad call relo offset: %zu\n", - (size_t)sym->st_value); + if (sym->st_value % BPF_INSN_SZ) { + pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n", + prog->name, sym_name, (size_t)sym->st_value); return -LIBBPF_ERRNO__RELOC; } reloc_desc->type = RELO_CALL; @@ -3259,8 +3264,8 @@ static int bpf_program__record_reloc(struct bpf_program *prog, } if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) { - pr_warn("invalid relo for insns[%d].code 0x%x\n", - insn_idx, insn->code); + pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n", + prog->name, sym_name, insn_idx, insn->code); return -LIBBPF_ERRNO__RELOC; } @@ -3275,12 +3280,12 @@ static int bpf_program__record_reloc(struct bpf_program *prog, break; } if (i >= n) { - pr_warn("extern relo failed to find extern for sym %d\n", - sym_idx); + pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n", + prog->name, sym_name, sym_idx); return -LIBBPF_ERRNO__RELOC; } - pr_debug("found extern #%d '%s' (sym %d) for insn %u\n", - i, ext->name, ext->sym_idx, insn_idx); + pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n", + prog->name, i, ext->name, ext->sym_idx, insn_idx); reloc_desc->type = RELO_EXTERN; reloc_desc->insn_idx = insn_idx; reloc_desc->sym_off = i; /* sym_off stores extern index */ @@ -3288,18 +3293,19 @@ static int bpf_program__record_reloc(struct bpf_program *prog, } if (!shdr_idx || shdr_idx >= SHN_LORESERVE) { - pr_warn("invalid relo for \'%s\' in special section 0x%x; forgot to initialize global var?..\n", - name, shdr_idx); + pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n", + prog->name, sym_name, shdr_idx); return -LIBBPF_ERRNO__RELOC; } type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); + sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); /* generic map reference relocation */ if (type == LIBBPF_MAP_UNSPEC) { if (!bpf_object__shndx_is_maps(obj, shdr_idx)) { - pr_warn("bad map relo against section %u\n", - shdr_idx); + pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n", + prog->name, sym_name, sym_sec_name); return -LIBBPF_ERRNO__RELOC; } for (map_idx = 0; map_idx < nr_maps; map_idx++) { @@ -3308,14 +3314,14 @@ static int bpf_program__record_reloc(struct bpf_program *prog, map->sec_idx != sym->st_shndx || map->sec_offset != sym->st_value) continue; - pr_debug("found map %zd (%s, sec %d, off %zu) for insn %u\n", - map_idx, map->name, map->sec_idx, + pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n", + prog->name, map_idx, map->name, map->sec_idx, map->sec_offset, insn_idx); break; } if (map_idx >= nr_maps) { - pr_warn("map relo failed to find map for sec %u, off %zu\n", - shdr_idx, (size_t)sym->st_value); + pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n", + prog->name, sym_sec_name, (size_t)sym->st_value); return -LIBBPF_ERRNO__RELOC; } reloc_desc->type = RELO_LD64; @@ -3327,21 +3333,22 @@ static int bpf_program__record_reloc(struct bpf_program *prog, /* global data map relocation */ if (!bpf_object__shndx_is_data(obj, shdr_idx)) { - pr_warn("bad data relo against section %u\n", shdr_idx); + pr_warn("prog '%s': bad data relo against section '%s'\n", + prog->name, sym_sec_name); return -LIBBPF_ERRNO__RELOC; } for (map_idx = 0; map_idx < nr_maps; map_idx++) { map = &obj->maps[map_idx]; if (map->libbpf_type != type) continue; - pr_debug("found data map %zd (%s, sec %d, off %zu) for insn %u\n", - map_idx, map->name, map->sec_idx, map->sec_offset, - insn_idx); + pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n", + prog->name, map_idx, map->name, map->sec_idx, + map->sec_offset, insn_idx); break; } if (map_idx >= nr_maps) { - pr_warn("data relo failed to find map for sec %u\n", - shdr_idx); + pr_warn("prog '%s': data relo failed to find map for section '%s'\n", + prog->name, sym_sec_name); return -LIBBPF_ERRNO__RELOC; } @@ -3357,9 +3364,17 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, Elf_Data *data, struct bpf_object *obj) { Elf_Data *symbols = obj->efile.symbols; + const char *relo_sec_name, *sec_name; + size_t sec_idx = shdr->sh_info; int err, i, nrels; - pr_debug("collecting relocating info for: '%s'\n", prog->section_name); + relo_sec_name = elf_sec_str(obj, shdr->sh_name); + sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); + if (!relo_sec_name || !sec_name) + return -EINVAL; + + pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n", + relo_sec_name, sec_idx, sec_name); nrels = shdr->sh_size / shdr->sh_entsize; prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels); @@ -3370,34 +3385,34 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, prog->nr_reloc = nrels; for (i = 0; i < nrels; i++) { - const char *name; + const char *sym_name; __u32 insn_idx; GElf_Sym sym; GElf_Rel rel; if (!gelf_getrel(data, i, &rel)) { - pr_warn("relocation: failed to get %d reloc\n", i); + pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i); return -LIBBPF_ERRNO__FORMAT; } if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { - pr_warn("relocation: symbol %zx not found\n", - (size_t)GELF_R_SYM(rel.r_info)); + pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n", + relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i); return -LIBBPF_ERRNO__FORMAT; } - if (rel.r_offset % sizeof(struct bpf_insn)) + if (rel.r_offset % BPF_INSN_SZ) { + pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n", + relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i); return -LIBBPF_ERRNO__FORMAT; + } - insn_idx = rel.r_offset / sizeof(struct bpf_insn); - name = elf_sym_str(obj, sym.st_name) ?: ""; + insn_idx = rel.r_offset / BPF_INSN_SZ; + sym_name = elf_sym_str(obj, sym.st_name) ?: ""; - pr_debug("relo for shdr %u, symb %zu, value %zu, type %d, bind %d, name %d (\'%s\'), insn %u\n", - (__u32)sym.st_shndx, (size_t)GELF_R_SYM(rel.r_info), - (size_t)sym.st_value, GELF_ST_TYPE(sym.st_info), - GELF_ST_BIND(sym.st_info), sym.st_name, name, - insn_idx); + pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n", + relo_sec_name, i, insn_idx, sym_name); err = bpf_program__record_reloc(prog, &prog->reloc_desc[i], - insn_idx, name, &sym, &rel); + insn_idx, sym_name, &sym, &rel); if (err) return err; } @@ -5155,9 +5170,9 @@ static int bpf_core_patch_insn(struct bpf_program *prog, int insn_idx; __u8 class; - if (relo->insn_off % sizeof(struct bpf_insn)) + if (relo->insn_off % BPF_INSN_SZ) return -EINVAL; - insn_idx = relo->insn_off / sizeof(struct bpf_insn); + insn_idx = relo->insn_off / BPF_INSN_SZ; insn = &prog->insns[insn_idx]; class = BPF_CLASS(insn->code); @@ -5588,7 +5603,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) goto out; } - pr_debug("prog '%s': performing %d CO-RE offset relocs\n", + pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info); for_each_btf_ext_rec(seg, sec, i, rec) { @@ -5596,7 +5611,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) targ_btf, cand_cache); if (err) { pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", - sec_name, i, err); + prog->name, i, err); goto out; } } @@ -5716,7 +5731,8 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) return err; break; default: - pr_warn("relo #%d: bad relo type %d\n", i, relo->type); + pr_warn("prog '%s': relo #%d: bad relo type %d\n", + prog->name, i, relo->type); return -EINVAL; } } @@ -5751,7 +5767,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) err = bpf_program__relocate(prog, obj); if (err) { - pr_warn("failed to relocate '%s'\n", prog->section_name); + pr_warn("prog '%s': failed to relocate data references: %d\n", + prog->name, err); return err; } break; @@ -5766,7 +5783,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) err = bpf_program__relocate(prog, obj); if (err) { - pr_warn("failed to relocate '%s'\n", prog->section_name); + pr_warn("prog '%s': failed to relocate calls: %d\n", + prog->name, err); return err; } } @@ -6198,8 +6216,7 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) if (bpf_program__is_function_storage(prog, obj)) continue; if (!prog->load) { - pr_debug("prog '%s'('%s'): skipped loading\n", - prog->name, prog->section_name); + pr_debug("prog '%s': skipped loading\n", prog->name); continue; } prog->log_level |= log_level; @@ -7343,7 +7360,7 @@ int bpf_program__fd(const struct bpf_program *prog) size_t bpf_program__size(const struct bpf_program *prog) { - return prog->insns_cnt * sizeof(struct bpf_insn); + return prog->insns_cnt * BPF_INSN_SZ; } int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, -- cgit v1.3.1 From 4ffa22fd22a7cbde1a1394b2707ea73593dc0fda Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Thu, 23 Jul 2020 09:29:43 +0300 Subject: iio: add IIO_MOD_O2 modifier Add modifier IIO_MOD_O2 for O2 concentration reporting Signed-off-by: Matt Ranostay Signed-off-by: Jonathan Cameron --- Documentation/ABI/testing/sysfs-bus-iio | 2 ++ drivers/iio/industrialio-core.c | 1 + include/uapi/linux/iio/types.h | 1 + tools/iio/iio_event_monitor.c | 2 ++ 4 files changed, 6 insertions(+) (limited to 'tools') diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index 5c62bfb0f3f5..405181fde40a 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -1564,6 +1564,8 @@ What: /sys/bus/iio/devices/iio:deviceX/in_concentration_ethanol_raw What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_ethanol_raw What: /sys/bus/iio/devices/iio:deviceX/in_concentration_h2_raw What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_h2_raw +What: /sys/bus/iio/devices/iio:deviceX/in_concentration_o2_raw +What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_o2_raw What: /sys/bus/iio/devices/iio:deviceX/in_concentration_voc_raw What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_voc_raw KernelVersion: 4.3 diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 606d5e61c575..59003dc44e60 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -133,6 +133,7 @@ static const char * const iio_modifier_names[] = { [IIO_MOD_PM10] = "pm10", [IIO_MOD_ETHANOL] = "ethanol", [IIO_MOD_H2] = "h2", + [IIO_MOD_O2] = "o2", }; /* relies on pairs of these shared then separate */ diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index fdd81affca4b..48c13147c0a8 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -94,6 +94,7 @@ enum iio_modifier { IIO_MOD_PM10, IIO_MOD_ETHANOL, IIO_MOD_H2, + IIO_MOD_O2, }; enum iio_event_type { diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c index f115d166c985..bb03859db89d 100644 --- a/tools/iio/iio_event_monitor.c +++ b/tools/iio/iio_event_monitor.c @@ -119,6 +119,7 @@ static const char * const iio_modifier_names[] = { [IIO_MOD_PM2P5] = "pm2p5", [IIO_MOD_PM4] = "pm4", [IIO_MOD_PM10] = "pm10", + [IIO_MOD_O2] = "o2", }; static bool event_is_known(struct iio_event_data *event) @@ -211,6 +212,7 @@ static bool event_is_known(struct iio_event_data *event) case IIO_MOD_PM2P5: case IIO_MOD_PM4: case IIO_MOD_PM10: + case IIO_MOD_O2: break; default: return false; -- cgit v1.3.1 From 2b8ee4f05d4f6a6c427ad30dd6c1bb49eb2efd3b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:00:21 -0700 Subject: tcp: bpf: Add TCP_BPF_DELACK_MAX setsockopt This change is mostly from an internal patch and adapts it from sysctl config to the bpf_setsockopt setup. The bpf_prog can set the max delay ack by using bpf_setsockopt(TCP_BPF_DELACK_MAX). This max delay ack can be communicated to its peer through bpf header option. The receiving peer can then use this max delay ack and set a potentially lower rto by using bpf_setsockopt(TCP_BPF_RTO_MIN) which will be introduced in the next patch. Another later selftest patch will also use it like the above to show how to write and parse bpf tcp header option. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200820190021.2884000-1-kafai@fb.com --- include/net/inet_connection_sock.h | 1 + include/uapi/linux/bpf.h | 1 + net/core/filter.c | 8 ++++++++ net/ipv4/tcp.c | 2 ++ net/ipv4/tcp_output.c | 2 ++ tools/include/uapi/linux/bpf.h | 1 + 6 files changed, 15 insertions(+) (limited to 'tools') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index aa8893c68c50..da7264a1ebfc 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -86,6 +86,7 @@ struct inet_connection_sock { struct timer_list icsk_retransmit_timer; struct timer_list icsk_delack_timer; __u32 icsk_rto; + __u32 icsk_delack_max; __u32 icsk_pmtu_cookie; const struct tcp_congestion_ops *icsk_ca_ops; const struct inet_connection_sock_af_ops *icsk_af_ops; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a1bbaff7a0af..7b905cb0213e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4257,6 +4257,7 @@ enum { enum { TCP_BPF_IW = 1001, /* Set TCP initial congestion window */ TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ + TCP_BPF_DELACK_MAX = 1003, /* Max delay ack in usecs */ }; struct bpf_perf_event_value { diff --git a/net/core/filter.c b/net/core/filter.c index c847b1285acd..80fe7420f609 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4459,6 +4459,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, } else { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + unsigned long timeout; if (optlen != sizeof(int)) return -EINVAL; @@ -4480,6 +4481,13 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, tp->snd_ssthresh = val; } break; + case TCP_BPF_DELACK_MAX: + timeout = usecs_to_jiffies(val); + if (timeout > TCP_DELACK_MAX || + timeout < TCP_TIMEOUT_MIN) + return -EINVAL; + inet_csk(sk)->icsk_delack_max = timeout; + break; case TCP_SAVE_SYN: if (val < 0 || val > 1) ret = -EINVAL; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 87d3036d8bd8..44c353a39ad4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -418,6 +418,7 @@ void tcp_init_sock(struct sock *sk) INIT_LIST_HEAD(&tp->tsorted_sent_queue); icsk->icsk_rto = TCP_TIMEOUT_INIT; + icsk->icsk_delack_max = TCP_DELACK_MAX; tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U); @@ -2685,6 +2686,7 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_backoff = 0; icsk->icsk_probes_out = 0; icsk->icsk_rto = TCP_TIMEOUT_INIT; + icsk->icsk_delack_max = TCP_DELACK_MAX; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd = TCP_INIT_CWND; tp->snd_cwnd_cnt = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 85ff417bda7f..44ffa4891beb 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3741,6 +3741,8 @@ void tcp_send_delayed_ack(struct sock *sk) ato = min(ato, max_ato); } + ato = min_t(u32, ato, inet_csk(sk)->icsk_delack_max); + /* Stay within the limit we were given */ timeout = jiffies + ato; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a1bbaff7a0af..7b905cb0213e 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4257,6 +4257,7 @@ enum { enum { TCP_BPF_IW = 1001, /* Set TCP initial congestion window */ TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ + TCP_BPF_DELACK_MAX = 1003, /* Max delay ack in usecs */ }; struct bpf_perf_event_value { -- cgit v1.3.1 From ca584ba070864c606f3a54faaafe774726d5b4a1 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:00:27 -0700 Subject: tcp: bpf: Add TCP_BPF_RTO_MIN for bpf_setsockopt This patch adds bpf_setsockopt(TCP_BPF_RTO_MIN) to allow bpf prog to set the min rto of a connection. It could be used together with the earlier patch which has added bpf_setsockopt(TCP_BPF_DELACK_MAX). A later selftest patch will communicate the max delay ack in a bpf tcp header option and then the receiving side can use bpf_setsockopt(TCP_BPF_RTO_MIN) to set a shorter rto. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200820190027.2884170-1-kafai@fb.com --- include/net/inet_connection_sock.h | 1 + include/net/tcp.h | 2 +- include/uapi/linux/bpf.h | 1 + net/core/filter.c | 7 +++++++ net/ipv4/tcp.c | 2 ++ tools/include/uapi/linux/bpf.h | 1 + 6 files changed, 13 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index da7264a1ebfc..c738abeb3265 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -86,6 +86,7 @@ struct inet_connection_sock { struct timer_list icsk_retransmit_timer; struct timer_list icsk_delack_timer; __u32 icsk_rto; + __u32 icsk_rto_min; __u32 icsk_delack_max; __u32 icsk_pmtu_cookie; const struct tcp_congestion_ops *icsk_ca_ops; diff --git a/include/net/tcp.h b/include/net/tcp.h index eab6c7510b5b..dda778c782fe 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -699,7 +699,7 @@ static inline void tcp_fast_path_check(struct sock *sk) static inline u32 tcp_rto_min(struct sock *sk) { const struct dst_entry *dst = __sk_dst_get(sk); - u32 rto_min = TCP_RTO_MIN; + u32 rto_min = inet_csk(sk)->icsk_rto_min; if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7b905cb0213e..1ae20058b574 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4258,6 +4258,7 @@ enum { TCP_BPF_IW = 1001, /* Set TCP initial congestion window */ TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ TCP_BPF_DELACK_MAX = 1003, /* Max delay ack in usecs */ + TCP_BPF_RTO_MIN = 1004, /* Min delay ack in usecs */ }; struct bpf_perf_event_value { diff --git a/net/core/filter.c b/net/core/filter.c index 80fe7420f609..075ab71b985c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4488,6 +4488,13 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; inet_csk(sk)->icsk_delack_max = timeout; break; + case TCP_BPF_RTO_MIN: + timeout = usecs_to_jiffies(val); + if (timeout > TCP_RTO_MIN || + timeout < TCP_TIMEOUT_MIN) + return -EINVAL; + inet_csk(sk)->icsk_rto_min = timeout; + break; case TCP_SAVE_SYN: if (val < 0 || val > 1) ret = -EINVAL; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 44c353a39ad4..6075cb091a20 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -418,6 +418,7 @@ void tcp_init_sock(struct sock *sk) INIT_LIST_HEAD(&tp->tsorted_sent_queue); icsk->icsk_rto = TCP_TIMEOUT_INIT; + icsk->icsk_rto_min = TCP_RTO_MIN; icsk->icsk_delack_max = TCP_DELACK_MAX; tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U); @@ -2686,6 +2687,7 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_backoff = 0; icsk->icsk_probes_out = 0; icsk->icsk_rto = TCP_TIMEOUT_INIT; + icsk->icsk_rto_min = TCP_RTO_MIN; icsk->icsk_delack_max = TCP_DELACK_MAX; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd = TCP_INIT_CWND; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7b905cb0213e..1ae20058b574 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4258,6 +4258,7 @@ enum { TCP_BPF_IW = 1001, /* Set TCP initial congestion window */ TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ TCP_BPF_DELACK_MAX = 1003, /* Max delay ack in usecs */ + TCP_BPF_RTO_MIN = 1004, /* Min delay ack in usecs */ }; struct bpf_perf_event_value { -- cgit v1.3.1 From 00d211a4ea6f48e8e3b758813fe23ad28193d3bf Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:00:46 -0700 Subject: bpf: tcp: Add bpf_skops_parse_hdr() The patch adds a function bpf_skops_parse_hdr(). It will call the bpf prog to parse the TCP header received at a tcp_sock that has at least reached the ESTABLISHED state. For the packets received during the 3WHS (SYN, SYNACK and ACK), the received skb will be available to the bpf prog during the callback in bpf_skops_established() introduced in the previous patch and in the bpf_skops_write_hdr_opt() that will be added in the next patch. Calling bpf prog to parse header is controlled by two new flags in tp->bpf_sock_ops_cb_flags: BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG and BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG. When BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG is set, the bpf prog will only be called when there is unknown option in the TCP header. When BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG is set, the bpf prog will be called on all received TCP header. This function is half implemented to highlight the changes in TCP stack. The actual codes preparing the bpf running context and invoking the bpf prog will be added in the later patch with other necessary bpf pieces. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/bpf/20200820190046.2885054-1-kafai@fb.com --- include/uapi/linux/bpf.h | 4 +++- net/ipv4/tcp_input.c | 36 ++++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 4 +++- 3 files changed, 42 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1ae20058b574..010ed2abcb66 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4173,8 +4173,10 @@ enum { BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1), BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2), BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), + BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4), + BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5), /* Mask of all currently supported cb flags */ - BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF, + BPF_SOCK_OPS_ALL_CB_FLAGS = 0x3F, }; /* List of known BPF sock_ops operators. diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7b0faa2bfe32..b520450170d1 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -139,6 +139,36 @@ EXPORT_SYMBOL_GPL(clean_acked_data_flush); #endif #ifdef CONFIG_CGROUP_BPF +static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb) +{ + bool unknown_opt = tcp_sk(sk)->rx_opt.saw_unknown && + BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), + BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG); + bool parse_all_opt = BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), + BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG); + + if (likely(!unknown_opt && !parse_all_opt)) + return; + + /* The skb will be handled in the + * bpf_skops_established() or + * bpf_skops_write_hdr_opt(). + */ + switch (sk->sk_state) { + case TCP_SYN_RECV: + case TCP_SYN_SENT: + case TCP_LISTEN: + return; + } + + /* BPF prog will have access to the sk and skb. + * + * The bpf running context preparation and the actual bpf prog + * calling will be implemented in a later PATCH together with + * other bpf pieces. + */ +} + static void bpf_skops_established(struct sock *sk, int bpf_op, struct sk_buff *skb) { @@ -155,6 +185,10 @@ static void bpf_skops_established(struct sock *sk, int bpf_op, BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops); } #else +static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb) +{ +} + static void bpf_skops_established(struct sock *sk, int bpf_op, struct sk_buff *skb) { @@ -5623,6 +5657,8 @@ syn_challenge: goto discard; } + bpf_skops_parse_hdr(sk, skb); + return true; discard: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1ae20058b574..010ed2abcb66 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4173,8 +4173,10 @@ enum { BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1), BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2), BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), + BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4), + BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5), /* Mask of all currently supported cb flags */ - BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF, + BPF_SOCK_OPS_ALL_CB_FLAGS = 0x3F, }; /* List of known BPF sock_ops operators. -- cgit v1.3.1 From 331fca4315efa3bbd258fbdf8209d59d253c0480 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:00:52 -0700 Subject: bpf: tcp: Add bpf_skops_hdr_opt_len() and bpf_skops_write_hdr_opt() The bpf prog needs to parse the SYN header to learn what options have been sent by the peer's bpf-prog before writing its options into SYNACK. This patch adds a "syn_skb" arg to tcp_make_synack() and send_synack(). This syn_skb will eventually be made available (as read-only) to the bpf prog. This will be the only SYN packet available to the bpf prog during syncookie. For other regular cases, the bpf prog can also use the saved_syn. When writing options, the bpf prog will first be called to tell the kernel its required number of bytes. It is done by the new bpf_skops_hdr_opt_len(). The bpf prog will only be called when the new BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG is set in tp->bpf_sock_ops_cb_flags. When the bpf prog returns, the kernel will know how many bytes are needed and then update the "*remaining" arg accordingly. 4 byte alignment will be included in the "*remaining" before this function returns. The 4 byte aligned number of bytes will also be stored into the opts->bpf_opt_len. "bpf_opt_len" is a newly added member to the struct tcp_out_options. Then the new bpf_skops_write_hdr_opt() will call the bpf prog to write the header options. The bpf prog is only called if it has reserved spaces before (opts->bpf_opt_len > 0). The bpf prog is the last one getting a chance to reserve header space and writing the header option. These two functions are half implemented to highlight the changes in TCP stack. The actual codes preparing the bpf running context and invoking the bpf prog will be added in the later patch with other necessary bpf pieces. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/bpf/20200820190052.2885316-1-kafai@fb.com --- include/net/tcp.h | 6 ++- include/uapi/linux/bpf.h | 3 +- net/ipv4/tcp_input.c | 5 +- net/ipv4/tcp_ipv4.c | 5 +- net/ipv4/tcp_output.c | 105 ++++++++++++++++++++++++++++++++++++----- net/ipv6/tcp_ipv6.c | 5 +- tools/include/uapi/linux/bpf.h | 3 +- 7 files changed, 109 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/include/net/tcp.h b/include/net/tcp.h index c186dbf731e1..3e768a6b8264 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -455,7 +455,8 @@ enum tcp_synack_type { struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, struct request_sock *req, struct tcp_fastopen_cookie *foc, - enum tcp_synack_type synack_type); + enum tcp_synack_type synack_type, + struct sk_buff *syn_skb); int tcp_disconnect(struct sock *sk, int flags); void tcp_finish_connect(struct sock *sk, struct sk_buff *skb); @@ -2035,7 +2036,8 @@ struct tcp_request_sock_ops { int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, - enum tcp_synack_type synack_type); + enum tcp_synack_type synack_type, + struct sk_buff *syn_skb); }; extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 010ed2abcb66..18d0e128bc3c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4175,8 +4175,9 @@ enum { BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4), BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5), + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), /* Mask of all currently supported cb flags */ - BPF_SOCK_OPS_ALL_CB_FLAGS = 0x3F, + BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, }; /* List of known BPF sock_ops operators. diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b520450170d1..8c9da4b65dae 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6824,7 +6824,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, } if (fastopen_sk) { af_ops->send_synack(fastopen_sk, dst, &fl, req, - &foc, TCP_SYNACK_FASTOPEN); + &foc, TCP_SYNACK_FASTOPEN, skb); /* Add the child socket directly into the accept queue */ if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) { reqsk_fastopen_remove(fastopen_sk, req, false); @@ -6842,7 +6842,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_timeout_init((struct sock *)req)); af_ops->send_synack(sk, dst, &fl, req, &foc, !want_cookie ? TCP_SYNACK_NORMAL : - TCP_SYNACK_COOKIE); + TCP_SYNACK_COOKIE, + skb); if (want_cookie) { reqsk_free(req); return 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5084333b5ab6..631a5ee0dd4e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -965,7 +965,8 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, - enum tcp_synack_type synack_type) + enum tcp_synack_type synack_type, + struct sk_buff *syn_skb) { const struct inet_request_sock *ireq = inet_rsk(req); struct flowi4 fl4; @@ -976,7 +977,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; - skb = tcp_make_synack(sk, dst, req, foc, synack_type); + skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 44ffa4891beb..673db6879e46 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -438,6 +438,7 @@ struct tcp_out_options { u8 ws; /* window scale, 0 to disable */ u8 num_sack_blocks; /* number of SACK blocks to include */ u8 hash_size; /* bytes in hash_location */ + u8 bpf_opt_len; /* length of BPF hdr option */ __u8 *hash_location; /* temporary pointer, overloaded */ __u32 tsval, tsecr; /* need to include OPTION_TS */ struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ @@ -452,6 +453,59 @@ static void mptcp_options_write(__be32 *ptr, struct tcp_out_options *opts) #endif } +#ifdef CONFIG_CGROUP_BPF +/* req, syn_skb and synack_type are used when writing synack */ +static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct sk_buff *syn_skb, + enum tcp_synack_type synack_type, + struct tcp_out_options *opts, + unsigned int *remaining) +{ + if (likely(!BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG)) || + !*remaining) + return; + + /* The bpf running context preparation and the actual bpf prog + * calling will be implemented in a later PATCH together with + * other bpf pieces. + */ +} + +static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct sk_buff *syn_skb, + enum tcp_synack_type synack_type, + struct tcp_out_options *opts) +{ + if (likely(!opts->bpf_opt_len)) + return; + + /* The bpf running context preparation and the actual bpf prog + * calling will be implemented in a later PATCH together with + * other bpf pieces. + */ +} +#else +static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct sk_buff *syn_skb, + enum tcp_synack_type synack_type, + struct tcp_out_options *opts, + unsigned int *remaining) +{ +} + +static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct sk_buff *syn_skb, + enum tcp_synack_type synack_type, + struct tcp_out_options *opts) +{ +} +#endif + /* Write previously computed TCP options to the packet. * * Beware: Something in the Internet is very sensitive to the ordering of @@ -691,6 +745,8 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, } } + bpf_skops_hdr_opt_len(sk, skb, NULL, NULL, 0, opts, &remaining); + return MAX_TCP_OPTION_SPACE - remaining; } @@ -701,7 +757,8 @@ static unsigned int tcp_synack_options(const struct sock *sk, struct tcp_out_options *opts, const struct tcp_md5sig_key *md5, struct tcp_fastopen_cookie *foc, - enum tcp_synack_type synack_type) + enum tcp_synack_type synack_type, + struct sk_buff *syn_skb) { struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; @@ -758,6 +815,9 @@ static unsigned int tcp_synack_options(const struct sock *sk, smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining); + bpf_skops_hdr_opt_len((struct sock *)sk, skb, req, syn_skb, + synack_type, opts, &remaining); + return MAX_TCP_OPTION_SPACE - remaining; } @@ -826,6 +886,15 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK; } + if (unlikely(BPF_SOCK_OPS_TEST_FLAG(tp, + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG))) { + unsigned int remaining = MAX_TCP_OPTION_SPACE - size; + + bpf_skops_hdr_opt_len(sk, skb, NULL, NULL, 0, opts, &remaining); + + size = MAX_TCP_OPTION_SPACE - remaining; + } + return size; } @@ -1213,6 +1282,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, } #endif + /* BPF prog is the last one writing header option */ + bpf_skops_write_hdr_opt(sk, skb, NULL, NULL, 0, &opts); + INDIRECT_CALL_INET(icsk->icsk_af_ops->send_check, tcp_v6_send_check, tcp_v4_send_check, sk, skb); @@ -3336,20 +3408,20 @@ int tcp_send_synack(struct sock *sk) } /** - * tcp_make_synack - Prepare a SYN-ACK. - * sk: listener socket - * dst: dst entry attached to the SYNACK - * req: request_sock pointer - * foc: cookie for tcp fast open - * synack_type: Type of synback to prepare - * - * Allocate one skb and build a SYNACK packet. - * @dst is consumed : Caller should not use it again. + * tcp_make_synack - Allocate one skb and build a SYNACK packet. + * @sk: listener socket + * @dst: dst entry attached to the SYNACK. It is consumed and caller + * should not use it again. + * @req: request_sock pointer + * @foc: cookie for tcp fast open + * @synack_type: Type of synack to prepare + * @syn_skb: SYN packet just received. It could be NULL for rtx case. */ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, struct request_sock *req, struct tcp_fastopen_cookie *foc, - enum tcp_synack_type synack_type) + enum tcp_synack_type synack_type, + struct sk_buff *syn_skb) { struct inet_request_sock *ireq = inet_rsk(req); const struct tcp_sock *tp = tcp_sk(sk); @@ -3408,8 +3480,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); #endif skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); + /* bpf program will be interested in the tcp_flags */ + TCP_SKB_CB(skb)->tcp_flags = TCPHDR_SYN | TCPHDR_ACK; tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, - foc, synack_type) + sizeof(*th); + foc, synack_type, + syn_skb) + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -3441,6 +3516,9 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, rcu_read_unlock(); #endif + bpf_skops_write_hdr_opt((struct sock *)sk, skb, req, syn_skb, + synack_type, &opts); + skb->skb_mstamp_ns = now; tcp_add_tx_delay(skb, tp); @@ -3936,7 +4014,8 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) int res; tcp_rsk(req)->txhash = net_tx_rndhash(); - res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL); + res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL, + NULL); if (!res) { __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 305870a72352..87a633e1fbef 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -501,7 +501,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, - enum tcp_synack_type synack_type) + enum tcp_synack_type synack_type, + struct sk_buff *syn_skb) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = tcp_inet6_sk(sk); @@ -515,7 +516,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, IPPROTO_TCP)) == NULL) goto done; - skb = tcp_make_synack(sk, dst, req, foc, synack_type); + skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); if (skb) { __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 010ed2abcb66..18d0e128bc3c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4175,8 +4175,9 @@ enum { BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4), BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5), + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), /* Mask of all currently supported cb flags */ - BPF_SOCK_OPS_ALL_CB_FLAGS = 0x3F, + BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, }; /* List of known BPF sock_ops operators. -- cgit v1.3.1 From 0813a841566f0962a5551be7749b43c45f0022a0 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:01:04 -0700 Subject: bpf: tcp: Allow bpf prog to write and parse TCP header option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Note: The TCP changes here is mainly to implement the bpf pieces into the bpf_skops_*() functions introduced in the earlier patches. ] The earlier effort in BPF-TCP-CC allows the TCP Congestion Control algorithm to be written in BPF. It opens up opportunities to allow a faster turnaround time in testing/releasing new congestion control ideas to production environment. The same flexibility can be extended to writing TCP header option. It is not uncommon that people want to test new TCP header option to improve the TCP performance. Another use case is for data-center that has a more controlled environment and has more flexibility in putting header options for internal only use. For example, we want to test the idea in putting maximum delay ACK in TCP header option which is similar to a draft RFC proposal [1]. This patch introduces the necessary BPF API and use them in the TCP stack to allow BPF_PROG_TYPE_SOCK_OPS program to parse and write TCP header options. It currently supports most of the TCP packet except RST. Supported TCP header option: ─────────────────────────── This patch allows the bpf-prog to write any option kind. Different bpf-progs can write its own option by calling the new helper bpf_store_hdr_opt(). The helper will ensure there is no duplicated option in the header. By allowing bpf-prog to write any option kind, this gives a lot of flexibility to the bpf-prog. Different bpf-prog can write its own option kind. It could also allow the bpf-prog to support a recently standardized option on an older kernel. Sockops Callback Flags: ────────────────────── The bpf program will only be called to parse/write tcp header option if the following newly added callback flags are enabled in tp->bpf_sock_ops_cb_flags: BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG A few words on the PARSE CB flags. When the above PARSE CB flags are turned on, the bpf-prog will be called on packets received at a sk that has at least reached the ESTABLISHED state. The parsing of the SYN-SYNACK-ACK will be discussed in the "3 Way HandShake" section. The default is off for all of the above new CB flags, i.e. the bpf prog will not be called to parse or write bpf hdr option. There are details comment on these new cb flags in the UAPI bpf.h. sock_ops->skb_data and bpf_load_hdr_opt() ───────────────────────────────────────── sock_ops->skb_data and sock_ops->skb_data_end covers the whole TCP header and its options. They are read only. The new bpf_load_hdr_opt() helps to read a particular option "kind" from the skb_data. Please refer to the comment in UAPI bpf.h. It has details on what skb_data contains under different sock_ops->op. 3 Way HandShake ─────────────── The bpf-prog can learn if it is sending SYN or SYNACK by reading the sock_ops->skb_tcp_flags. * Passive side When writing SYNACK (i.e. sock_ops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB), the received SYN skb will be available to the bpf prog. The bpf prog can use the SYN skb (which may carry the header option sent from the remote bpf prog) to decide what bpf header option should be written to the outgoing SYNACK skb. The SYN packet can be obtained by getsockopt(TCP_BPF_SYN*). More on this later. Also, the bpf prog can learn if it is in syncookie mode (by checking sock_ops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE). The bpf prog can store the received SYN pkt by using the existing bpf_setsockopt(TCP_SAVE_SYN). The example in a later patch does it. [ Note that the fullsock here is a listen sk, bpf_sk_storage is not very useful here since the listen sk will be shared by many concurrent connection requests. Extending bpf_sk_storage support to request_sock will add weight to the minisock and it is not necessary better than storing the whole ~100 bytes SYN pkt. ] When the connection is established, the bpf prog will be called in the existing PASSIVE_ESTABLISHED_CB callback. At that time, the bpf prog can get the header option from the saved syn and then apply the needed operation to the newly established socket. The later patch will use the max delay ack specified in the SYN header and set the RTO of this newly established connection as an example. The received ACK (that concludes the 3WHS) will also be available to the bpf prog during PASSIVE_ESTABLISHED_CB through the sock_ops->skb_data. It could be useful in syncookie scenario. More on this later. There is an existing getsockopt "TCP_SAVED_SYN" to return the whole saved syn pkt which includes the IP[46] header and the TCP header. A few "TCP_BPF_SYN*" getsockopt has been added to allow specifying where to start getting from, e.g. starting from TCP header, or from IP[46] header. The new getsockopt(TCP_BPF_SYN*) will also know where it can get the SYN's packet from: - (a) the just received syn (available when the bpf prog is writing SYNACK) and it is the only way to get SYN during syncookie mode. or - (b) the saved syn (available in PASSIVE_ESTABLISHED_CB and also other existing CB). The bpf prog does not need to know where the SYN pkt is coming from. The getsockopt(TCP_BPF_SYN*) will hide this details. Similarly, a flags "BPF_LOAD_HDR_OPT_TCP_SYN" is also added to bpf_load_hdr_opt() to read a particular header option from the SYN packet. * Fastopen Fastopen should work the same as the regular non fastopen case. This is a test in a later patch. * Syncookie For syncookie, the later example patch asks the active side's bpf prog to resend the header options in ACK. The server can use bpf_load_hdr_opt() to look at the options in this received ACK during PASSIVE_ESTABLISHED_CB. * Active side The bpf prog will get a chance to write the bpf header option in the SYN packet during WRITE_HDR_OPT_CB. The received SYNACK pkt will also be available to the bpf prog during the existing ACTIVE_ESTABLISHED_CB callback through the sock_ops->skb_data and bpf_load_hdr_opt(). * Turn off header CB flags after 3WHS If the bpf prog does not need to write/parse header options beyond the 3WHS, the bpf prog can clear the bpf_sock_ops_cb_flags to avoid being called for header options. Or the bpf-prog can select to leave the UNKNOWN_HDR_OPT_CB_FLAG on so that the kernel will only call it when there is option that the kernel cannot handle. [1]: draft-wang-tcpm-low-latency-opt-00 https://tools.ietf.org/html/draft-wang-tcpm-low-latency-opt-00 Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200820190104.2885895-1-kafai@fb.com --- include/linux/bpf-cgroup.h | 25 +++ include/linux/filter.h | 4 + include/net/tcp.h | 49 ++++++ include/uapi/linux/bpf.h | 300 ++++++++++++++++++++++++++++++++- net/core/filter.c | 365 +++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_input.c | 20 ++- net/ipv4/tcp_minisocks.c | 1 + net/ipv4/tcp_output.c | 104 +++++++++++- tools/include/uapi/linux/bpf.h | 300 ++++++++++++++++++++++++++++++++- 9 files changed, 1150 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 64f367044e25..2f98d2fce62e 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -279,6 +279,31 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_RECVMSG, NULL) +/* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a + * fullsock and its parent fullsock cannot be traced by + * sk_to_full_sk(). + * + * e.g. sock_ops->sk is a request_sock and it is under syncookie mode. + * Its listener-sk is not attached to the rsk_listener. + * In this case, the caller holds the listener-sk (unlocked), + * set its sock_ops->sk to req_sk, and call this SOCK_OPS"_SK" with + * the listener-sk such that the cgroup-bpf-progs of the + * listener-sk will be run. + * + * Regardless of syncookie mode or not, + * calling bpf_setsockopt on listener-sk will not make sense anyway, + * so passing 'sock_ops->sk == req_sk' to the bpf prog is appropriate here. + */ +#define BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(sock_ops, sk) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_run_filter_sock_ops(sk, \ + sock_ops, \ + BPF_CGROUP_SOCK_OPS); \ + __ret; \ +}) + #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \ ({ \ int __ret = 0; \ diff --git a/include/linux/filter.h b/include/linux/filter.h index c427dfa5f908..995625950cc1 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1241,8 +1241,12 @@ struct bpf_sock_ops_kern { u32 reply; u32 replylong[4]; }; + struct sk_buff *syn_skb; + struct sk_buff *skb; + void *skb_data_end; u8 op; u8 is_fullsock; + u8 remaining_opt_len; u64 temp; /* temp and everything after is not * initialized to 0 before calling * the BPF program. New fields that diff --git a/include/net/tcp.h b/include/net/tcp.h index 3e768a6b8264..1f967b4e22f6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2235,6 +2235,55 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, int len, int flags); #endif /* CONFIG_NET_SOCK_MSG */ +#ifdef CONFIG_CGROUP_BPF +/* Copy the listen sk's HDR_OPT_CB flags to its child. + * + * During 3-Way-HandShake, the synack is usually sent from + * the listen sk with the HDR_OPT_CB flags set so that + * bpf-prog will be called to write the BPF hdr option. + * + * In fastopen, the child sk is used to send synack instead + * of the listen sk. Thus, inheriting the HDR_OPT_CB flags + * from the listen sk gives the bpf-prog a chance to write + * BPF hdr option in the synack pkt during fastopen. + * + * Both fastopen and non-fastopen child will inherit the + * HDR_OPT_CB flags to keep the bpf-prog having a consistent + * behavior when deciding to clear this cb flags (or not) + * during the PASSIVE_ESTABLISHED_CB. + * + * In the future, other cb flags could be inherited here also. + */ +static inline void bpf_skops_init_child(const struct sock *sk, + struct sock *child) +{ + tcp_sk(child)->bpf_sock_ops_cb_flags = + tcp_sk(sk)->bpf_sock_ops_cb_flags & + (BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG | + BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG | + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG); +} + +static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops, + struct sk_buff *skb, + unsigned int end_offset) +{ + skops->skb = skb; + skops->skb_data_end = skb->data + end_offset; +} +#else +static inline void bpf_skops_init_child(const struct sock *sk, + struct sock *child) +{ +} + +static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops, + struct sk_buff *skb, + unsigned int end_offset) +{ +} +#endif + /* Call BPF_SOCK_OPS program that returns an int. If the return value * is < 0, then the BPF op failed (for example if the loaded BPF * program does not support the chosen operation or there is no BPF diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 18d0e128bc3c..f67ec5d9e57d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3395,6 +3395,120 @@ union bpf_attr { * A non-negative value equal to or less than *size* on success, * or a negative error in case of failure. * + * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags) + * Description + * Load header option. Support reading a particular TCP header + * option for bpf program (BPF_PROG_TYPE_SOCK_OPS). + * + * If *flags* is 0, it will search the option from the + * sock_ops->skb_data. The comment in "struct bpf_sock_ops" + * has details on what skb_data contains under different + * sock_ops->op. + * + * The first byte of the *searchby_res* specifies the + * kind that it wants to search. + * + * If the searching kind is an experimental kind + * (i.e. 253 or 254 according to RFC6994). It also + * needs to specify the "magic" which is either + * 2 bytes or 4 bytes. It then also needs to + * specify the size of the magic by using + * the 2nd byte which is "kind-length" of a TCP + * header option and the "kind-length" also + * includes the first 2 bytes "kind" and "kind-length" + * itself as a normal TCP header option also does. + * + * For example, to search experimental kind 254 with + * 2 byte magic 0xeB9F, the searchby_res should be + * [ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ]. + * + * To search for the standard window scale option (3), + * the searchby_res should be [ 3, 0, 0, .... 0 ]. + * Note, kind-length must be 0 for regular option. + * + * Searching for No-Op (0) and End-of-Option-List (1) are + * not supported. + * + * *len* must be at least 2 bytes which is the minimal size + * of a header option. + * + * Supported flags: + * * **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the + * saved_syn packet or the just-received syn packet. + * + * Return + * >0 when found, the header option is copied to *searchby_res*. + * The return value is the total length copied. + * + * **-EINVAL** If param is invalid + * + * **-ENOMSG** The option is not found + * + * **-ENOENT** No syn packet available when + * **BPF_LOAD_HDR_OPT_TCP_SYN** is used + * + * **-ENOSPC** Not enough space. Only *len* number of + * bytes are copied. + * + * **-EFAULT** Cannot parse the header options in the packet + * + * **-EPERM** This helper cannot be used under the + * current sock_ops->op. + * + * long bpf_store_hdr_opt(struct bpf_sock_ops *skops, const void *from, u32 len, u64 flags) + * Description + * Store header option. The data will be copied + * from buffer *from* with length *len* to the TCP header. + * + * The buffer *from* should have the whole option that + * includes the kind, kind-length, and the actual + * option data. The *len* must be at least kind-length + * long. The kind-length does not have to be 4 byte + * aligned. The kernel will take care of the padding + * and setting the 4 bytes aligned value to th->doff. + * + * This helper will check for duplicated option + * by searching the same option in the outgoing skb. + * + * This helper can only be called during + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * + * Return + * 0 on success, or negative error in case of failure: + * + * **-EINVAL** If param is invalid + * + * **-ENOSPC** Not enough space in the header. + * Nothing has been written + * + * **-EEXIST** The option has already existed + * + * **-EFAULT** Cannot parse the existing header options + * + * **-EPERM** This helper cannot be used under the + * current sock_ops->op. + * + * long bpf_reserve_hdr_opt(struct bpf_sock_ops *skops, u32 len, u64 flags) + * Description + * Reserve *len* bytes for the bpf header option. The + * space will be used by bpf_store_hdr_opt() later in + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * + * If bpf_reserve_hdr_opt() is called multiple times, + * the total number of bytes will be reserved. + * + * This helper can only be called during + * BPF_SOCK_OPS_HDR_OPT_LEN_CB. + * + * Return + * 0 on success, or negative error in case of failure: + * + * **-EINVAL** if param is invalid + * + * **-ENOSPC** Not enough space in the header. + * + * **-EPERM** This helper cannot be used under the + * current sock_ops->op. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3539,6 +3653,9 @@ union bpf_attr { FN(skc_to_tcp_request_sock), \ FN(skc_to_udp6_sock), \ FN(get_task_stack), \ + FN(load_hdr_opt), \ + FN(store_hdr_opt), \ + FN(reserve_hdr_opt), /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4165,6 +4282,36 @@ struct bpf_sock_ops { __u64 bytes_received; __u64 bytes_acked; __bpf_md_ptr(struct bpf_sock *, sk); + /* [skb_data, skb_data_end) covers the whole TCP header. + * + * BPF_SOCK_OPS_PARSE_HDR_OPT_CB: The packet received + * BPF_SOCK_OPS_HDR_OPT_LEN_CB: Not useful because the + * header has not been written. + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB: The header and options have + * been written so far. + * BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: The SYNACK that concludes + * the 3WHS. + * BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: The ACK that concludes + * the 3WHS. + * + * bpf_load_hdr_opt() can also be used to read a particular option. + */ + __bpf_md_ptr(void *, skb_data); + __bpf_md_ptr(void *, skb_data_end); + __u32 skb_len; /* The total length of a packet. + * It includes the header, options, + * and payload. + */ + __u32 skb_tcp_flags; /* tcp_flags of the header. It provides + * an easy way to check for tcp_flags + * without parsing skb_data. + * + * In particular, the skb_tcp_flags + * will still be available in + * BPF_SOCK_OPS_HDR_OPT_LEN even though + * the outgoing header has not + * been written yet. + */ }; /* Definitions for bpf_sock_ops_cb_flags */ @@ -4173,8 +4320,48 @@ enum { BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1), BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2), BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), - BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4), + /* Call bpf for all received TCP headers. The bpf prog will be + * called under sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB + * + * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB + * for the header option related helpers that will be useful + * to the bpf programs. + * + * It could be used at the client/active side (i.e. connect() side) + * when the server told it that the server was in syncookie + * mode and required the active side to resend the bpf-written + * options. The active side can keep writing the bpf-options until + * it received a valid packet from the server side to confirm + * the earlier packet (and options) has been received. The later + * example patch is using it like this at the active side when the + * server is in syncookie mode. + * + * The bpf prog will usually turn this off in the common cases. + */ + BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4), + /* Call bpf when kernel has received a header option that + * the kernel cannot handle. The bpf prog will be called under + * sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB. + * + * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB + * for the header option related helpers that will be useful + * to the bpf programs. + */ BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5), + /* Call bpf when the kernel is writing header options for the + * outgoing packet. The bpf prog will first be called + * to reserve space in a skb under + * sock_ops->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB. Then + * the bpf prog will be called to write the header option(s) + * under sock_ops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * + * Please refer to the comment in BPF_SOCK_OPS_HDR_OPT_LEN_CB + * and BPF_SOCK_OPS_WRITE_HDR_OPT_CB for the header option + * related helpers that will be useful to the bpf programs. + * + * The kernel gets its chance to reserve space and write + * options first before the BPF program does. + */ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), /* Mask of all currently supported cb flags */ BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, @@ -4233,6 +4420,63 @@ enum { */ BPF_SOCK_OPS_RTT_CB, /* Called on every RTT. */ + BPF_SOCK_OPS_PARSE_HDR_OPT_CB, /* Parse the header option. + * It will be called to handle + * the packets received at + * an already established + * connection. + * + * sock_ops->skb_data: + * Referring to the received skb. + * It covers the TCP header only. + * + * bpf_load_hdr_opt() can also + * be used to search for a + * particular option. + */ + BPF_SOCK_OPS_HDR_OPT_LEN_CB, /* Reserve space for writing the + * header option later in + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * Arg1: bool want_cookie. (in + * writing SYNACK only) + * + * sock_ops->skb_data: + * Not available because no header has + * been written yet. + * + * sock_ops->skb_tcp_flags: + * The tcp_flags of the + * outgoing skb. (e.g. SYN, ACK, FIN). + * + * bpf_reserve_hdr_opt() should + * be used to reserve space. + */ + BPF_SOCK_OPS_WRITE_HDR_OPT_CB, /* Write the header options + * Arg1: bool want_cookie. (in + * writing SYNACK only) + * + * sock_ops->skb_data: + * Referring to the outgoing skb. + * It covers the TCP header + * that has already been written + * by the kernel and the + * earlier bpf-progs. + * + * sock_ops->skb_tcp_flags: + * The tcp_flags of the outgoing + * skb. (e.g. SYN, ACK, FIN). + * + * bpf_store_hdr_opt() should + * be used to write the + * option. + * + * bpf_load_hdr_opt() can also + * be used to search for a + * particular option that + * has already been written + * by the kernel or the + * earlier bpf-progs. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect @@ -4262,6 +4506,60 @@ enum { TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ TCP_BPF_DELACK_MAX = 1003, /* Max delay ack in usecs */ TCP_BPF_RTO_MIN = 1004, /* Min delay ack in usecs */ + /* Copy the SYN pkt to optval + * + * BPF_PROG_TYPE_SOCK_OPS only. It is similar to the + * bpf_getsockopt(TCP_SAVED_SYN) but it does not limit + * to only getting from the saved_syn. It can either get the + * syn packet from: + * + * 1. the just-received SYN packet (only available when writing the + * SYNACK). It will be useful when it is not necessary to + * save the SYN packet for latter use. It is also the only way + * to get the SYN during syncookie mode because the syn + * packet cannot be saved during syncookie. + * + * OR + * + * 2. the earlier saved syn which was done by + * bpf_setsockopt(TCP_SAVE_SYN). + * + * The bpf_getsockopt(TCP_BPF_SYN*) option will hide where the + * SYN packet is obtained. + * + * If the bpf-prog does not need the IP[46] header, the + * bpf-prog can avoid parsing the IP header by using + * TCP_BPF_SYN. Otherwise, the bpf-prog can get both + * IP[46] and TCP header by using TCP_BPF_SYN_IP. + * + * >0: Total number of bytes copied + * -ENOSPC: Not enough space in optval. Only optlen number of + * bytes is copied. + * -ENOENT: The SYN skb is not available now and the earlier SYN pkt + * is not saved by setsockopt(TCP_SAVE_SYN). + */ + TCP_BPF_SYN = 1005, /* Copy the TCP header */ + TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */ +}; + +enum { + BPF_LOAD_HDR_OPT_TCP_SYN = (1ULL << 0), +}; + +/* args[0] value during BPF_SOCK_OPS_HDR_OPT_LEN_CB and + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + */ +enum { + BPF_WRITE_HDR_TCP_CURRENT_MSS = 1, /* Kernel is finding the + * total option spaces + * required for an established + * sk in order to calculate the + * MSS. No skb is actually + * sent. + */ + BPF_WRITE_HDR_TCP_SYNACK_COOKIE = 2, /* Kernel is in syncookie mode + * when sending a SYN. + */ }; struct bpf_perf_event_value { diff --git a/net/core/filter.c b/net/core/filter.c index 1608f4b3987f..ab5603d5b62a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4669,9 +4669,82 @@ static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = { .arg5_type = ARG_CONST_SIZE, }; +static int bpf_sock_ops_get_syn(struct bpf_sock_ops_kern *bpf_sock, + int optname, const u8 **start) +{ + struct sk_buff *syn_skb = bpf_sock->syn_skb; + const u8 *hdr_start; + int ret; + + if (syn_skb) { + /* sk is a request_sock here */ + + if (optname == TCP_BPF_SYN) { + hdr_start = syn_skb->data; + ret = tcp_hdrlen(syn_skb); + } else { + /* optname == TCP_BPF_SYN_IP */ + hdr_start = skb_network_header(syn_skb); + ret = skb_network_header_len(syn_skb) + + tcp_hdrlen(syn_skb); + } + } else { + struct sock *sk = bpf_sock->sk; + struct saved_syn *saved_syn; + + if (sk->sk_state == TCP_NEW_SYN_RECV) + /* synack retransmit. bpf_sock->syn_skb will + * not be available. It has to resort to + * saved_syn (if it is saved). + */ + saved_syn = inet_reqsk(sk)->saved_syn; + else + saved_syn = tcp_sk(sk)->saved_syn; + + if (!saved_syn) + return -ENOENT; + + if (optname == TCP_BPF_SYN) { + hdr_start = saved_syn->data + + saved_syn->network_hdrlen; + ret = saved_syn->tcp_hdrlen; + } else { + /* optname == TCP_BPF_SYN_IP */ + hdr_start = saved_syn->data; + ret = saved_syn->network_hdrlen + + saved_syn->tcp_hdrlen; + } + } + + *start = hdr_start; + return ret; +} + BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, int, level, int, optname, char *, optval, int, optlen) { + if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP && + optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_IP) { + int ret, copy_len = 0; + const u8 *start; + + ret = bpf_sock_ops_get_syn(bpf_sock, optname, &start); + if (ret > 0) { + copy_len = ret; + if (optlen < copy_len) { + copy_len = optlen; + ret = -ENOSPC; + } + + memcpy(optval, start, copy_len); + } + + /* Zero out unused buffer at the end */ + memset(optval + copy_len, 0, optlen - copy_len); + + return ret; + } + return _bpf_getsockopt(bpf_sock->sk, level, optname, optval, optlen); } @@ -6165,6 +6238,232 @@ static const struct bpf_func_proto bpf_sk_assign_proto = { .arg3_type = ARG_ANYTHING, }; +static const u8 *bpf_search_tcp_opt(const u8 *op, const u8 *opend, + u8 search_kind, const u8 *magic, + u8 magic_len, bool *eol) +{ + u8 kind, kind_len; + + *eol = false; + + while (op < opend) { + kind = op[0]; + + if (kind == TCPOPT_EOL) { + *eol = true; + return ERR_PTR(-ENOMSG); + } else if (kind == TCPOPT_NOP) { + op++; + continue; + } + + if (opend - op < 2 || opend - op < op[1] || op[1] < 2) + /* Something is wrong in the received header. + * Follow the TCP stack's tcp_parse_options() + * and just bail here. + */ + return ERR_PTR(-EFAULT); + + kind_len = op[1]; + if (search_kind == kind) { + if (!magic_len) + return op; + + if (magic_len > kind_len - 2) + return ERR_PTR(-ENOMSG); + + if (!memcmp(&op[2], magic, magic_len)) + return op; + } + + op += kind_len; + } + + return ERR_PTR(-ENOMSG); +} + +BPF_CALL_4(bpf_sock_ops_load_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock, + void *, search_res, u32, len, u64, flags) +{ + bool eol, load_syn = flags & BPF_LOAD_HDR_OPT_TCP_SYN; + const u8 *op, *opend, *magic, *search = search_res; + u8 search_kind, search_len, copy_len, magic_len; + int ret; + + /* 2 byte is the minimal option len except TCPOPT_NOP and + * TCPOPT_EOL which are useless for the bpf prog to learn + * and this helper disallow loading them also. + */ + if (len < 2 || flags & ~BPF_LOAD_HDR_OPT_TCP_SYN) + return -EINVAL; + + search_kind = search[0]; + search_len = search[1]; + + if (search_len > len || search_kind == TCPOPT_NOP || + search_kind == TCPOPT_EOL) + return -EINVAL; + + if (search_kind == TCPOPT_EXP || search_kind == 253) { + /* 16 or 32 bit magic. +2 for kind and kind length */ + if (search_len != 4 && search_len != 6) + return -EINVAL; + magic = &search[2]; + magic_len = search_len - 2; + } else { + if (search_len) + return -EINVAL; + magic = NULL; + magic_len = 0; + } + + if (load_syn) { + ret = bpf_sock_ops_get_syn(bpf_sock, TCP_BPF_SYN, &op); + if (ret < 0) + return ret; + + opend = op + ret; + op += sizeof(struct tcphdr); + } else { + if (!bpf_sock->skb || + bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB) + /* This bpf_sock->op cannot call this helper */ + return -EPERM; + + opend = bpf_sock->skb_data_end; + op = bpf_sock->skb->data + sizeof(struct tcphdr); + } + + op = bpf_search_tcp_opt(op, opend, search_kind, magic, magic_len, + &eol); + if (IS_ERR(op)) + return PTR_ERR(op); + + copy_len = op[1]; + ret = copy_len; + if (copy_len > len) { + ret = -ENOSPC; + copy_len = len; + } + + memcpy(search_res, op, copy_len); + return ret; +} + +static const struct bpf_func_proto bpf_sock_ops_load_hdr_opt_proto = { + .func = bpf_sock_ops_load_hdr_opt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, +}; + +BPF_CALL_4(bpf_sock_ops_store_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock, + const void *, from, u32, len, u64, flags) +{ + u8 new_kind, new_kind_len, magic_len = 0, *opend; + const u8 *op, *new_op, *magic = NULL; + struct sk_buff *skb; + bool eol; + + if (bpf_sock->op != BPF_SOCK_OPS_WRITE_HDR_OPT_CB) + return -EPERM; + + if (len < 2 || flags) + return -EINVAL; + + new_op = from; + new_kind = new_op[0]; + new_kind_len = new_op[1]; + + if (new_kind_len > len || new_kind == TCPOPT_NOP || + new_kind == TCPOPT_EOL) + return -EINVAL; + + if (new_kind_len > bpf_sock->remaining_opt_len) + return -ENOSPC; + + /* 253 is another experimental kind */ + if (new_kind == TCPOPT_EXP || new_kind == 253) { + if (new_kind_len < 4) + return -EINVAL; + /* Match for the 2 byte magic also. + * RFC 6994: the magic could be 2 or 4 bytes. + * Hence, matching by 2 byte only is on the + * conservative side but it is the right + * thing to do for the 'search-for-duplication' + * purpose. + */ + magic = &new_op[2]; + magic_len = 2; + } + + /* Check for duplication */ + skb = bpf_sock->skb; + op = skb->data + sizeof(struct tcphdr); + opend = bpf_sock->skb_data_end; + + op = bpf_search_tcp_opt(op, opend, new_kind, magic, magic_len, + &eol); + if (!IS_ERR(op)) + return -EEXIST; + + if (PTR_ERR(op) != -ENOMSG) + return PTR_ERR(op); + + if (eol) + /* The option has been ended. Treat it as no more + * header option can be written. + */ + return -ENOSPC; + + /* No duplication found. Store the header option. */ + memcpy(opend, from, new_kind_len); + + bpf_sock->remaining_opt_len -= new_kind_len; + bpf_sock->skb_data_end += new_kind_len; + + return 0; +} + +static const struct bpf_func_proto bpf_sock_ops_store_hdr_opt_proto = { + .func = bpf_sock_ops_store_hdr_opt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, +}; + +BPF_CALL_3(bpf_sock_ops_reserve_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock, + u32, len, u64, flags) +{ + if (bpf_sock->op != BPF_SOCK_OPS_HDR_OPT_LEN_CB) + return -EPERM; + + if (flags || len < 2) + return -EINVAL; + + if (len > bpf_sock->remaining_opt_len) + return -ENOSPC; + + bpf_sock->remaining_opt_len -= len; + + return 0; +} + +static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = { + .func = bpf_sock_ops_reserve_hdr_opt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + #endif /* CONFIG_INET */ bool bpf_helper_changes_pkt_data(void *func) @@ -6193,6 +6492,9 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_lwt_seg6_store_bytes || func == bpf_lwt_seg6_adjust_srh || func == bpf_lwt_seg6_action || +#endif +#ifdef CONFIG_INET + func == bpf_sock_ops_store_hdr_opt || #endif func == bpf_lwt_in_push_encap || func == bpf_lwt_xmit_push_encap) @@ -6565,6 +6867,12 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_sk_storage_delete: return &bpf_sk_storage_delete_proto; #ifdef CONFIG_INET + case BPF_FUNC_load_hdr_opt: + return &bpf_sock_ops_load_hdr_opt_proto; + case BPF_FUNC_store_hdr_opt: + return &bpf_sock_ops_store_hdr_opt_proto; + case BPF_FUNC_reserve_hdr_opt: + return &bpf_sock_ops_reserve_hdr_opt_proto; case BPF_FUNC_tcp_sock: return &bpf_tcp_sock_proto; #endif /* CONFIG_INET */ @@ -7364,6 +7672,20 @@ static bool sock_ops_is_valid_access(int off, int size, return false; info->reg_type = PTR_TO_SOCKET_OR_NULL; break; + case offsetof(struct bpf_sock_ops, skb_data): + if (size != sizeof(__u64)) + return false; + info->reg_type = PTR_TO_PACKET; + break; + case offsetof(struct bpf_sock_ops, skb_data_end): + if (size != sizeof(__u64)) + return false; + info->reg_type = PTR_TO_PACKET_END; + break; + case offsetof(struct bpf_sock_ops, skb_tcp_flags): + bpf_ctx_record_field_size(info, size_default); + return bpf_ctx_narrow_access_ok(off, size, + size_default); default: if (size != size_default) return false; @@ -8701,6 +9023,49 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, case offsetof(struct bpf_sock_ops, sk): SOCK_OPS_GET_SK(); break; + case offsetof(struct bpf_sock_ops, skb_data_end): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern, + skb_data_end), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, + skb_data_end)); + break; + case offsetof(struct bpf_sock_ops, skb_data): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern, + skb), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, + skb)); + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), + si->dst_reg, si->dst_reg, + offsetof(struct sk_buff, data)); + break; + case offsetof(struct bpf_sock_ops, skb_len): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern, + skb), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, + skb)); + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len), + si->dst_reg, si->dst_reg, + offsetof(struct sk_buff, len)); + break; + case offsetof(struct bpf_sock_ops, skb_tcp_flags): + off = offsetof(struct sk_buff, cb); + off += offsetof(struct tcp_skb_cb, tcp_flags); + *target_size = sizeof_field(struct tcp_skb_cb, tcp_flags); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern, + skb), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, + skb)); + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_skb_cb, + tcp_flags), + si->dst_reg, si->dst_reg, off); + break; } return insn - insn_buf; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8c9da4b65dae..319cc7fd5117 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -146,6 +146,7 @@ static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb) BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG); bool parse_all_opt = BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG); + struct bpf_sock_ops_kern sock_ops; if (likely(!unknown_opt && !parse_all_opt)) return; @@ -161,12 +162,15 @@ static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb) return; } - /* BPF prog will have access to the sk and skb. - * - * The bpf running context preparation and the actual bpf prog - * calling will be implemented in a later PATCH together with - * other bpf pieces. - */ + sock_owned_by_me(sk); + + memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp)); + sock_ops.op = BPF_SOCK_OPS_PARSE_HDR_OPT_CB; + sock_ops.is_fullsock = 1; + sock_ops.sk = sk; + bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb)); + + BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops); } static void bpf_skops_established(struct sock *sk, int bpf_op, @@ -180,7 +184,9 @@ static void bpf_skops_established(struct sock *sk, int bpf_op, sock_ops.op = bpf_op; sock_ops.is_fullsock = 1; sock_ops.sk = sk; - /* skb will be passed to the bpf prog in a later patch. */ + /* sk with TCP_REPAIR_ON does not have skb in tcp_finish_connect */ + if (skb) + bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb)); BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 495dda2449fe..56c306e3cd2f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -548,6 +548,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->fastopen_req = NULL; RCU_INIT_POINTER(newtp->fastopen_rsk, NULL); + bpf_skops_init_child(sk, newsk); tcp_bpf_clone(sk, newsk); __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 673db6879e46..ab79d36ed07f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -454,6 +454,18 @@ static void mptcp_options_write(__be32 *ptr, struct tcp_out_options *opts) } #ifdef CONFIG_CGROUP_BPF +static int bpf_skops_write_hdr_opt_arg0(struct sk_buff *skb, + enum tcp_synack_type synack_type) +{ + if (unlikely(!skb)) + return BPF_WRITE_HDR_TCP_CURRENT_MSS; + + if (unlikely(synack_type == TCP_SYNACK_COOKIE)) + return BPF_WRITE_HDR_TCP_SYNACK_COOKIE; + + return 0; +} + /* req, syn_skb and synack_type are used when writing synack */ static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb, struct request_sock *req, @@ -462,15 +474,60 @@ static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, unsigned int *remaining) { + struct bpf_sock_ops_kern sock_ops; + int err; + if (likely(!BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG)) || !*remaining) return; - /* The bpf running context preparation and the actual bpf prog - * calling will be implemented in a later PATCH together with - * other bpf pieces. - */ + /* *remaining has already been aligned to 4 bytes, so *remaining >= 4 */ + + /* init sock_ops */ + memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp)); + + sock_ops.op = BPF_SOCK_OPS_HDR_OPT_LEN_CB; + + if (req) { + /* The listen "sk" cannot be passed here because + * it is not locked. It would not make too much + * sense to do bpf_setsockopt(listen_sk) based + * on individual connection request also. + * + * Thus, "req" is passed here and the cgroup-bpf-progs + * of the listen "sk" will be run. + * + * "req" is also used here for fastopen even the "sk" here is + * a fullsock "child" sk. It is to keep the behavior + * consistent between fastopen and non-fastopen on + * the bpf programming side. + */ + sock_ops.sk = (struct sock *)req; + sock_ops.syn_skb = syn_skb; + } else { + sock_owned_by_me(sk); + + sock_ops.is_fullsock = 1; + sock_ops.sk = sk; + } + + sock_ops.args[0] = bpf_skops_write_hdr_opt_arg0(skb, synack_type); + sock_ops.remaining_opt_len = *remaining; + /* tcp_current_mss() does not pass a skb */ + if (skb) + bpf_skops_init_skb(&sock_ops, skb, 0); + + err = BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(&sock_ops, sk); + + if (err || sock_ops.remaining_opt_len == *remaining) + return; + + opts->bpf_opt_len = *remaining - sock_ops.remaining_opt_len; + /* round up to 4 bytes */ + opts->bpf_opt_len = (opts->bpf_opt_len + 3) & ~3; + + *remaining -= opts->bpf_opt_len; } static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb, @@ -479,13 +536,42 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb, enum tcp_synack_type synack_type, struct tcp_out_options *opts) { - if (likely(!opts->bpf_opt_len)) + u8 first_opt_off, nr_written, max_opt_len = opts->bpf_opt_len; + struct bpf_sock_ops_kern sock_ops; + int err; + + if (likely(!max_opt_len)) return; - /* The bpf running context preparation and the actual bpf prog - * calling will be implemented in a later PATCH together with - * other bpf pieces. - */ + memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp)); + + sock_ops.op = BPF_SOCK_OPS_WRITE_HDR_OPT_CB; + + if (req) { + sock_ops.sk = (struct sock *)req; + sock_ops.syn_skb = syn_skb; + } else { + sock_owned_by_me(sk); + + sock_ops.is_fullsock = 1; + sock_ops.sk = sk; + } + + sock_ops.args[0] = bpf_skops_write_hdr_opt_arg0(skb, synack_type); + sock_ops.remaining_opt_len = max_opt_len; + first_opt_off = tcp_hdrlen(skb) - max_opt_len; + bpf_skops_init_skb(&sock_ops, skb, first_opt_off); + + err = BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(&sock_ops, sk); + + if (err) + nr_written = 0; + else + nr_written = max_opt_len - sock_ops.remaining_opt_len; + + if (nr_written < max_opt_len) + memset(skb->data + first_opt_off + nr_written, TCPOPT_NOP, + max_opt_len - nr_written); } #else static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 18d0e128bc3c..f67ec5d9e57d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3395,6 +3395,120 @@ union bpf_attr { * A non-negative value equal to or less than *size* on success, * or a negative error in case of failure. * + * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags) + * Description + * Load header option. Support reading a particular TCP header + * option for bpf program (BPF_PROG_TYPE_SOCK_OPS). + * + * If *flags* is 0, it will search the option from the + * sock_ops->skb_data. The comment in "struct bpf_sock_ops" + * has details on what skb_data contains under different + * sock_ops->op. + * + * The first byte of the *searchby_res* specifies the + * kind that it wants to search. + * + * If the searching kind is an experimental kind + * (i.e. 253 or 254 according to RFC6994). It also + * needs to specify the "magic" which is either + * 2 bytes or 4 bytes. It then also needs to + * specify the size of the magic by using + * the 2nd byte which is "kind-length" of a TCP + * header option and the "kind-length" also + * includes the first 2 bytes "kind" and "kind-length" + * itself as a normal TCP header option also does. + * + * For example, to search experimental kind 254 with + * 2 byte magic 0xeB9F, the searchby_res should be + * [ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ]. + * + * To search for the standard window scale option (3), + * the searchby_res should be [ 3, 0, 0, .... 0 ]. + * Note, kind-length must be 0 for regular option. + * + * Searching for No-Op (0) and End-of-Option-List (1) are + * not supported. + * + * *len* must be at least 2 bytes which is the minimal size + * of a header option. + * + * Supported flags: + * * **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the + * saved_syn packet or the just-received syn packet. + * + * Return + * >0 when found, the header option is copied to *searchby_res*. + * The return value is the total length copied. + * + * **-EINVAL** If param is invalid + * + * **-ENOMSG** The option is not found + * + * **-ENOENT** No syn packet available when + * **BPF_LOAD_HDR_OPT_TCP_SYN** is used + * + * **-ENOSPC** Not enough space. Only *len* number of + * bytes are copied. + * + * **-EFAULT** Cannot parse the header options in the packet + * + * **-EPERM** This helper cannot be used under the + * current sock_ops->op. + * + * long bpf_store_hdr_opt(struct bpf_sock_ops *skops, const void *from, u32 len, u64 flags) + * Description + * Store header option. The data will be copied + * from buffer *from* with length *len* to the TCP header. + * + * The buffer *from* should have the whole option that + * includes the kind, kind-length, and the actual + * option data. The *len* must be at least kind-length + * long. The kind-length does not have to be 4 byte + * aligned. The kernel will take care of the padding + * and setting the 4 bytes aligned value to th->doff. + * + * This helper will check for duplicated option + * by searching the same option in the outgoing skb. + * + * This helper can only be called during + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * + * Return + * 0 on success, or negative error in case of failure: + * + * **-EINVAL** If param is invalid + * + * **-ENOSPC** Not enough space in the header. + * Nothing has been written + * + * **-EEXIST** The option has already existed + * + * **-EFAULT** Cannot parse the existing header options + * + * **-EPERM** This helper cannot be used under the + * current sock_ops->op. + * + * long bpf_reserve_hdr_opt(struct bpf_sock_ops *skops, u32 len, u64 flags) + * Description + * Reserve *len* bytes for the bpf header option. The + * space will be used by bpf_store_hdr_opt() later in + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * + * If bpf_reserve_hdr_opt() is called multiple times, + * the total number of bytes will be reserved. + * + * This helper can only be called during + * BPF_SOCK_OPS_HDR_OPT_LEN_CB. + * + * Return + * 0 on success, or negative error in case of failure: + * + * **-EINVAL** if param is invalid + * + * **-ENOSPC** Not enough space in the header. + * + * **-EPERM** This helper cannot be used under the + * current sock_ops->op. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3539,6 +3653,9 @@ union bpf_attr { FN(skc_to_tcp_request_sock), \ FN(skc_to_udp6_sock), \ FN(get_task_stack), \ + FN(load_hdr_opt), \ + FN(store_hdr_opt), \ + FN(reserve_hdr_opt), /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4165,6 +4282,36 @@ struct bpf_sock_ops { __u64 bytes_received; __u64 bytes_acked; __bpf_md_ptr(struct bpf_sock *, sk); + /* [skb_data, skb_data_end) covers the whole TCP header. + * + * BPF_SOCK_OPS_PARSE_HDR_OPT_CB: The packet received + * BPF_SOCK_OPS_HDR_OPT_LEN_CB: Not useful because the + * header has not been written. + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB: The header and options have + * been written so far. + * BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: The SYNACK that concludes + * the 3WHS. + * BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: The ACK that concludes + * the 3WHS. + * + * bpf_load_hdr_opt() can also be used to read a particular option. + */ + __bpf_md_ptr(void *, skb_data); + __bpf_md_ptr(void *, skb_data_end); + __u32 skb_len; /* The total length of a packet. + * It includes the header, options, + * and payload. + */ + __u32 skb_tcp_flags; /* tcp_flags of the header. It provides + * an easy way to check for tcp_flags + * without parsing skb_data. + * + * In particular, the skb_tcp_flags + * will still be available in + * BPF_SOCK_OPS_HDR_OPT_LEN even though + * the outgoing header has not + * been written yet. + */ }; /* Definitions for bpf_sock_ops_cb_flags */ @@ -4173,8 +4320,48 @@ enum { BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1), BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2), BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), - BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4), + /* Call bpf for all received TCP headers. The bpf prog will be + * called under sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB + * + * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB + * for the header option related helpers that will be useful + * to the bpf programs. + * + * It could be used at the client/active side (i.e. connect() side) + * when the server told it that the server was in syncookie + * mode and required the active side to resend the bpf-written + * options. The active side can keep writing the bpf-options until + * it received a valid packet from the server side to confirm + * the earlier packet (and options) has been received. The later + * example patch is using it like this at the active side when the + * server is in syncookie mode. + * + * The bpf prog will usually turn this off in the common cases. + */ + BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4), + /* Call bpf when kernel has received a header option that + * the kernel cannot handle. The bpf prog will be called under + * sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB. + * + * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB + * for the header option related helpers that will be useful + * to the bpf programs. + */ BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5), + /* Call bpf when the kernel is writing header options for the + * outgoing packet. The bpf prog will first be called + * to reserve space in a skb under + * sock_ops->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB. Then + * the bpf prog will be called to write the header option(s) + * under sock_ops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * + * Please refer to the comment in BPF_SOCK_OPS_HDR_OPT_LEN_CB + * and BPF_SOCK_OPS_WRITE_HDR_OPT_CB for the header option + * related helpers that will be useful to the bpf programs. + * + * The kernel gets its chance to reserve space and write + * options first before the BPF program does. + */ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), /* Mask of all currently supported cb flags */ BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, @@ -4233,6 +4420,63 @@ enum { */ BPF_SOCK_OPS_RTT_CB, /* Called on every RTT. */ + BPF_SOCK_OPS_PARSE_HDR_OPT_CB, /* Parse the header option. + * It will be called to handle + * the packets received at + * an already established + * connection. + * + * sock_ops->skb_data: + * Referring to the received skb. + * It covers the TCP header only. + * + * bpf_load_hdr_opt() can also + * be used to search for a + * particular option. + */ + BPF_SOCK_OPS_HDR_OPT_LEN_CB, /* Reserve space for writing the + * header option later in + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * Arg1: bool want_cookie. (in + * writing SYNACK only) + * + * sock_ops->skb_data: + * Not available because no header has + * been written yet. + * + * sock_ops->skb_tcp_flags: + * The tcp_flags of the + * outgoing skb. (e.g. SYN, ACK, FIN). + * + * bpf_reserve_hdr_opt() should + * be used to reserve space. + */ + BPF_SOCK_OPS_WRITE_HDR_OPT_CB, /* Write the header options + * Arg1: bool want_cookie. (in + * writing SYNACK only) + * + * sock_ops->skb_data: + * Referring to the outgoing skb. + * It covers the TCP header + * that has already been written + * by the kernel and the + * earlier bpf-progs. + * + * sock_ops->skb_tcp_flags: + * The tcp_flags of the outgoing + * skb. (e.g. SYN, ACK, FIN). + * + * bpf_store_hdr_opt() should + * be used to write the + * option. + * + * bpf_load_hdr_opt() can also + * be used to search for a + * particular option that + * has already been written + * by the kernel or the + * earlier bpf-progs. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect @@ -4262,6 +4506,60 @@ enum { TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ TCP_BPF_DELACK_MAX = 1003, /* Max delay ack in usecs */ TCP_BPF_RTO_MIN = 1004, /* Min delay ack in usecs */ + /* Copy the SYN pkt to optval + * + * BPF_PROG_TYPE_SOCK_OPS only. It is similar to the + * bpf_getsockopt(TCP_SAVED_SYN) but it does not limit + * to only getting from the saved_syn. It can either get the + * syn packet from: + * + * 1. the just-received SYN packet (only available when writing the + * SYNACK). It will be useful when it is not necessary to + * save the SYN packet for latter use. It is also the only way + * to get the SYN during syncookie mode because the syn + * packet cannot be saved during syncookie. + * + * OR + * + * 2. the earlier saved syn which was done by + * bpf_setsockopt(TCP_SAVE_SYN). + * + * The bpf_getsockopt(TCP_BPF_SYN*) option will hide where the + * SYN packet is obtained. + * + * If the bpf-prog does not need the IP[46] header, the + * bpf-prog can avoid parsing the IP header by using + * TCP_BPF_SYN. Otherwise, the bpf-prog can get both + * IP[46] and TCP header by using TCP_BPF_SYN_IP. + * + * >0: Total number of bytes copied + * -ENOSPC: Not enough space in optval. Only optlen number of + * bytes is copied. + * -ENOENT: The SYN skb is not available now and the earlier SYN pkt + * is not saved by setsockopt(TCP_SAVE_SYN). + */ + TCP_BPF_SYN = 1005, /* Copy the TCP header */ + TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */ +}; + +enum { + BPF_LOAD_HDR_OPT_TCP_SYN = (1ULL << 0), +}; + +/* args[0] value during BPF_SOCK_OPS_HDR_OPT_LEN_CB and + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + */ +enum { + BPF_WRITE_HDR_TCP_CURRENT_MSS = 1, /* Kernel is finding the + * total option spaces + * required for an established + * sk in order to calculate the + * MSS. No skb is actually + * sent. + */ + BPF_WRITE_HDR_TCP_SYNACK_COOKIE = 2, /* Kernel is in syncookie mode + * when sending a SYN. + */ }; struct bpf_perf_event_value { -- cgit v1.3.1 From 8085e1dc1f3c885e1e9c1ef8031b3eabc1cccf25 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:01:11 -0700 Subject: bpf: selftests: Add fastopen_connect to network_helpers This patch adds a fastopen_connect() helper which will be used in a later test. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200820190111.2886196-1-kafai@fb.com --- tools/testing/selftests/bpf/network_helpers.c | 37 +++++++++++++++++++++++++++ tools/testing/selftests/bpf/network_helpers.h | 2 ++ 2 files changed, 39 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index f56655690f9b..12ee40284da0 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -104,6 +104,43 @@ error_close: return -1; } +int fastopen_connect(int server_fd, const char *data, unsigned int data_len, + int timeout_ms) +{ + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + struct sockaddr_in *addr_in; + int fd, ret; + + if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { + log_err("Failed to get server addr"); + return -1; + } + + addr_in = (struct sockaddr_in *)&addr; + fd = socket(addr_in->sin_family, SOCK_STREAM, 0); + if (fd < 0) { + log_err("Failed to create client socket"); + return -1; + } + + if (settimeo(fd, timeout_ms)) + goto error_close; + + ret = sendto(fd, data, data_len, MSG_FASTOPEN, (struct sockaddr *)&addr, + addrlen); + if (ret != data_len) { + log_err("sendto(data, %u) != %d\n", data_len, ret); + goto error_close; + } + + return fd; + +error_close: + save_errno_close(fd); + return -1; +} + static int connect_fd_to_addr(int fd, const struct sockaddr_storage *addr, socklen_t addrlen) diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index c3728f6667e4..7205f8afdba1 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -37,6 +37,8 @@ int start_server(int family, int type, const char *addr, __u16 port, int timeout_ms); int connect_to_fd(int server_fd, int timeout_ms); int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms); +int fastopen_connect(int server_fd, const char *data, unsigned int data_len, + int timeout_ms); int make_sockaddr(int family, const char *addr_str, __u16 port, struct sockaddr_storage *addr, socklen_t *len); -- cgit v1.3.1 From ad2f8eb0095e9036724d9cf0eb6960f1e6d52d21 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:01:17 -0700 Subject: bpf: selftests: Tcp header options This patch adds tests for the new bpf tcp header option feature. test_tcp_hdr_options.c: - It tests header option writing and parsing in 3WHS: regular connection establishment, fastopen, and syncookie. - In syncookie, the passive side's bpf prog is asking the active side to resend its bpf header option by specifying a RESEND bit in the outgoing SYNACK. handle_active_estab() and write_nodata_opt() has some details. - handle_passive_estab() has comments on fastopen. - It also has test for header writing and parsing in FIN packet. - Most of the tests is writing an experimental option 254 with magic 0xeB9F. - The no_exprm_estab() also tests writing a regular TCP option without any magic. test_misc_tcp_options.c: - It is an one directional test. Active side writes option and passive side parses option. The focus is to exercise the new helpers and API. - Testing the new helper: bpf_load_hdr_opt() and bpf_store_hdr_opt(). - Testing the bpf_getsockopt(TCP_BPF_SYN). - Negative tests for the above helpers. - Testing the sock_ops->skb_data. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200820190117.2886749-1-kafai@fb.com --- .../selftests/bpf/prog_tests/tcp_hdr_options.c | 622 ++++++++++++++++++++ .../bpf/progs/test_misc_tcp_hdr_options.c | 325 +++++++++++ .../selftests/bpf/progs/test_tcp_hdr_options.c | 623 +++++++++++++++++++++ tools/testing/selftests/bpf/test_tcp_hdr_options.h | 151 +++++ 4 files changed, 1721 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c create mode 100644 tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c create mode 100644 tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c create mode 100644 tools/testing/selftests/bpf/test_tcp_hdr_options.h (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c new file mode 100644 index 000000000000..24ba0d21b641 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -0,0 +1,622 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +#include "test_progs.h" +#include "cgroup_helpers.h" +#include "network_helpers.h" +#include "test_tcp_hdr_options.h" +#include "test_tcp_hdr_options.skel.h" +#include "test_misc_tcp_hdr_options.skel.h" + +#define LO_ADDR6 "::eB9F" +#define CG_NAME "/tcpbpf-hdr-opt-test" + +struct bpf_test_option exp_passive_estab_in; +struct bpf_test_option exp_active_estab_in; +struct bpf_test_option exp_passive_fin_in; +struct bpf_test_option exp_active_fin_in; +struct hdr_stg exp_passive_hdr_stg; +struct hdr_stg exp_active_hdr_stg = { .active = true, }; + +static struct test_misc_tcp_hdr_options *misc_skel; +static struct test_tcp_hdr_options *skel; +static int lport_linum_map_fd; +static int hdr_stg_map_fd; +static __u32 duration; +static int cg_fd; + +struct sk_fds { + int srv_fd; + int passive_fd; + int active_fd; + int passive_lport; + int active_lport; +}; + +static int add_lo_addr(void) +{ + char ip_addr_cmd[256]; + int cmdlen; + + cmdlen = snprintf(ip_addr_cmd, sizeof(ip_addr_cmd), + "ip -6 addr add %s/128 dev lo scope host", + LO_ADDR6); + + if (CHECK(cmdlen >= sizeof(ip_addr_cmd), "compile ip cmd", + "failed to add host addr %s to lo. ip cmdlen is too long\n", + LO_ADDR6)) + return -1; + + if (CHECK(system(ip_addr_cmd), "run ip cmd", + "failed to add host addr %s to lo\n", LO_ADDR6)) + return -1; + + return 0; +} + +static int create_netns(void) +{ + if (CHECK(unshare(CLONE_NEWNET), "create netns", + "unshare(CLONE_NEWNET): %s (%d)", + strerror(errno), errno)) + return -1; + + if (CHECK(system("ip link set dev lo up"), "run ip cmd", + "failed to bring lo link up\n")) + return -1; + + if (add_lo_addr()) + return -1; + + return 0; +} + +static int write_sysctl(const char *sysctl, const char *value) +{ + int fd, err, len; + + fd = open(sysctl, O_WRONLY); + if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n", + sysctl, strerror(errno), errno)) + return -1; + + len = strlen(value); + err = write(fd, value, len); + close(fd); + if (CHECK(err != len, "write sysctl", + "write(%s, %s): err:%d %s (%d)\n", + sysctl, value, err, strerror(errno), errno)) + return -1; + + return 0; +} + +static void print_hdr_stg(const struct hdr_stg *hdr_stg, const char *prefix) +{ + fprintf(stderr, "%s{active:%u, resend_syn:%u, syncookie:%u, fastopen:%u}\n", + prefix ? : "", hdr_stg->active, hdr_stg->resend_syn, + hdr_stg->syncookie, hdr_stg->fastopen); +} + +static void print_option(const struct bpf_test_option *opt, const char *prefix) +{ + fprintf(stderr, "%s{flags:0x%x, max_delack_ms:%u, rand:0x%x}\n", + prefix ? : "", opt->flags, opt->max_delack_ms, opt->rand); +} + +static void sk_fds_close(struct sk_fds *sk_fds) +{ + close(sk_fds->srv_fd); + close(sk_fds->passive_fd); + close(sk_fds->active_fd); +} + +static int sk_fds_shutdown(struct sk_fds *sk_fds) +{ + int ret, abyte; + + shutdown(sk_fds->active_fd, SHUT_WR); + ret = read(sk_fds->passive_fd, &abyte, sizeof(abyte)); + if (CHECK(ret != 0, "read-after-shutdown(passive_fd):", + "ret:%d %s (%d)\n", + ret, strerror(errno), errno)) + return -1; + + shutdown(sk_fds->passive_fd, SHUT_WR); + ret = read(sk_fds->active_fd, &abyte, sizeof(abyte)); + if (CHECK(ret != 0, "read-after-shutdown(active_fd):", + "ret:%d %s (%d)\n", + ret, strerror(errno), errno)) + return -1; + + return 0; +} + +static int sk_fds_connect(struct sk_fds *sk_fds, bool fast_open) +{ + const char fast[] = "FAST!!!"; + struct sockaddr_in6 addr6; + socklen_t len; + + sk_fds->srv_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0); + if (CHECK(sk_fds->srv_fd == -1, "start_server", "%s (%d)\n", + strerror(errno), errno)) + goto error; + + if (fast_open) + sk_fds->active_fd = fastopen_connect(sk_fds->srv_fd, fast, + sizeof(fast), 0); + else + sk_fds->active_fd = connect_to_fd(sk_fds->srv_fd, 0); + + if (CHECK_FAIL(sk_fds->active_fd == -1)) { + close(sk_fds->srv_fd); + goto error; + } + + len = sizeof(addr6); + if (CHECK(getsockname(sk_fds->srv_fd, (struct sockaddr *)&addr6, + &len), "getsockname(srv_fd)", "%s (%d)\n", + strerror(errno), errno)) + goto error_close; + sk_fds->passive_lport = ntohs(addr6.sin6_port); + + len = sizeof(addr6); + if (CHECK(getsockname(sk_fds->active_fd, (struct sockaddr *)&addr6, + &len), "getsockname(active_fd)", "%s (%d)\n", + strerror(errno), errno)) + goto error_close; + sk_fds->active_lport = ntohs(addr6.sin6_port); + + sk_fds->passive_fd = accept(sk_fds->srv_fd, NULL, 0); + if (CHECK(sk_fds->passive_fd == -1, "accept(srv_fd)", "%s (%d)\n", + strerror(errno), errno)) + goto error_close; + + if (fast_open) { + char bytes_in[sizeof(fast)]; + int ret; + + ret = read(sk_fds->passive_fd, bytes_in, sizeof(bytes_in)); + if (CHECK(ret != sizeof(fast), "read fastopen syn data", + "expected=%lu actual=%d\n", sizeof(fast), ret)) { + close(sk_fds->passive_fd); + goto error_close; + } + } + + return 0; + +error_close: + close(sk_fds->active_fd); + close(sk_fds->srv_fd); + +error: + memset(sk_fds, -1, sizeof(*sk_fds)); + return -1; +} + +static int check_hdr_opt(const struct bpf_test_option *exp, + const struct bpf_test_option *act, + const char *hdr_desc) +{ + if (CHECK(memcmp(exp, act, sizeof(*exp)), + "expected-vs-actual", "unexpected %s\n", hdr_desc)) { + print_option(exp, "expected: "); + print_option(act, " actual: "); + return -1; + } + + return 0; +} + +static int check_hdr_stg(const struct hdr_stg *exp, int fd, + const char *stg_desc) +{ + struct hdr_stg act; + + if (CHECK(bpf_map_lookup_elem(hdr_stg_map_fd, &fd, &act), + "map_lookup(hdr_stg_map_fd)", "%s %s (%d)\n", + stg_desc, strerror(errno), errno)) + return -1; + + if (CHECK(memcmp(exp, &act, sizeof(*exp)), + "expected-vs-actual", "unexpected %s\n", stg_desc)) { + print_hdr_stg(exp, "expected: "); + print_hdr_stg(&act, " actual: "); + return -1; + } + + return 0; +} + +static int check_error_linum(const struct sk_fds *sk_fds) +{ + unsigned int nr_errors = 0; + struct linum_err linum_err; + int lport; + + lport = sk_fds->passive_lport; + if (!bpf_map_lookup_elem(lport_linum_map_fd, &lport, &linum_err)) { + fprintf(stderr, + "bpf prog error out at lport:passive(%d), linum:%u err:%d\n", + lport, linum_err.linum, linum_err.err); + nr_errors++; + } + + lport = sk_fds->active_lport; + if (!bpf_map_lookup_elem(lport_linum_map_fd, &lport, &linum_err)) { + fprintf(stderr, + "bpf prog error out at lport:active(%d), linum:%u err:%d\n", + lport, linum_err.linum, linum_err.err); + nr_errors++; + } + + return nr_errors; +} + +static void check_hdr_and_close_fds(struct sk_fds *sk_fds) +{ + if (sk_fds_shutdown(sk_fds)) + goto check_linum; + + if (check_hdr_stg(&exp_passive_hdr_stg, sk_fds->passive_fd, + "passive_hdr_stg")) + goto check_linum; + + if (check_hdr_stg(&exp_active_hdr_stg, sk_fds->active_fd, + "active_hdr_stg")) + goto check_linum; + + if (check_hdr_opt(&exp_passive_estab_in, &skel->bss->passive_estab_in, + "passive_estab_in")) + goto check_linum; + + if (check_hdr_opt(&exp_active_estab_in, &skel->bss->active_estab_in, + "active_estab_in")) + goto check_linum; + + if (check_hdr_opt(&exp_passive_fin_in, &skel->bss->passive_fin_in, + "passive_fin_in")) + goto check_linum; + + check_hdr_opt(&exp_active_fin_in, &skel->bss->active_fin_in, + "active_fin_in"); + +check_linum: + CHECK_FAIL(check_error_linum(sk_fds)); + sk_fds_close(sk_fds); +} + +static void prepare_out(void) +{ + skel->bss->active_syn_out = exp_passive_estab_in; + skel->bss->passive_synack_out = exp_active_estab_in; + + skel->bss->active_fin_out = exp_passive_fin_in; + skel->bss->passive_fin_out = exp_active_fin_in; +} + +static void reset_test(void) +{ + size_t optsize = sizeof(struct bpf_test_option); + int lport, err; + + memset(&skel->bss->passive_synack_out, 0, optsize); + memset(&skel->bss->passive_fin_out, 0, optsize); + + memset(&skel->bss->passive_estab_in, 0, optsize); + memset(&skel->bss->passive_fin_in, 0, optsize); + + memset(&skel->bss->active_syn_out, 0, optsize); + memset(&skel->bss->active_fin_out, 0, optsize); + + memset(&skel->bss->active_estab_in, 0, optsize); + memset(&skel->bss->active_fin_in, 0, optsize); + + skel->data->test_kind = TCPOPT_EXP; + skel->data->test_magic = 0xeB9F; + + memset(&exp_passive_estab_in, 0, optsize); + memset(&exp_active_estab_in, 0, optsize); + memset(&exp_passive_fin_in, 0, optsize); + memset(&exp_active_fin_in, 0, optsize); + + memset(&exp_passive_hdr_stg, 0, sizeof(exp_passive_hdr_stg)); + memset(&exp_active_hdr_stg, 0, sizeof(exp_active_hdr_stg)); + exp_active_hdr_stg.active = true; + + err = bpf_map_get_next_key(lport_linum_map_fd, NULL, &lport); + while (!err) { + bpf_map_delete_elem(lport_linum_map_fd, &lport); + err = bpf_map_get_next_key(lport_linum_map_fd, &lport, &lport); + } +} + +static void fastopen_estab(void) +{ + struct bpf_link *link; + struct sk_fds sk_fds; + + hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map); + lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map); + + exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS; + exp_passive_estab_in.rand = 0xfa; + exp_passive_estab_in.max_delack_ms = 11; + + exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS; + exp_active_estab_in.rand = 0xce; + exp_active_estab_in.max_delack_ms = 22; + + exp_passive_hdr_stg.fastopen = true; + + prepare_out(); + + /* Allow fastopen without fastopen cookie */ + if (write_sysctl("/proc/sys/net/ipv4/tcp_fastopen", "1543")) + return; + + link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); + if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", + PTR_ERR(link))) + return; + + if (sk_fds_connect(&sk_fds, true)) { + bpf_link__destroy(link); + return; + } + + check_hdr_and_close_fds(&sk_fds); + bpf_link__destroy(link); +} + +static void syncookie_estab(void) +{ + struct bpf_link *link; + struct sk_fds sk_fds; + + hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map); + lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map); + + exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS; + exp_passive_estab_in.rand = 0xfa; + exp_passive_estab_in.max_delack_ms = 11; + + exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS | + OPTION_F_RESEND; + exp_active_estab_in.rand = 0xce; + exp_active_estab_in.max_delack_ms = 22; + + exp_passive_hdr_stg.syncookie = true; + exp_active_hdr_stg.resend_syn = true, + + prepare_out(); + + /* Clear the RESEND to ensure the bpf prog can learn + * want_cookie and set the RESEND by itself. + */ + skel->bss->passive_synack_out.flags &= ~OPTION_F_RESEND; + + /* Enforce syncookie mode */ + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2")) + return; + + link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); + if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", + PTR_ERR(link))) + return; + + if (sk_fds_connect(&sk_fds, false)) { + bpf_link__destroy(link); + return; + } + + check_hdr_and_close_fds(&sk_fds); + bpf_link__destroy(link); +} + +static void fin(void) +{ + struct bpf_link *link; + struct sk_fds sk_fds; + + hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map); + lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map); + + exp_passive_fin_in.flags = OPTION_F_RAND; + exp_passive_fin_in.rand = 0xfa; + + exp_active_fin_in.flags = OPTION_F_RAND; + exp_active_fin_in.rand = 0xce; + + prepare_out(); + + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1")) + return; + + link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); + if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", + PTR_ERR(link))) + return; + + if (sk_fds_connect(&sk_fds, false)) { + bpf_link__destroy(link); + return; + } + + check_hdr_and_close_fds(&sk_fds); + bpf_link__destroy(link); +} + +static void __simple_estab(bool exprm) +{ + struct bpf_link *link; + struct sk_fds sk_fds; + + hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map); + lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map); + + exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS; + exp_passive_estab_in.rand = 0xfa; + exp_passive_estab_in.max_delack_ms = 11; + + exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS; + exp_active_estab_in.rand = 0xce; + exp_active_estab_in.max_delack_ms = 22; + + prepare_out(); + + if (!exprm) { + skel->data->test_kind = 0xB9; + skel->data->test_magic = 0; + } + + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1")) + return; + + link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); + if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", + PTR_ERR(link))) + return; + + if (sk_fds_connect(&sk_fds, false)) { + bpf_link__destroy(link); + return; + } + + check_hdr_and_close_fds(&sk_fds); + bpf_link__destroy(link); +} + +static void no_exprm_estab(void) +{ + __simple_estab(false); +} + +static void simple_estab(void) +{ + __simple_estab(true); +} + +static void misc(void) +{ + const char send_msg[] = "MISC!!!"; + char recv_msg[sizeof(send_msg)]; + const unsigned int nr_data = 2; + struct bpf_link *link; + struct sk_fds sk_fds; + int i, ret; + + lport_linum_map_fd = bpf_map__fd(misc_skel->maps.lport_linum_map); + + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1")) + return; + + link = bpf_program__attach_cgroup(misc_skel->progs.misc_estab, cg_fd); + if (CHECK(IS_ERR(link), "attach_cgroup(misc_estab)", "err: %ld\n", + PTR_ERR(link))) + return; + + if (sk_fds_connect(&sk_fds, false)) { + bpf_link__destroy(link); + return; + } + + for (i = 0; i < nr_data; i++) { + /* MSG_EOR to ensure skb will not be combined */ + ret = send(sk_fds.active_fd, send_msg, sizeof(send_msg), + MSG_EOR); + if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n", + ret)) + goto check_linum; + + ret = read(sk_fds.passive_fd, recv_msg, sizeof(recv_msg)); + if (CHECK(ret != sizeof(send_msg), "read(msg)", "ret:%d\n", + ret)) + goto check_linum; + } + + if (sk_fds_shutdown(&sk_fds)) + goto check_linum; + + CHECK(misc_skel->bss->nr_syn != 1, "unexpected nr_syn", + "expected (1) != actual (%u)\n", + misc_skel->bss->nr_syn); + + CHECK(misc_skel->bss->nr_data != nr_data, "unexpected nr_data", + "expected (%u) != actual (%u)\n", + nr_data, misc_skel->bss->nr_data); + + /* The last ACK may have been delayed, so it is either 1 or 2. */ + CHECK(misc_skel->bss->nr_pure_ack != 1 && + misc_skel->bss->nr_pure_ack != 2, + "unexpected nr_pure_ack", + "expected (1 or 2) != actual (%u)\n", + misc_skel->bss->nr_pure_ack); + + CHECK(misc_skel->bss->nr_fin != 1, "unexpected nr_fin", + "expected (1) != actual (%u)\n", + misc_skel->bss->nr_fin); + +check_linum: + CHECK_FAIL(check_error_linum(&sk_fds)); + sk_fds_close(&sk_fds); + bpf_link__destroy(link); +} + +struct test { + const char *desc; + void (*run)(void); +}; + +#define DEF_TEST(name) { #name, name } +static struct test tests[] = { + DEF_TEST(simple_estab), + DEF_TEST(no_exprm_estab), + DEF_TEST(syncookie_estab), + DEF_TEST(fastopen_estab), + DEF_TEST(fin), + DEF_TEST(misc), +}; + +void test_tcp_hdr_options(void) +{ + int i; + + skel = test_tcp_hdr_options__open_and_load(); + if (CHECK(!skel, "open and load skel", "failed")) + return; + + misc_skel = test_misc_tcp_hdr_options__open_and_load(); + if (CHECK(!misc_skel, "open and load misc test skel", "failed")) + goto skel_destroy; + + cg_fd = test__join_cgroup(CG_NAME); + if (CHECK_FAIL(cg_fd < 0)) + goto skel_destroy; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (!test__start_subtest(tests[i].desc)) + continue; + + if (create_netns()) + break; + + tests[i].run(); + + reset_test(); + } + + close(cg_fd); +skel_destroy: + test_misc_tcp_hdr_options__destroy(misc_skel); + test_tcp_hdr_options__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c new file mode 100644 index 000000000000..3a216d1d0226 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define BPF_PROG_TEST_TCP_HDR_OPTIONS +#include "test_tcp_hdr_options.h" + +__u16 last_addr16_n = __bpf_htons(0xeB9F); +__u16 active_lport_n = 0; +__u16 active_lport_h = 0; +__u16 passive_lport_n = 0; +__u16 passive_lport_h = 0; + +/* options received at passive side */ +unsigned int nr_pure_ack = 0; +unsigned int nr_data = 0; +unsigned int nr_syn = 0; +unsigned int nr_fin = 0; + +/* Check the header received from the active side */ +static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn) +{ + union { + struct tcphdr th; + struct ipv6hdr ip6; + struct tcp_exprm_opt exprm_opt; + struct tcp_opt reg_opt; + __u8 data[100]; /* IPv6 (40) + Max TCP hdr (60) */ + } hdr = {}; + __u64 load_flags = check_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0; + struct tcphdr *pth; + int ret; + + hdr.reg_opt.kind = 0xB9; + + /* The option is 4 bytes long instead of 2 bytes */ + ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, 2, load_flags); + if (ret != -ENOSPC) + RET_CG_ERR(ret); + + /* Test searching magic with regular kind */ + hdr.reg_opt.len = 4; + ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt), + load_flags); + if (ret != -EINVAL) + RET_CG_ERR(ret); + + hdr.reg_opt.len = 0; + ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt), + load_flags); + if (ret != 4 || hdr.reg_opt.len != 4 || hdr.reg_opt.kind != 0xB9 || + hdr.reg_opt.data[0] != 0xfa || hdr.reg_opt.data[1] != 0xce) + RET_CG_ERR(ret); + + /* Test searching experimental option with invalid kind length */ + hdr.exprm_opt.kind = TCPOPT_EXP; + hdr.exprm_opt.len = 5; + hdr.exprm_opt.magic = 0; + ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt), + load_flags); + if (ret != -EINVAL) + RET_CG_ERR(ret); + + /* Test searching experimental option with 0 magic value */ + hdr.exprm_opt.len = 4; + ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt), + load_flags); + if (ret != -ENOMSG) + RET_CG_ERR(ret); + + hdr.exprm_opt.magic = __bpf_htons(0xeB9F); + ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt), + load_flags); + if (ret != 4 || hdr.exprm_opt.len != 4 || + hdr.exprm_opt.kind != TCPOPT_EXP || + hdr.exprm_opt.magic != __bpf_htons(0xeB9F)) + RET_CG_ERR(ret); + + if (!check_syn) + return CG_OK; + + /* Test loading from skops->syn_skb if sk_state == TCP_NEW_SYN_RECV + * + * Test loading from tp->saved_syn for other sk_state. + */ + ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr.ip6, + sizeof(hdr.ip6)); + if (ret != -ENOSPC) + RET_CG_ERR(ret); + + if (hdr.ip6.saddr.s6_addr16[7] != last_addr16_n || + hdr.ip6.daddr.s6_addr16[7] != last_addr16_n) + RET_CG_ERR(0); + + ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr, sizeof(hdr)); + if (ret < 0) + RET_CG_ERR(ret); + + pth = (struct tcphdr *)(&hdr.ip6 + 1); + if (pth->dest != passive_lport_n || pth->source != active_lport_n) + RET_CG_ERR(0); + + ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN, &hdr, sizeof(hdr)); + if (ret < 0) + RET_CG_ERR(ret); + + if (hdr.th.dest != passive_lport_n || hdr.th.source != active_lport_n) + RET_CG_ERR(0); + + return CG_OK; +} + +static int check_active_syn_in(struct bpf_sock_ops *skops) +{ + return __check_active_hdr_in(skops, true); +} + +static int check_active_hdr_in(struct bpf_sock_ops *skops) +{ + struct tcphdr *th; + + if (__check_active_hdr_in(skops, false) == CG_ERR) + return CG_ERR; + + th = skops->skb_data; + if (th + 1 > skops->skb_data_end) + RET_CG_ERR(0); + + if (tcp_hdrlen(th) < skops->skb_len) + nr_data++; + + if (th->fin) + nr_fin++; + + if (th->ack && !th->fin && tcp_hdrlen(th) == skops->skb_len) + nr_pure_ack++; + + return CG_OK; +} + +static int active_opt_len(struct bpf_sock_ops *skops) +{ + int err; + + /* Reserve more than enough to allow the -EEXIST test in + * the write_active_opt(). + */ + err = bpf_reserve_hdr_opt(skops, 12, 0); + if (err) + RET_CG_ERR(err); + + return CG_OK; +} + +static int write_active_opt(struct bpf_sock_ops *skops) +{ + struct tcp_exprm_opt exprm_opt = {}; + struct tcp_opt win_scale_opt = {}; + struct tcp_opt reg_opt = {}; + struct tcphdr *th; + int err, ret; + + exprm_opt.kind = TCPOPT_EXP; + exprm_opt.len = 4; + exprm_opt.magic = __bpf_htons(0xeB9F); + + reg_opt.kind = 0xB9; + reg_opt.len = 4; + reg_opt.data[0] = 0xfa; + reg_opt.data[1] = 0xce; + + win_scale_opt.kind = TCPOPT_WINDOW; + + err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0); + if (err) + RET_CG_ERR(err); + + /* Store the same exprm option */ + err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0); + if (err != -EEXIST) + RET_CG_ERR(err); + + err = bpf_store_hdr_opt(skops, ®_opt, sizeof(reg_opt), 0); + if (err) + RET_CG_ERR(err); + err = bpf_store_hdr_opt(skops, ®_opt, sizeof(reg_opt), 0); + if (err != -EEXIST) + RET_CG_ERR(err); + + /* Check the option has been written and can be searched */ + ret = bpf_load_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0); + if (ret != 4 || exprm_opt.len != 4 || exprm_opt.kind != TCPOPT_EXP || + exprm_opt.magic != __bpf_htons(0xeB9F)) + RET_CG_ERR(ret); + + reg_opt.len = 0; + ret = bpf_load_hdr_opt(skops, ®_opt, sizeof(reg_opt), 0); + if (ret != 4 || reg_opt.len != 4 || reg_opt.kind != 0xB9 || + reg_opt.data[0] != 0xfa || reg_opt.data[1] != 0xce) + RET_CG_ERR(ret); + + th = skops->skb_data; + if (th + 1 > skops->skb_data_end) + RET_CG_ERR(0); + + if (th->syn) { + active_lport_h = skops->local_port; + active_lport_n = th->source; + + /* Search the win scale option written by kernel + * in the SYN packet. + */ + ret = bpf_load_hdr_opt(skops, &win_scale_opt, + sizeof(win_scale_opt), 0); + if (ret != 3 || win_scale_opt.len != 3 || + win_scale_opt.kind != TCPOPT_WINDOW) + RET_CG_ERR(ret); + + /* Write the win scale option that kernel + * has already written. + */ + err = bpf_store_hdr_opt(skops, &win_scale_opt, + sizeof(win_scale_opt), 0); + if (err != -EEXIST) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int handle_hdr_opt_len(struct bpf_sock_ops *skops) +{ + __u8 tcp_flags = skops_tcp_flags(skops); + + if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK) + /* Check the SYN from bpf_sock_ops_kern->syn_skb */ + return check_active_syn_in(skops); + + /* Passive side should have cleared the write hdr cb by now */ + if (skops->local_port == passive_lport_h) + RET_CG_ERR(0); + + return active_opt_len(skops); +} + +static int handle_write_hdr_opt(struct bpf_sock_ops *skops) +{ + if (skops->local_port == passive_lport_h) + RET_CG_ERR(0); + + return write_active_opt(skops); +} + +static int handle_parse_hdr(struct bpf_sock_ops *skops) +{ + /* Passive side is not writing any non-standard/unknown + * option, so the active side should never be called. + */ + if (skops->local_port == active_lport_h) + RET_CG_ERR(0); + + return check_active_hdr_in(skops); +} + +static int handle_passive_estab(struct bpf_sock_ops *skops) +{ + int err; + + /* No more write hdr cb */ + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags & + ~BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG); + + /* Recheck the SYN but check the tp->saved_syn this time */ + err = check_active_syn_in(skops); + if (err == CG_ERR) + return err; + + nr_syn++; + + /* The ack has header option written by the active side also */ + return check_active_hdr_in(skops); +} + +SEC("sockops/misc_estab") +int misc_estab(struct bpf_sock_ops *skops) +{ + int true_val = 1; + + switch (skops->op) { + case BPF_SOCK_OPS_TCP_LISTEN_CB: + passive_lport_h = skops->local_port; + passive_lport_n = __bpf_htons(passive_lport_h); + bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN, + &true_val, sizeof(true_val)); + set_hdr_cb_flags(skops); + break; + case BPF_SOCK_OPS_TCP_CONNECT_CB: + set_hdr_cb_flags(skops); + break; + case BPF_SOCK_OPS_PARSE_HDR_OPT_CB: + return handle_parse_hdr(skops); + case BPF_SOCK_OPS_HDR_OPT_LEN_CB: + return handle_hdr_opt_len(skops); + case BPF_SOCK_OPS_WRITE_HDR_OPT_CB: + return handle_write_hdr_opt(skops); + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + return handle_passive_estab(skops); + } + + return CG_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c new file mode 100644 index 000000000000..9197a23df3da --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c @@ -0,0 +1,623 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define BPF_PROG_TEST_TCP_HDR_OPTIONS +#include "test_tcp_hdr_options.h" + +#ifndef sizeof_field +#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) +#endif + +__u8 test_kind = TCPOPT_EXP; +__u16 test_magic = 0xeB9F; + +struct bpf_test_option passive_synack_out = {}; +struct bpf_test_option passive_fin_out = {}; + +struct bpf_test_option passive_estab_in = {}; +struct bpf_test_option passive_fin_in = {}; + +struct bpf_test_option active_syn_out = {}; +struct bpf_test_option active_fin_out = {}; + +struct bpf_test_option active_estab_in = {}; +struct bpf_test_option active_fin_in = {}; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct hdr_stg); +} hdr_stg_map SEC(".maps"); + +static bool skops_want_cookie(const struct bpf_sock_ops *skops) +{ + return skops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE; +} + +static bool skops_current_mss(const struct bpf_sock_ops *skops) +{ + return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS; +} + +static __u8 option_total_len(__u8 flags) +{ + __u8 i, len = 1; /* +1 for flags */ + + if (!flags) + return 0; + + /* RESEND bit does not use a byte */ + for (i = OPTION_RESEND + 1; i < __NR_OPTION_FLAGS; i++) + len += !!TEST_OPTION_FLAGS(flags, i); + + if (test_kind == TCPOPT_EXP) + return len + TCP_BPF_EXPOPT_BASE_LEN; + else + return len + 2; /* +1 kind, +1 kind-len */ +} + +static void write_test_option(const struct bpf_test_option *test_opt, + __u8 *data) +{ + __u8 offset = 0; + + data[offset++] = test_opt->flags; + if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS)) + data[offset++] = test_opt->max_delack_ms; + + if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND)) + data[offset++] = test_opt->rand; +} + +static int store_option(struct bpf_sock_ops *skops, + const struct bpf_test_option *test_opt) +{ + union { + struct tcp_exprm_opt exprm; + struct tcp_opt regular; + } write_opt; + int err; + + if (test_kind == TCPOPT_EXP) { + write_opt.exprm.kind = TCPOPT_EXP; + write_opt.exprm.len = option_total_len(test_opt->flags); + write_opt.exprm.magic = __bpf_htons(test_magic); + write_opt.exprm.data32 = 0; + write_test_option(test_opt, write_opt.exprm.data); + err = bpf_store_hdr_opt(skops, &write_opt.exprm, + sizeof(write_opt.exprm), 0); + } else { + write_opt.regular.kind = test_kind; + write_opt.regular.len = option_total_len(test_opt->flags); + write_opt.regular.data32 = 0; + write_test_option(test_opt, write_opt.regular.data); + err = bpf_store_hdr_opt(skops, &write_opt.regular, + sizeof(write_opt.regular), 0); + } + + if (err) + RET_CG_ERR(err); + + return CG_OK; +} + +static int parse_test_option(struct bpf_test_option *opt, const __u8 *start) +{ + opt->flags = *start++; + + if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS)) + opt->max_delack_ms = *start++; + + if (TEST_OPTION_FLAGS(opt->flags, OPTION_RAND)) + opt->rand = *start++; + + return 0; +} + +static int load_option(struct bpf_sock_ops *skops, + struct bpf_test_option *test_opt, bool from_syn) +{ + union { + struct tcp_exprm_opt exprm; + struct tcp_opt regular; + } search_opt; + int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0; + + if (test_kind == TCPOPT_EXP) { + search_opt.exprm.kind = TCPOPT_EXP; + search_opt.exprm.len = 4; + search_opt.exprm.magic = __bpf_htons(test_magic); + search_opt.exprm.data32 = 0; + ret = bpf_load_hdr_opt(skops, &search_opt.exprm, + sizeof(search_opt.exprm), load_flags); + if (ret < 0) + return ret; + return parse_test_option(test_opt, search_opt.exprm.data); + } else { + search_opt.regular.kind = test_kind; + search_opt.regular.len = 0; + search_opt.regular.data32 = 0; + ret = bpf_load_hdr_opt(skops, &search_opt.regular, + sizeof(search_opt.regular), load_flags); + if (ret < 0) + return ret; + return parse_test_option(test_opt, search_opt.regular.data); + } +} + +static int synack_opt_len(struct bpf_sock_ops *skops) +{ + struct bpf_test_option test_opt = {}; + __u8 optlen; + int err; + + if (!passive_synack_out.flags) + return CG_OK; + + err = load_option(skops, &test_opt, true); + + /* bpf_test_option is not found */ + if (err == -ENOMSG) + return CG_OK; + + if (err) + RET_CG_ERR(err); + + optlen = option_total_len(passive_synack_out.flags); + if (optlen) { + err = bpf_reserve_hdr_opt(skops, optlen, 0); + if (err) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int write_synack_opt(struct bpf_sock_ops *skops) +{ + struct bpf_test_option opt; + + if (!passive_synack_out.flags) + /* We should not even be called since no header + * space has been reserved. + */ + RET_CG_ERR(0); + + opt = passive_synack_out; + if (skops_want_cookie(skops)) + SET_OPTION_FLAGS(opt.flags, OPTION_RESEND); + + return store_option(skops, &opt); +} + +static int syn_opt_len(struct bpf_sock_ops *skops) +{ + __u8 optlen; + int err; + + if (!active_syn_out.flags) + return CG_OK; + + optlen = option_total_len(active_syn_out.flags); + if (optlen) { + err = bpf_reserve_hdr_opt(skops, optlen, 0); + if (err) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int write_syn_opt(struct bpf_sock_ops *skops) +{ + if (!active_syn_out.flags) + RET_CG_ERR(0); + + return store_option(skops, &active_syn_out); +} + +static int fin_opt_len(struct bpf_sock_ops *skops) +{ + struct bpf_test_option *opt; + struct hdr_stg *hdr_stg; + __u8 optlen; + int err; + + if (!skops->sk) + RET_CG_ERR(0); + + hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); + if (!hdr_stg) + RET_CG_ERR(0); + + if (hdr_stg->active) + opt = &active_fin_out; + else + opt = &passive_fin_out; + + optlen = option_total_len(opt->flags); + if (optlen) { + err = bpf_reserve_hdr_opt(skops, optlen, 0); + if (err) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int write_fin_opt(struct bpf_sock_ops *skops) +{ + struct bpf_test_option *opt; + struct hdr_stg *hdr_stg; + + if (!skops->sk) + RET_CG_ERR(0); + + hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); + if (!hdr_stg) + RET_CG_ERR(0); + + if (hdr_stg->active) + opt = &active_fin_out; + else + opt = &passive_fin_out; + + if (!opt->flags) + RET_CG_ERR(0); + + return store_option(skops, opt); +} + +static int resend_in_ack(struct bpf_sock_ops *skops) +{ + struct hdr_stg *hdr_stg; + + if (!skops->sk) + return -1; + + hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); + if (!hdr_stg) + return -1; + + return !!hdr_stg->resend_syn; +} + +static int nodata_opt_len(struct bpf_sock_ops *skops) +{ + int resend; + + resend = resend_in_ack(skops); + if (resend < 0) + RET_CG_ERR(0); + + if (resend) + return syn_opt_len(skops); + + return CG_OK; +} + +static int write_nodata_opt(struct bpf_sock_ops *skops) +{ + int resend; + + resend = resend_in_ack(skops); + if (resend < 0) + RET_CG_ERR(0); + + if (resend) + return write_syn_opt(skops); + + return CG_OK; +} + +static int data_opt_len(struct bpf_sock_ops *skops) +{ + /* Same as the nodata version. Mostly to show + * an example usage on skops->skb_len. + */ + return nodata_opt_len(skops); +} + +static int write_data_opt(struct bpf_sock_ops *skops) +{ + return write_nodata_opt(skops); +} + +static int current_mss_opt_len(struct bpf_sock_ops *skops) +{ + /* Reserve maximum that may be needed */ + int err; + + err = bpf_reserve_hdr_opt(skops, option_total_len(OPTION_MASK), 0); + if (err) + RET_CG_ERR(err); + + return CG_OK; +} + +static int handle_hdr_opt_len(struct bpf_sock_ops *skops) +{ + __u8 tcp_flags = skops_tcp_flags(skops); + + if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK) + return synack_opt_len(skops); + + if (tcp_flags & TCPHDR_SYN) + return syn_opt_len(skops); + + if (tcp_flags & TCPHDR_FIN) + return fin_opt_len(skops); + + if (skops_current_mss(skops)) + /* The kernel is calculating the MSS */ + return current_mss_opt_len(skops); + + if (skops->skb_len) + return data_opt_len(skops); + + return nodata_opt_len(skops); +} + +static int handle_write_hdr_opt(struct bpf_sock_ops *skops) +{ + __u8 tcp_flags = skops_tcp_flags(skops); + struct tcphdr *th; + + if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK) + return write_synack_opt(skops); + + if (tcp_flags & TCPHDR_SYN) + return write_syn_opt(skops); + + if (tcp_flags & TCPHDR_FIN) + return write_fin_opt(skops); + + th = skops->skb_data; + if (th + 1 > skops->skb_data_end) + RET_CG_ERR(0); + + if (skops->skb_len > tcp_hdrlen(th)) + return write_data_opt(skops); + + return write_nodata_opt(skops); +} + +static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms) +{ + __u32 max_delack_us = max_delack_ms * 1000; + + return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_DELACK_MAX, + &max_delack_us, sizeof(max_delack_us)); +} + +static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms) +{ + __u32 min_rto_us = peer_max_delack_ms * 1000; + + return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN, &min_rto_us, + sizeof(min_rto_us)); +} + +static int handle_active_estab(struct bpf_sock_ops *skops) +{ + struct hdr_stg init_stg = { + .active = true, + }; + int err; + + err = load_option(skops, &active_estab_in, false); + if (err && err != -ENOMSG) + RET_CG_ERR(err); + + init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags, + OPTION_RESEND); + if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk, + &init_stg, + BPF_SK_STORAGE_GET_F_CREATE)) + RET_CG_ERR(0); + + if (init_stg.resend_syn) + /* Don't clear the write_hdr cb now because + * the ACK may get lost and retransmit may + * be needed. + * + * PARSE_ALL_HDR cb flag is set to learn if this + * resend_syn option has received by the peer. + * + * The header option will be resent until a valid + * packet is received at handle_parse_hdr() + * and all hdr cb flags will be cleared in + * handle_parse_hdr(). + */ + set_parse_all_hdr_cb_flags(skops); + else if (!active_fin_out.flags) + /* No options will be written from now */ + clear_hdr_cb_flags(skops); + + if (active_syn_out.max_delack_ms) { + err = set_delack_max(skops, active_syn_out.max_delack_ms); + if (err) + RET_CG_ERR(err); + } + + if (active_estab_in.max_delack_ms) { + err = set_rto_min(skops, active_estab_in.max_delack_ms); + if (err) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int handle_passive_estab(struct bpf_sock_ops *skops) +{ + struct hdr_stg init_stg = {}; + struct tcphdr *th; + int err; + + err = load_option(skops, &passive_estab_in, true); + if (err == -ENOENT) { + /* saved_syn is not found. It was in syncookie mode. + * We have asked the active side to resend the options + * in ACK, so try to find the bpf_test_option from ACK now. + */ + err = load_option(skops, &passive_estab_in, false); + init_stg.syncookie = true; + } + + /* ENOMSG: The bpf_test_option is not found which is fine. + * Bail out now for all other errors. + */ + if (err && err != -ENOMSG) + RET_CG_ERR(err); + + th = skops->skb_data; + if (th + 1 > skops->skb_data_end) + RET_CG_ERR(0); + + if (th->syn) { + /* Fastopen */ + + /* Cannot clear cb_flags to stop write_hdr cb. + * synack is not sent yet for fast open. + * Even it was, the synack may need to be retransmitted. + * + * PARSE_ALL_HDR cb flag is set to learn + * if synack has reached the peer. + * All cb_flags will be cleared in handle_parse_hdr(). + */ + set_parse_all_hdr_cb_flags(skops); + init_stg.fastopen = true; + } else if (!passive_fin_out.flags) { + /* No options will be written from now */ + clear_hdr_cb_flags(skops); + } + + if (!skops->sk || + !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg, + BPF_SK_STORAGE_GET_F_CREATE)) + RET_CG_ERR(0); + + if (passive_synack_out.max_delack_ms) { + err = set_delack_max(skops, passive_synack_out.max_delack_ms); + if (err) + RET_CG_ERR(err); + } + + if (passive_estab_in.max_delack_ms) { + err = set_rto_min(skops, passive_estab_in.max_delack_ms); + if (err) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int handle_parse_hdr(struct bpf_sock_ops *skops) +{ + struct hdr_stg *hdr_stg; + struct tcphdr *th; + + if (!skops->sk) + RET_CG_ERR(0); + + th = skops->skb_data; + if (th + 1 > skops->skb_data_end) + RET_CG_ERR(0); + + hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); + if (!hdr_stg) + RET_CG_ERR(0); + + if (hdr_stg->resend_syn || hdr_stg->fastopen) + /* The PARSE_ALL_HDR cb flag was turned on + * to ensure that the previously written + * options have reached the peer. + * Those previously written option includes: + * - Active side: resend_syn in ACK during syncookie + * or + * - Passive side: SYNACK during fastopen + * + * A valid packet has been received here after + * the 3WHS, so the PARSE_ALL_HDR cb flag + * can be cleared now. + */ + clear_parse_all_hdr_cb_flags(skops); + + if (hdr_stg->resend_syn && !active_fin_out.flags) + /* Active side resent the syn option in ACK + * because the server was in syncookie mode. + * A valid packet has been received, so + * clear header cb flags if there is no + * more option to send. + */ + clear_hdr_cb_flags(skops); + + if (hdr_stg->fastopen && !passive_fin_out.flags) + /* Passive side was in fastopen. + * A valid packet has been received, so + * the SYNACK has reached the peer. + * Clear header cb flags if there is no more + * option to send. + */ + clear_hdr_cb_flags(skops); + + if (th->fin) { + struct bpf_test_option *fin_opt; + int err; + + if (hdr_stg->active) + fin_opt = &active_fin_in; + else + fin_opt = &passive_fin_in; + + err = load_option(skops, fin_opt, false); + if (err && err != -ENOMSG) + RET_CG_ERR(err); + } + + return CG_OK; +} + +SEC("sockops/estab") +int estab(struct bpf_sock_ops *skops) +{ + int true_val = 1; + + switch (skops->op) { + case BPF_SOCK_OPS_TCP_LISTEN_CB: + bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN, + &true_val, sizeof(true_val)); + set_hdr_cb_flags(skops); + break; + case BPF_SOCK_OPS_TCP_CONNECT_CB: + set_hdr_cb_flags(skops); + break; + case BPF_SOCK_OPS_PARSE_HDR_OPT_CB: + return handle_parse_hdr(skops); + case BPF_SOCK_OPS_HDR_OPT_LEN_CB: + return handle_hdr_opt_len(skops); + case BPF_SOCK_OPS_WRITE_HDR_OPT_CB: + return handle_write_hdr_opt(skops); + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + return handle_passive_estab(skops); + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + return handle_active_estab(skops); + } + + return CG_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tcp_hdr_options.h b/tools/testing/selftests/bpf/test_tcp_hdr_options.h new file mode 100644 index 000000000000..78a8cf9eab42 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcp_hdr_options.h @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020 Facebook */ + +#ifndef _TEST_TCP_HDR_OPTIONS_H +#define _TEST_TCP_HDR_OPTIONS_H + +struct bpf_test_option { + __u8 flags; + __u8 max_delack_ms; + __u8 rand; +} __attribute__((packed)); + +enum { + OPTION_RESEND, + OPTION_MAX_DELACK_MS, + OPTION_RAND, + __NR_OPTION_FLAGS, +}; + +#define OPTION_F_RESEND (1 << OPTION_RESEND) +#define OPTION_F_MAX_DELACK_MS (1 << OPTION_MAX_DELACK_MS) +#define OPTION_F_RAND (1 << OPTION_RAND) +#define OPTION_MASK ((1 << __NR_OPTION_FLAGS) - 1) + +#define TEST_OPTION_FLAGS(flags, option) (1 & ((flags) >> (option))) +#define SET_OPTION_FLAGS(flags, option) ((flags) |= (1 << (option))) + +/* Store in bpf_sk_storage */ +struct hdr_stg { + bool active; + bool resend_syn; /* active side only */ + bool syncookie; /* passive side only */ + bool fastopen; /* passive side only */ +}; + +struct linum_err { + unsigned int linum; + int err; +}; + +#define TCPHDR_FIN 0x01 +#define TCPHDR_SYN 0x02 +#define TCPHDR_RST 0x04 +#define TCPHDR_PSH 0x08 +#define TCPHDR_ACK 0x10 +#define TCPHDR_URG 0x20 +#define TCPHDR_ECE 0x40 +#define TCPHDR_CWR 0x80 +#define TCPHDR_SYNACK (TCPHDR_SYN | TCPHDR_ACK) + +#define TCPOPT_EOL 0 +#define TCPOPT_NOP 1 +#define TCPOPT_WINDOW 3 +#define TCPOPT_EXP 254 + +#define TCP_BPF_EXPOPT_BASE_LEN 4 +#define MAX_TCP_HDR_LEN 60 +#define MAX_TCP_OPTION_SPACE 40 + +#ifdef BPF_PROG_TEST_TCP_HDR_OPTIONS + +#define CG_OK 1 +#define CG_ERR 0 + +#ifndef SOL_TCP +#define SOL_TCP 6 +#endif + +struct tcp_exprm_opt { + __u8 kind; + __u8 len; + __u16 magic; + union { + __u8 data[4]; + __u32 data32; + }; +} __attribute__((packed)); + +struct tcp_opt { + __u8 kind; + __u8 len; + union { + __u8 data[4]; + __u32 data32; + }; +} __attribute__((packed)); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 2); + __type(key, int); + __type(value, struct linum_err); +} lport_linum_map SEC(".maps"); + +static inline unsigned int tcp_hdrlen(const struct tcphdr *th) +{ + return th->doff << 2; +} + +static inline __u8 skops_tcp_flags(const struct bpf_sock_ops *skops) +{ + return skops->skb_tcp_flags; +} + +static inline void clear_hdr_cb_flags(struct bpf_sock_ops *skops) +{ + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags & + ~(BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG | + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG)); +} + +static inline void set_hdr_cb_flags(struct bpf_sock_ops *skops) +{ + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags | + BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG | + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG); +} +static inline void +clear_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops) +{ + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags & + ~BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG); +} + +static inline void +set_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops) +{ + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags | + BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG); +} + +#define RET_CG_ERR(__err) ({ \ + struct linum_err __linum_err; \ + int __lport; \ + \ + __linum_err.linum = __LINE__; \ + __linum_err.err = __err; \ + __lport = skops->local_port; \ + bpf_map_update_elem(&lport_linum_map, &__lport, &__linum_err, BPF_NOEXIST); \ + clear_hdr_cb_flags(skops); \ + clear_parse_all_hdr_cb_flags(skops); \ + return CG_ERR; \ +}) + +#endif /* BPF_PROG_TEST_TCP_HDR_OPTIONS */ + +#endif /* _TEST_TCP_HDR_OPTIONS_H */ -- cgit v1.3.1 From 267cf9fa43d1c9d525d5d818a8651f2900e3aa9e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:01:23 -0700 Subject: tcp: bpf: Optionally store mac header in TCP_SAVE_SYN This patch is adapted from Eric's patch in an earlier discussion [1]. The TCP_SAVE_SYN currently only stores the network header and tcp header. This patch allows it to optionally store the mac header also if the setsockopt's optval is 2. It requires one more bit for the "save_syn" bit field in tcp_sock. This patch achieves this by moving the syn_smc bit next to the is_mptcp. The syn_smc is currently used with the TCP experimental option. Since syn_smc is only used when CONFIG_SMC is enabled, this patch also puts the "IS_ENABLED(CONFIG_SMC)" around it like the is_mptcp did with "IS_ENABLED(CONFIG_MPTCP)". The mac_hdrlen is also stored in the "struct saved_syn" to allow a quick offset from the bpf prog if it chooses to start getting from the network header or the tcp header. [1]: https://lore.kernel.org/netdev/CANn89iLJNWh6bkH7DNhy_kmcAexuUCccqERqe7z2QsvPhGrYPQ@mail.gmail.com/ Suggested-by: Eric Dumazet Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/bpf/20200820190123.2886935-1-kafai@fb.com --- include/linux/tcp.h | 13 ++++++++----- include/net/request_sock.h | 1 + include/uapi/linux/bpf.h | 1 + net/core/filter.c | 27 ++++++++++++++++++++++----- net/ipv4/tcp.c | 3 ++- net/ipv4/tcp_input.c | 14 +++++++++++++- tools/include/uapi/linux/bpf.h | 1 + 7 files changed, 48 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 29d166263ae7..56ff2952edaf 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -239,14 +239,13 @@ struct tcp_sock { repair : 1, frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */ u8 repair_queue; - u8 syn_data:1, /* SYN includes data */ + u8 save_syn:2, /* Save headers of SYN packet */ + syn_data:1, /* SYN includes data */ syn_fastopen:1, /* SYN includes Fast Open option */ syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ syn_fastopen_ch:1, /* Active TFO re-enabling probe */ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ - save_syn:1, /* Save headers of SYN packet */ - is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ - syn_smc:1; /* SYN includes SMC */ + is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ u32 tlp_high_seq; /* snd_nxt at the time of TLP */ u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ @@ -393,6 +392,9 @@ struct tcp_sock { #if IS_ENABLED(CONFIG_MPTCP) bool is_mptcp; #endif +#if IS_ENABLED(CONFIG_SMC) + bool syn_smc; /* SYN includes SMC */ +#endif #ifdef CONFIG_TCP_MD5SIG /* TCP AF-Specific parts; only used by MD5 Signature support so far */ @@ -488,7 +490,8 @@ static inline void tcp_saved_syn_free(struct tcp_sock *tp) static inline u32 tcp_saved_syn_len(const struct saved_syn *saved_syn) { - return saved_syn->network_hdrlen + saved_syn->tcp_hdrlen; + return saved_syn->mac_hdrlen + saved_syn->network_hdrlen + + saved_syn->tcp_hdrlen; } struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 7d9ed99a77bd..29e41ff3ec93 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -42,6 +42,7 @@ struct request_sock_ops { int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req); struct saved_syn { + u32 mac_hdrlen; u32 network_hdrlen; u32 tcp_hdrlen; u8 data[]; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f67ec5d9e57d..544b89a64918 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4540,6 +4540,7 @@ enum { */ TCP_BPF_SYN = 1005, /* Copy the TCP header */ TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */ + TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ }; enum { diff --git a/net/core/filter.c b/net/core/filter.c index ab5603d5b62a..47eef9a0be6a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4682,11 +4682,16 @@ static int bpf_sock_ops_get_syn(struct bpf_sock_ops_kern *bpf_sock, if (optname == TCP_BPF_SYN) { hdr_start = syn_skb->data; ret = tcp_hdrlen(syn_skb); - } else { - /* optname == TCP_BPF_SYN_IP */ + } else if (optname == TCP_BPF_SYN_IP) { hdr_start = skb_network_header(syn_skb); ret = skb_network_header_len(syn_skb) + tcp_hdrlen(syn_skb); + } else { + /* optname == TCP_BPF_SYN_MAC */ + hdr_start = skb_mac_header(syn_skb); + ret = skb_mac_header_len(syn_skb) + + skb_network_header_len(syn_skb) + + tcp_hdrlen(syn_skb); } } else { struct sock *sk = bpf_sock->sk; @@ -4706,12 +4711,24 @@ static int bpf_sock_ops_get_syn(struct bpf_sock_ops_kern *bpf_sock, if (optname == TCP_BPF_SYN) { hdr_start = saved_syn->data + + saved_syn->mac_hdrlen + saved_syn->network_hdrlen; ret = saved_syn->tcp_hdrlen; + } else if (optname == TCP_BPF_SYN_IP) { + hdr_start = saved_syn->data + + saved_syn->mac_hdrlen; + ret = saved_syn->network_hdrlen + + saved_syn->tcp_hdrlen; } else { - /* optname == TCP_BPF_SYN_IP */ + /* optname == TCP_BPF_SYN_MAC */ + + /* TCP_SAVE_SYN may not have saved the mac hdr */ + if (!saved_syn->mac_hdrlen) + return -ENOENT; + hdr_start = saved_syn->data; - ret = saved_syn->network_hdrlen + + ret = saved_syn->mac_hdrlen + + saved_syn->network_hdrlen + saved_syn->tcp_hdrlen; } } @@ -4724,7 +4741,7 @@ BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, int, level, int, optname, char *, optval, int, optlen) { if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP && - optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_IP) { + optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_MAC) { int ret, copy_len = 0; const u8 *start; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6075cb091a20..57a568875539 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3211,7 +3211,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, break; case TCP_SAVE_SYN: - if (val < 0 || val > 1) + /* 0: disable, 1: enable, 2: start from ether_header */ + if (val < 0 || val > 2) err = -EINVAL; else tp->save_syn = val; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 319cc7fd5117..4337841faeff 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6676,13 +6676,25 @@ static void tcp_reqsk_record_syn(const struct sock *sk, if (tcp_sk(sk)->save_syn) { u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb); struct saved_syn *saved_syn; + u32 mac_hdrlen; + void *base; + + if (tcp_sk(sk)->save_syn == 2) { /* Save full header. */ + base = skb_mac_header(skb); + mac_hdrlen = skb_mac_header_len(skb); + len += mac_hdrlen; + } else { + base = skb_network_header(skb); + mac_hdrlen = 0; + } saved_syn = kmalloc(struct_size(saved_syn, data, len), GFP_ATOMIC); if (saved_syn) { + saved_syn->mac_hdrlen = mac_hdrlen; saved_syn->network_hdrlen = skb_network_header_len(skb); saved_syn->tcp_hdrlen = tcp_hdrlen(skb); - memcpy(saved_syn->data, skb_network_header(skb), len); + memcpy(saved_syn->data, base, len); req->saved_syn = saved_syn; } } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f67ec5d9e57d..544b89a64918 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4540,6 +4540,7 @@ enum { */ TCP_BPF_SYN = 1005, /* Copy the TCP header */ TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */ + TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ }; enum { -- cgit v1.3.1 From 3418c56de81fd73f2265c8915f4b910bcc141cb7 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 21 Aug 2020 15:55:56 -0700 Subject: libbpf: Avoid false unuinitialized variable warning in bpf_core_apply_relo Some versions of GCC report uninitialized targ_spec usage. GCC is wrong, but let's avoid unnecessary warnings. Fixes: ddc7c3042614 ("libbpf: implement BPF CO-RE offset relocation algorithm") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200821225556.2178419-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index fb7b8fb1d3fa..cd4a2bba05ff 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5390,7 +5390,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, struct hashmap *cand_cache) { const char *prog_name = bpf_program__title(prog, false); - struct bpf_core_spec local_spec, cand_spec, targ_spec; + struct bpf_core_spec local_spec, cand_spec, targ_spec = {}; const void *type_key = u32_as_hash_key(relo->type_id); struct bpf_core_relo_res cand_res, targ_res; const struct btf_type *local_type; -- cgit v1.3.1 From f872e4bc47f100ccbbe9469cd12e5ac78e4b6eda Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 21 Aug 2020 15:56:53 -0700 Subject: libbpf: Fix type compatibility check copy-paste error Fix copy-paste error in types compatibility check. Local type is accidentally used instead of target type for the very first type check strictness check. This can result in potentially less strict candidate comparison. Fix the error. Fixes: 3fc32f40c402 ("libbpf: Implement type-based CO-RE relocations support") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200821225653.2180782-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index cd4a2bba05ff..2e2523d8bb6d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4708,7 +4708,7 @@ static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id /* caller made sure that names match (ignoring flavor suffix) */ local_type = btf__type_by_id(local_btf, local_id); - targ_type = btf__type_by_id(local_btf, local_id); + targ_type = btf__type_by_id(targ_btf, targ_id); if (btf_kind(local_type) != btf_kind(targ_type)) return 0; -- cgit v1.3.1 From 8c3b3d971f2e4d72d8768457c40b513ca5d1f906 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 24 Aug 2020 09:45:23 +0100 Subject: selftests: bpf: Fix sockmap update nits Address review by Yonghong, to bring the new tests in line with the usual code style. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200824084523.13104-1-lmb@cloudflare.com --- tools/testing/selftests/bpf/prog_tests/sockmap_basic.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 65ce7c289534..0b79d78b98db 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -118,10 +118,8 @@ static void test_sockmap_update(enum bpf_map_type map_type) return; skel = test_sockmap_update__open_and_load(); - if (CHECK(!skel, "open_and_load", "cannot load skeleton\n")) { - close(sk); - return; - } + if (CHECK(!skel, "open_and_load", "cannot load skeleton\n")) + goto close_sk; prog = bpf_program__fd(skel->progs.copy_sock_map); src = bpf_map__fd(skel->maps.src); @@ -158,8 +156,9 @@ static void test_sockmap_update(enum bpf_map_type map_type) dst_cookie, src_cookie); out: - close(sk); test_sockmap_update__destroy(skel); +close_sk: + close(sk); } static void test_sockmap_invalid_update(void) @@ -168,8 +167,7 @@ static void test_sockmap_invalid_update(void) int duration = 0; skel = test_sockmap_invalid_update__open_and_load(); - CHECK(skel, "open_and_load", "verifier accepted map_update\n"); - if (skel) + if (CHECK(skel, "open_and_load", "verifier accepted map_update\n")) test_sockmap_invalid_update__destroy(skel); } -- cgit v1.3.1 From 883957b1c4ac5554ce515e882b7b2b20cbadfdd1 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 3 Jul 2020 15:40:30 +0200 Subject: objtool: Add atomic builtin TSAN instrumentation to uaccess whitelist Adds the new TSAN functions that may be emitted for atomic builtins to objtool's uaccess whitelist. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Cc: Josh Poimboeuf Cc: Peter Zijlstra --- tools/objtool/check.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index e034a8f24f46..7546a9d4e3cf 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -528,6 +528,56 @@ static const char *uaccess_safe_builtin[] = { "__tsan_write4", "__tsan_write8", "__tsan_write16", + "__tsan_atomic8_load", + "__tsan_atomic16_load", + "__tsan_atomic32_load", + "__tsan_atomic64_load", + "__tsan_atomic8_store", + "__tsan_atomic16_store", + "__tsan_atomic32_store", + "__tsan_atomic64_store", + "__tsan_atomic8_exchange", + "__tsan_atomic16_exchange", + "__tsan_atomic32_exchange", + "__tsan_atomic64_exchange", + "__tsan_atomic8_fetch_add", + "__tsan_atomic16_fetch_add", + "__tsan_atomic32_fetch_add", + "__tsan_atomic64_fetch_add", + "__tsan_atomic8_fetch_sub", + "__tsan_atomic16_fetch_sub", + "__tsan_atomic32_fetch_sub", + "__tsan_atomic64_fetch_sub", + "__tsan_atomic8_fetch_and", + "__tsan_atomic16_fetch_and", + "__tsan_atomic32_fetch_and", + "__tsan_atomic64_fetch_and", + "__tsan_atomic8_fetch_or", + "__tsan_atomic16_fetch_or", + "__tsan_atomic32_fetch_or", + "__tsan_atomic64_fetch_or", + "__tsan_atomic8_fetch_xor", + "__tsan_atomic16_fetch_xor", + "__tsan_atomic32_fetch_xor", + "__tsan_atomic64_fetch_xor", + "__tsan_atomic8_fetch_nand", + "__tsan_atomic16_fetch_nand", + "__tsan_atomic32_fetch_nand", + "__tsan_atomic64_fetch_nand", + "__tsan_atomic8_compare_exchange_strong", + "__tsan_atomic16_compare_exchange_strong", + "__tsan_atomic32_compare_exchange_strong", + "__tsan_atomic64_compare_exchange_strong", + "__tsan_atomic8_compare_exchange_weak", + "__tsan_atomic16_compare_exchange_weak", + "__tsan_atomic32_compare_exchange_weak", + "__tsan_atomic64_compare_exchange_weak", + "__tsan_atomic8_compare_exchange_val", + "__tsan_atomic16_compare_exchange_val", + "__tsan_atomic32_compare_exchange_val", + "__tsan_atomic64_compare_exchange_val", + "__tsan_atomic_thread_fence", + "__tsan_atomic_signal_fence", /* KCOV */ "write_comp_data", "check_kcov_mode", -- cgit v1.3.1 From a81b37590ff2e2507940ec278910b1d315dc73b3 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 24 Jul 2020 09:00:02 +0200 Subject: objtool, kcsan: Add __tsan_read_write to uaccess whitelist Adds the new __tsan_read_write compound instrumentation to objtool's uaccess whitelist. Acked-by: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- tools/objtool/check.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 7546a9d4e3cf..5eee156f6f90 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -528,6 +528,11 @@ static const char *uaccess_safe_builtin[] = { "__tsan_write4", "__tsan_write8", "__tsan_write16", + "__tsan_read_write1", + "__tsan_read_write2", + "__tsan_read_write4", + "__tsan_read_write8", + "__tsan_read_write16", "__tsan_atomic8_load", "__tsan_atomic16_load", "__tsan_atomic32_load", -- cgit v1.3.1 From f033ad8d85cc2ffafdba3f848295b9840d028278 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 23 Aug 2020 11:06:23 +0300 Subject: selftests: mlxsw: Decrease required rate accuracy On Spectrum-{2,3} the required accuracy is +/-10%. Align the test to this requirement so that it can reliably pass on these platforms. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh index 47edf099a17e..bd4ffed1cca3 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh @@ -220,8 +220,8 @@ __rate_test() rate=$(trap_rate_get) pct=$((100 * (rate - 1000) / 1000)) - ((-5 <= pct && pct <= 5)) - check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-5%" + ((-10 <= pct && pct <= 10)) + check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-10%" log_info "Expected rate 1000 pps, measured rate $rate pps" drop_rate=$(policer_drop_rate_get $id) -- cgit v1.3.1 From 8e0d8ce4fc8ba96af716f7a80702e71c3e379f85 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 23 Aug 2020 11:06:24 +0300 Subject: selftests: mlxsw: Increase burst size for rate test The current combination of rate and burst size does not adhere to Spectrum-{2,3} limitation which states that the minimum burst size should be 40% of the rate. Increase the burst size in order to honor above mentioned limitation and avoid intermittent failures of this test case on Spectrum-{2,3}. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh index bd4ffed1cca3..ecba9ab81c2a 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh @@ -207,7 +207,7 @@ __rate_test() RET=0 - devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 16 + devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512 devlink trap group set $DEVLINK_DEV group l3_drops policer $id # Send packets at highest possible rate and make sure they are dropped -- cgit v1.3.1 From b36cca02dc6e19d7d9134f247fabab58259cc992 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 23 Aug 2020 11:06:25 +0300 Subject: selftests: mlxsw: Increase burst size for burst test The current combination of rate and burst size does not adhere to Spectrum-{2,3} limitation which states that the minimum burst size should be 40% of the rate. Increase the burst size in order to honor above mentioned limitation and avoid intermittent failures of this test case on Spectrum-{2,3}. Remove the first sub-test case as the variation in number of received packets is simply too large to reliably test it. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/devlink_trap_policer.sh | 27 ++-------------------- 1 file changed, 2 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh index ecba9ab81c2a..508a702f0021 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh @@ -288,35 +288,12 @@ __burst_test() RET=0 - devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 32 + devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512 devlink trap group set $DEVLINK_DEV group l3_drops policer $id - # Send a burst of 64 packets and make sure that about 32 are received - # and the rest are dropped by the policer - log_info "=== Tx burst size: 64, Policer burst size: 32 pps ===" - - t0_rx=$(devlink_trap_rx_packets_get blackhole_route) - t0_drop=$(devlink_trap_policer_rx_dropped_get $id) - - start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 64 - - t1_rx=$(devlink_trap_rx_packets_get blackhole_route) - t1_drop=$(devlink_trap_policer_rx_dropped_get $id) - - rx=$((t1_rx - t0_rx)) - pct=$((100 * (rx - 32) / 32)) - ((-20 <= pct && pct <= 20)) - check_err $? "Expected burst size of 32 packets, got $rx packets, which is $pct% off. Required accuracy is +-20%" - log_info "Expected burst size of 32 packets, measured burst size of $rx packets" - - drop=$((t1_drop - t0_drop)) - (( drop > 0 )) - check_err $? "Expected non-zero policer drops, got 0" - log_info "Measured policer drops of $drop packets" - # Send a burst of 16 packets and make sure that 16 are received # and that none are dropped by the policer - log_info "=== Tx burst size: 16, Policer burst size: 32 pps ===" + log_info "=== Tx burst size: 16, Policer burst size: 512 ===" t0_rx=$(devlink_trap_rx_packets_get blackhole_route) t0_drop=$(devlink_trap_policer_rx_dropped_get $id) -- cgit v1.3.1 From 24f54c522590db790c2311a35284939c03268231 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Sun, 23 Aug 2020 11:06:26 +0300 Subject: selftests: forwarding: Fix mausezahn delay parameter in mirror_test() Currently, mausezahn delay parameter in mirror_test() is specified with 'ms' units. mausezahn versions before 0.6.5 interpret 'ms' as seconds and therefore the tests that use mirror_test() take a very long time to complete. Resolve this by specifying 'msec' units. Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/mirror_lib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh index c33bfd7ba214..13db1cb50e57 100644 --- a/tools/testing/selftests/net/forwarding/mirror_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh @@ -31,7 +31,7 @@ mirror_test() local t0=$(tc_rule_stats_get $dev $pref) $MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \ - -c 10 -d 100ms -t icmp type=8 + -c 10 -d 100msec -t icmp type=8 sleep 0.5 local t1=$(tc_rule_stats_get $dev $pref) local delta=$((t1 - t0)) -- cgit v1.3.1 From ffff9c9cb451d16c42cea79adbaaafa423902113 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 23 Aug 2020 11:06:27 +0300 Subject: selftests: mlxsw: Reduce runtime of tc-police scale test Currently, the test takes about 626 seconds to complete because of an inefficient use of the device's TCAM. Reduce the runtime to 202 seconds by inserting all the flower filters with the same preference and mask, but with a different key. In particular, this reduces the deletion of the qdisc (which triggers the deletion of all the filters) from 66 seconds to 0.2 seconds. This prevents various netlink requests from user space applications (e.g., systemd-networkd) from timing-out because RTNL is not held for too long anymore. Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh index 4b96561c462f..3e3e06ea5703 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh @@ -24,6 +24,13 @@ tc_police_switch_destroy() simple_if_fini $swp1 } +tc_police_addr() +{ + local num=$1; shift + + printf "2001:db8:1::%x" $num +} + tc_police_rules_create() { local count=$1; shift @@ -34,8 +41,9 @@ tc_police_rules_create() for ((i = 0; i < count; ++i)); do cat >> $TC_POLICE_BATCH_FILE <<-EOF filter add dev $swp1 ingress \ - prot ip \ - flower skip_sw \ + prot ipv6 \ + pref 1000 \ + flower skip_sw dst_ip $(tc_police_addr $i) \ action police rate 10mbit burst 100k \ conform-exceed drop/ok EOF -- cgit v1.3.1 From 687d4775db56d24c81b4704056186d6c506de30d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 30 Jun 2020 13:37:22 -0700 Subject: torture: Declare parse-console.sh independence from rcutorture Currently, parse-torture.sh looks at the fifth field of torture-test console output for the version number. This works fine for rcutorture, but not for scftorture, which lacks the pointer field. This commit therefore adjusts matching lines so that the parse-console.sh awk script always sees the version number as the first field in the lines passed to it. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/parse-console.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 71a9f43a3918..4e081a25761e 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -67,6 +67,7 @@ then grep --binary-files=text 'torture:.*ver:' $file | egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' | + sed -e 's/^.*ver: //' | awk ' BEGIN { ver = 0; @@ -74,13 +75,13 @@ then } { - if (!badseq && ($5 + 0 != $5 || $5 <= ver)) { + if (!badseq && ($1 + 0 != $1 || $1 <= ver)) { badseqno1 = ver; - badseqno2 = $5; + badseqno2 = $1; badseqnr = NR; badseq = 1; } - ver = $5 + ver = $1 } END { -- cgit v1.3.1 From 80c9476e683ec37ba45fd8e6a5c5081bea207e1a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 24 Jun 2020 17:57:07 -0700 Subject: torture: Add scftorture to the rcutorture scripting This commit updates the rcutorture scripting to include the new scftorture torture-test module. Signed-off-by: Paul E. McKenney --- .../selftests/rcutorture/bin/kvm-recheck-scf.sh | 38 ++++++++++++++++++++++ tools/testing/selftests/rcutorture/bin/kvm.sh | 2 +- .../selftests/rcutorture/configs/scf/CFLIST | 2 ++ .../selftests/rcutorture/configs/scf/CFcommon | 2 ++ .../selftests/rcutorture/configs/scf/NOPREEMPT | 9 +++++ .../rcutorture/configs/scf/NOPREEMPT.boot | 1 + .../selftests/rcutorture/configs/scf/PREEMPT | 9 +++++ .../rcutorture/configs/scf/ver_functions.sh | 30 +++++++++++++++++ 8 files changed, 92 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh create mode 100644 tools/testing/selftests/rcutorture/configs/scf/CFLIST create mode 100644 tools/testing/selftests/rcutorture/configs/scf/CFcommon create mode 100644 tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT create mode 100644 tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot create mode 100644 tools/testing/selftests/rcutorture/configs/scf/PREEMPT create mode 100644 tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh new file mode 100755 index 000000000000..671bfee4fcef --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh @@ -0,0 +1,38 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Analyze a given results directory for rcutorture progress. +# +# Usage: kvm-recheck-rcu.sh resdir +# +# Copyright (C) Facebook, 2020 +# +# Authors: Paul E. McKenney + +i="$1" +if test -d "$i" -a -r "$i" +then + : +else + echo Unreadable results directory: $i + exit 1 +fi +. functions.sh + +configfile=`echo $i | sed -e 's/^.*\///'` +nscfs="`grep 'scf_invoked_count ver:' $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* scf_invoked_count ver: //' -e 's/ .*$//' | tr -d '\015'`" +if test -z "$nscfs" +then + echo "$configfile ------- " +else + dur="`sed -e 's/^.* scftorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`" + if test -z "$dur" + then + rate="" + else + nscfss=`awk -v nscfs=$nscfs -v dur=$dur ' + BEGIN { print nscfs / dur }' < /dev/null` + rate=" ($nscfss/s)" + fi + echo "${configfile} ------- ${nscfs} SCF handler invocations$rate" +fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index e655983b7429..44dfdd9be67e 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -184,7 +184,7 @@ do shift ;; --torture) - checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\|refscale\)$' '^--' + checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\|refscale\|scf\)$' '^--' TORTURE_SUITE=$2 shift if test "$TORTURE_SUITE" = rcuperf || test "$TORTURE_SUITE" = refscale diff --git a/tools/testing/selftests/rcutorture/configs/scf/CFLIST b/tools/testing/selftests/rcutorture/configs/scf/CFLIST new file mode 100644 index 000000000000..4d62eb4a39f9 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/scf/CFLIST @@ -0,0 +1,2 @@ +NOPREEMPT +PREEMPT diff --git a/tools/testing/selftests/rcutorture/configs/scf/CFcommon b/tools/testing/selftests/rcutorture/configs/scf/CFcommon new file mode 100644 index 000000000000..c11ab91f49f5 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/scf/CFcommon @@ -0,0 +1,2 @@ +CONFIG_SCF_TORTURE_TEST=y +CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT new file mode 100644 index 000000000000..b8429d6c6ebc --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT @@ -0,0 +1,9 @@ +CONFIG_SMP=y +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=n +CONFIG_NO_HZ_FULL=y +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot new file mode 100644 index 000000000000..d6a7fa097c2e --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot @@ -0,0 +1 @@ +nohz_full=1 diff --git a/tools/testing/selftests/rcutorture/configs/scf/PREEMPT b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT new file mode 100644 index 000000000000..ae4992b141b0 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT @@ -0,0 +1,9 @@ +CONFIG_SMP=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y diff --git a/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh new file mode 100644 index 000000000000..d3d9e35d3d55 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Torture-suite-dependent shell functions for the rest of the scripts. +# +# Copyright (C) Facebook, 2020 +# +# Authors: Paul E. McKenney + +# scftorture_param_onoff bootparam-string config-file +# +# Adds onoff scftorture module parameters to kernels having it. +scftorture_param_onoff () { + if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2" + then + echo CPU-hotplug kernel, adding scftorture onoff. 1>&2 + echo scftorture.onoff_interval=1000 scftorture.onoff_holdoff=30 + fi +} + +# per_version_boot_params bootparam-string config-file seconds +# +# Adds per-version torture-module parameters to kernels supporting them. +per_version_boot_params () { + echo $1 `scftorture_param_onoff "$1" "$2"` \ + scftorture.stat_interval=15 \ + scftorture.shutdown_secs=$3 \ + scftorture.verbose=1 \ + scf +} -- cgit v1.3.1 From 4e88ec4a9eb17527e640b063f79e5b875733eb53 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 11 Aug 2020 21:18:12 -0700 Subject: rcuperf: Change rcuperf to rcuscale This commit further avoids conflation of rcuperf with the kernel's perf feature by renaming kernel/rcu/rcuperf.c to kernel/rcu/rcuscale.c, and also by similarly renaming the functions and variables inside this file. This has the side effect of changing the names of the kernel boot parameters, so kernel-parameters.txt and ver_functions.sh are also updated. The rcutorture --torture type was also updated from rcuperf to rcuscale. [ paulmck: Fix bugs located by Stephen Rothwell. ] Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- Documentation/admin-guide/kernel-parameters.txt | 36 +- MAINTAINERS | 3 +- kernel/rcu/Kconfig.debug | 2 +- kernel/rcu/Makefile | 2 +- kernel/rcu/rcuperf.c | 853 --------------------- kernel/rcu/rcuscale.c | 853 +++++++++++++++++++++ .../rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh | 109 --- .../rcutorture/bin/kvm-recheck-rcuperf.sh | 83 -- .../rcutorture/bin/kvm-recheck-rcuscale-ftrace.sh | 109 +++ .../rcutorture/bin/kvm-recheck-rcuscale.sh | 83 ++ tools/testing/selftests/rcutorture/bin/kvm.sh | 8 +- .../selftests/rcutorture/bin/parse-console.sh | 4 +- .../selftests/rcutorture/configs/rcuperf/CFLIST | 1 - .../selftests/rcutorture/configs/rcuperf/CFcommon | 2 - .../selftests/rcutorture/configs/rcuperf/TINY | 16 - .../selftests/rcutorture/configs/rcuperf/TREE | 19 - .../selftests/rcutorture/configs/rcuperf/TREE54 | 22 - .../rcutorture/configs/rcuperf/ver_functions.sh | 16 - .../selftests/rcutorture/configs/rcuscale/CFLIST | 1 + .../selftests/rcutorture/configs/rcuscale/CFcommon | 2 + .../selftests/rcutorture/configs/rcuscale/TINY | 16 + .../selftests/rcutorture/configs/rcuscale/TREE | 19 + .../selftests/rcutorture/configs/rcuscale/TREE54 | 22 + .../rcutorture/configs/rcuscale/ver_functions.sh | 16 + 24 files changed, 1149 insertions(+), 1148 deletions(-) delete mode 100644 kernel/rcu/rcuperf.c create mode 100644 kernel/rcu/rcuscale.c delete mode 100755 tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh delete mode 100755 tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh create mode 100755 tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale-ftrace.sh create mode 100755 tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh delete mode 100644 tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST delete mode 100644 tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon delete mode 100644 tools/testing/selftests/rcutorture/configs/rcuperf/TINY delete mode 100644 tools/testing/selftests/rcutorture/configs/rcuperf/TREE delete mode 100644 tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 delete mode 100644 tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh create mode 100644 tools/testing/selftests/rcutorture/configs/rcuscale/CFLIST create mode 100644 tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon create mode 100644 tools/testing/selftests/rcutorture/configs/rcuscale/TINY create mode 100644 tools/testing/selftests/rcutorture/configs/rcuscale/TREE create mode 100644 tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 create mode 100644 tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh (limited to 'tools') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 91a56382ae56..c27bbe95e7cb 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4157,41 +4157,41 @@ rcu_node tree with an eye towards determining why a new grace period has not yet started. - rcuperf.gp_async= [KNL] + rcuscale.gp_async= [KNL] Measure performance of asynchronous grace-period primitives such as call_rcu(). - rcuperf.gp_async_max= [KNL] + rcuscale.gp_async_max= [KNL] Specify the maximum number of outstanding callbacks per writer thread. When a writer thread exceeds this limit, it invokes the corresponding flavor of rcu_barrier() to allow previously posted callbacks to drain. - rcuperf.gp_exp= [KNL] + rcuscale.gp_exp= [KNL] Measure performance of expedited synchronous grace-period primitives. - rcuperf.holdoff= [KNL] + rcuscale.holdoff= [KNL] Set test-start holdoff period. The purpose of this parameter is to delay the start of the test until boot completes in order to avoid interference. - rcuperf.kfree_rcu_test= [KNL] + rcuscale.kfree_rcu_test= [KNL] Set to measure performance of kfree_rcu() flooding. - rcuperf.kfree_nthreads= [KNL] + rcuscale.kfree_nthreads= [KNL] The number of threads running loops of kfree_rcu(). - rcuperf.kfree_alloc_num= [KNL] + rcuscale.kfree_alloc_num= [KNL] Number of allocations and frees done in an iteration. - rcuperf.kfree_loops= [KNL] - Number of loops doing rcuperf.kfree_alloc_num number + rcuscale.kfree_loops= [KNL] + Number of loops doing rcuscale.kfree_alloc_num number of allocations and frees. - rcuperf.nreaders= [KNL] + rcuscale.nreaders= [KNL] Set number of RCU readers. The value -1 selects N, where N is the number of CPUs. A value "n" less than -1 selects N-n+1, where N is again @@ -4200,23 +4200,23 @@ A value of "n" less than or equal to -N selects a single reader. - rcuperf.nwriters= [KNL] + rcuscale.nwriters= [KNL] Set number of RCU writers. The values operate - the same as for rcuperf.nreaders. + the same as for rcuscale.nreaders. N, where N is the number of CPUs - rcuperf.perf_type= [KNL] + rcuscale.perf_type= [KNL] Specify the RCU implementation to test. - rcuperf.shutdown= [KNL] + rcuscale.shutdown= [KNL] Shut the system down after performance tests complete. This is useful for hands-off automated testing. - rcuperf.verbose= [KNL] + rcuscale.verbose= [KNL] Enable additional printk() statements. - rcuperf.writer_holdoff= [KNL] + rcuscale.writer_holdoff= [KNL] Write-side holdoff between grace periods, in microseconds. The default of zero says no holdoff. @@ -4490,8 +4490,8 @@ refscale.shutdown= [KNL] Shut down the system at the end of the performance test. This defaults to 1 (shut it down) when - rcuperf is built into the kernel and to 0 (leave - it running) when rcuperf is built as a module. + refscale is built into the kernel and to 0 (leave + it running) when refscale is built as a module. refscale.verbose= [KNL] Enable additional printk() statements. diff --git a/MAINTAINERS b/MAINTAINERS index deaafb617361..d299e3bb10ad 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17510,8 +17510,9 @@ S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev F: Documentation/RCU/torture.rst F: kernel/locking/locktorture.c -F: kernel/rcu/rcuperf.c +F: kernel/rcu/rcuscale.c F: kernel/rcu/rcutorture.c +F: kernel/rcu/refscale.c F: kernel/torture.c TOSHIBA ACPI EXTRAS DRIVER diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug index 3cf6132a4bb9..5cb175df6ece 100644 --- a/kernel/rcu/Kconfig.debug +++ b/kernel/rcu/Kconfig.debug @@ -23,7 +23,7 @@ config TORTURE_TEST tristate default n -config RCU_PERF_TEST +config RCU_SCALE_TEST tristate "performance tests for RCU" depends on DEBUG_KERNEL select TORTURE_TEST diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile index 95f5117ef8da..0cfb009a99b9 100644 --- a/kernel/rcu/Makefile +++ b/kernel/rcu/Makefile @@ -11,7 +11,7 @@ obj-y += update.o sync.o obj-$(CONFIG_TREE_SRCU) += srcutree.o obj-$(CONFIG_TINY_SRCU) += srcutiny.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o -obj-$(CONFIG_RCU_PERF_TEST) += rcuperf.o +obj-$(CONFIG_RCU_SCALE_TEST) += rcuscale.o obj-$(CONFIG_RCU_REF_SCALE_TEST) += refscale.o obj-$(CONFIG_TREE_RCU) += tree.o obj-$(CONFIG_TINY_RCU) += tiny.o diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c deleted file mode 100644 index 21448d3374e2..000000000000 --- a/kernel/rcu/rcuperf.c +++ /dev/null @@ -1,853 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Read-Copy Update module-based performance-test facility - * - * Copyright (C) IBM Corporation, 2015 - * - * Authors: Paul E. McKenney - */ - -#define pr_fmt(fmt) fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "rcu.h" - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Paul E. McKenney "); - -#define PERF_FLAG "-perf:" -#define PERFOUT_STRING(s) \ - pr_alert("%s" PERF_FLAG " %s\n", perf_type, s) -#define VERBOSE_PERFOUT_STRING(s) \ - do { if (verbose) pr_alert("%s" PERF_FLAG " %s\n", perf_type, s); } while (0) -#define VERBOSE_PERFOUT_ERRSTRING(s) \ - do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0) - -/* - * The intended use cases for the nreaders and nwriters module parameters - * are as follows: - * - * 1. Specify only the nr_cpus kernel boot parameter. This will - * set both nreaders and nwriters to the value specified by - * nr_cpus for a mixed reader/writer test. - * - * 2. Specify the nr_cpus kernel boot parameter, but set - * rcuperf.nreaders to zero. This will set nwriters to the - * value specified by nr_cpus for an update-only test. - * - * 3. Specify the nr_cpus kernel boot parameter, but set - * rcuperf.nwriters to zero. This will set nreaders to the - * value specified by nr_cpus for a read-only test. - * - * Various other use cases may of course be specified. - * - * Note that this test's readers are intended only as a test load for - * the writers. The reader performance statistics will be overly - * pessimistic due to the per-critical-section interrupt disabling, - * test-end checks, and the pair of calls through pointers. - */ - -#ifdef MODULE -# define RCUPERF_SHUTDOWN 0 -#else -# define RCUPERF_SHUTDOWN 1 -#endif - -torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives"); -torture_param(int, gp_async_max, 1000, "Max # outstanding waits per reader"); -torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); -torture_param(int, holdoff, 10, "Holdoff time before test start (s)"); -torture_param(int, nreaders, -1, "Number of RCU reader threads"); -torture_param(int, nwriters, -1, "Number of RCU updater threads"); -torture_param(bool, shutdown, RCUPERF_SHUTDOWN, - "Shutdown at end of performance tests."); -torture_param(int, verbose, 1, "Enable verbose debugging printk()s"); -torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable"); -torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() perf test?"); -torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate."); - -static char *perf_type = "rcu"; -module_param(perf_type, charp, 0444); -MODULE_PARM_DESC(perf_type, "Type of RCU to performance-test (rcu, srcu, ...)"); - -static int nrealreaders; -static int nrealwriters; -static struct task_struct **writer_tasks; -static struct task_struct **reader_tasks; -static struct task_struct *shutdown_task; - -static u64 **writer_durations; -static int *writer_n_durations; -static atomic_t n_rcu_perf_reader_started; -static atomic_t n_rcu_perf_writer_started; -static atomic_t n_rcu_perf_writer_finished; -static wait_queue_head_t shutdown_wq; -static u64 t_rcu_perf_writer_started; -static u64 t_rcu_perf_writer_finished; -static unsigned long b_rcu_gp_test_started; -static unsigned long b_rcu_gp_test_finished; -static DEFINE_PER_CPU(atomic_t, n_async_inflight); - -#define MAX_MEAS 10000 -#define MIN_MEAS 100 - -/* - * Operations vector for selecting different types of tests. - */ - -struct rcu_perf_ops { - int ptype; - void (*init)(void); - void (*cleanup)(void); - int (*readlock)(void); - void (*readunlock)(int idx); - unsigned long (*get_gp_seq)(void); - unsigned long (*gp_diff)(unsigned long new, unsigned long old); - unsigned long (*exp_completed)(void); - void (*async)(struct rcu_head *head, rcu_callback_t func); - void (*gp_barrier)(void); - void (*sync)(void); - void (*exp_sync)(void); - const char *name; -}; - -static struct rcu_perf_ops *cur_ops; - -/* - * Definitions for rcu perf testing. - */ - -static int rcu_perf_read_lock(void) __acquires(RCU) -{ - rcu_read_lock(); - return 0; -} - -static void rcu_perf_read_unlock(int idx) __releases(RCU) -{ - rcu_read_unlock(); -} - -static unsigned long __maybe_unused rcu_no_completed(void) -{ - return 0; -} - -static void rcu_sync_perf_init(void) -{ -} - -static struct rcu_perf_ops rcu_ops = { - .ptype = RCU_FLAVOR, - .init = rcu_sync_perf_init, - .readlock = rcu_perf_read_lock, - .readunlock = rcu_perf_read_unlock, - .get_gp_seq = rcu_get_gp_seq, - .gp_diff = rcu_seq_diff, - .exp_completed = rcu_exp_batches_completed, - .async = call_rcu, - .gp_barrier = rcu_barrier, - .sync = synchronize_rcu, - .exp_sync = synchronize_rcu_expedited, - .name = "rcu" -}; - -/* - * Definitions for srcu perf testing. - */ - -DEFINE_STATIC_SRCU(srcu_ctl_perf); -static struct srcu_struct *srcu_ctlp = &srcu_ctl_perf; - -static int srcu_perf_read_lock(void) __acquires(srcu_ctlp) -{ - return srcu_read_lock(srcu_ctlp); -} - -static void srcu_perf_read_unlock(int idx) __releases(srcu_ctlp) -{ - srcu_read_unlock(srcu_ctlp, idx); -} - -static unsigned long srcu_perf_completed(void) -{ - return srcu_batches_completed(srcu_ctlp); -} - -static void srcu_call_rcu(struct rcu_head *head, rcu_callback_t func) -{ - call_srcu(srcu_ctlp, head, func); -} - -static void srcu_rcu_barrier(void) -{ - srcu_barrier(srcu_ctlp); -} - -static void srcu_perf_synchronize(void) -{ - synchronize_srcu(srcu_ctlp); -} - -static void srcu_perf_synchronize_expedited(void) -{ - synchronize_srcu_expedited(srcu_ctlp); -} - -static struct rcu_perf_ops srcu_ops = { - .ptype = SRCU_FLAVOR, - .init = rcu_sync_perf_init, - .readlock = srcu_perf_read_lock, - .readunlock = srcu_perf_read_unlock, - .get_gp_seq = srcu_perf_completed, - .gp_diff = rcu_seq_diff, - .exp_completed = srcu_perf_completed, - .async = srcu_call_rcu, - .gp_barrier = srcu_rcu_barrier, - .sync = srcu_perf_synchronize, - .exp_sync = srcu_perf_synchronize_expedited, - .name = "srcu" -}; - -static struct srcu_struct srcud; - -static void srcu_sync_perf_init(void) -{ - srcu_ctlp = &srcud; - init_srcu_struct(srcu_ctlp); -} - -static void srcu_sync_perf_cleanup(void) -{ - cleanup_srcu_struct(srcu_ctlp); -} - -static struct rcu_perf_ops srcud_ops = { - .ptype = SRCU_FLAVOR, - .init = srcu_sync_perf_init, - .cleanup = srcu_sync_perf_cleanup, - .readlock = srcu_perf_read_lock, - .readunlock = srcu_perf_read_unlock, - .get_gp_seq = srcu_perf_completed, - .gp_diff = rcu_seq_diff, - .exp_completed = srcu_perf_completed, - .async = srcu_call_rcu, - .gp_barrier = srcu_rcu_barrier, - .sync = srcu_perf_synchronize, - .exp_sync = srcu_perf_synchronize_expedited, - .name = "srcud" -}; - -/* - * Definitions for RCU-tasks perf testing. - */ - -static int tasks_perf_read_lock(void) -{ - return 0; -} - -static void tasks_perf_read_unlock(int idx) -{ -} - -static struct rcu_perf_ops tasks_ops = { - .ptype = RCU_TASKS_FLAVOR, - .init = rcu_sync_perf_init, - .readlock = tasks_perf_read_lock, - .readunlock = tasks_perf_read_unlock, - .get_gp_seq = rcu_no_completed, - .gp_diff = rcu_seq_diff, - .async = call_rcu_tasks, - .gp_barrier = rcu_barrier_tasks, - .sync = synchronize_rcu_tasks, - .exp_sync = synchronize_rcu_tasks, - .name = "tasks" -}; - -static unsigned long rcuperf_seq_diff(unsigned long new, unsigned long old) -{ - if (!cur_ops->gp_diff) - return new - old; - return cur_ops->gp_diff(new, old); -} - -/* - * If performance tests complete, wait for shutdown to commence. - */ -static void rcu_perf_wait_shutdown(void) -{ - cond_resched_tasks_rcu_qs(); - if (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters) - return; - while (!torture_must_stop()) - schedule_timeout_uninterruptible(1); -} - -/* - * RCU perf reader kthread. Repeatedly does empty RCU read-side critical - * section, minimizing update-side interference. However, the point of - * this test is not to evaluate reader performance, but instead to serve - * as a test load for update-side performance testing. - */ -static int -rcu_perf_reader(void *arg) -{ - unsigned long flags; - int idx; - long me = (long)arg; - - VERBOSE_PERFOUT_STRING("rcu_perf_reader task started"); - set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids)); - set_user_nice(current, MAX_NICE); - atomic_inc(&n_rcu_perf_reader_started); - - do { - local_irq_save(flags); - idx = cur_ops->readlock(); - cur_ops->readunlock(idx); - local_irq_restore(flags); - rcu_perf_wait_shutdown(); - } while (!torture_must_stop()); - torture_kthread_stopping("rcu_perf_reader"); - return 0; -} - -/* - * Callback function for asynchronous grace periods from rcu_perf_writer(). - */ -static void rcu_perf_async_cb(struct rcu_head *rhp) -{ - atomic_dec(this_cpu_ptr(&n_async_inflight)); - kfree(rhp); -} - -/* - * RCU perf writer kthread. Repeatedly does a grace period. - */ -static int -rcu_perf_writer(void *arg) -{ - int i = 0; - int i_max; - long me = (long)arg; - struct rcu_head *rhp = NULL; - bool started = false, done = false, alldone = false; - u64 t; - u64 *wdp; - u64 *wdpp = writer_durations[me]; - - VERBOSE_PERFOUT_STRING("rcu_perf_writer task started"); - WARN_ON(!wdpp); - set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids)); - sched_set_fifo_low(current); - - if (holdoff) - schedule_timeout_uninterruptible(holdoff * HZ); - - /* - * Wait until rcu_end_inkernel_boot() is called for normal GP tests - * so that RCU is not always expedited for normal GP tests. - * The system_state test is approximate, but works well in practice. - */ - while (!gp_exp && system_state != SYSTEM_RUNNING) - schedule_timeout_uninterruptible(1); - - t = ktime_get_mono_fast_ns(); - if (atomic_inc_return(&n_rcu_perf_writer_started) >= nrealwriters) { - t_rcu_perf_writer_started = t; - if (gp_exp) { - b_rcu_gp_test_started = - cur_ops->exp_completed() / 2; - } else { - b_rcu_gp_test_started = cur_ops->get_gp_seq(); - } - } - - do { - if (writer_holdoff) - udelay(writer_holdoff); - wdp = &wdpp[i]; - *wdp = ktime_get_mono_fast_ns(); - if (gp_async) { -retry: - if (!rhp) - rhp = kmalloc(sizeof(*rhp), GFP_KERNEL); - if (rhp && atomic_read(this_cpu_ptr(&n_async_inflight)) < gp_async_max) { - atomic_inc(this_cpu_ptr(&n_async_inflight)); - cur_ops->async(rhp, rcu_perf_async_cb); - rhp = NULL; - } else if (!kthread_should_stop()) { - cur_ops->gp_barrier(); - goto retry; - } else { - kfree(rhp); /* Because we are stopping. */ - } - } else if (gp_exp) { - cur_ops->exp_sync(); - } else { - cur_ops->sync(); - } - t = ktime_get_mono_fast_ns(); - *wdp = t - *wdp; - i_max = i; - if (!started && - atomic_read(&n_rcu_perf_writer_started) >= nrealwriters) - started = true; - if (!done && i >= MIN_MEAS) { - done = true; - sched_set_normal(current, 0); - pr_alert("%s%s rcu_perf_writer %ld has %d measurements\n", - perf_type, PERF_FLAG, me, MIN_MEAS); - if (atomic_inc_return(&n_rcu_perf_writer_finished) >= - nrealwriters) { - schedule_timeout_interruptible(10); - rcu_ftrace_dump(DUMP_ALL); - PERFOUT_STRING("Test complete"); - t_rcu_perf_writer_finished = t; - if (gp_exp) { - b_rcu_gp_test_finished = - cur_ops->exp_completed() / 2; - } else { - b_rcu_gp_test_finished = - cur_ops->get_gp_seq(); - } - if (shutdown) { - smp_mb(); /* Assign before wake. */ - wake_up(&shutdown_wq); - } - } - } - if (done && !alldone && - atomic_read(&n_rcu_perf_writer_finished) >= nrealwriters) - alldone = true; - if (started && !alldone && i < MAX_MEAS - 1) - i++; - rcu_perf_wait_shutdown(); - } while (!torture_must_stop()); - if (gp_async) { - cur_ops->gp_barrier(); - } - writer_n_durations[me] = i_max; - torture_kthread_stopping("rcu_perf_writer"); - return 0; -} - -static void -rcu_perf_print_module_parms(struct rcu_perf_ops *cur_ops, const char *tag) -{ - pr_alert("%s" PERF_FLAG - "--- %s: nreaders=%d nwriters=%d verbose=%d shutdown=%d\n", - perf_type, tag, nrealreaders, nrealwriters, verbose, shutdown); -} - -static void -rcu_perf_cleanup(void) -{ - int i; - int j; - int ngps = 0; - u64 *wdp; - u64 *wdpp; - - /* - * Would like warning at start, but everything is expedited - * during the mid-boot phase, so have to wait till the end. - */ - if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp) - VERBOSE_PERFOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!"); - if (rcu_gp_is_normal() && gp_exp) - VERBOSE_PERFOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!"); - if (gp_exp && gp_async) - VERBOSE_PERFOUT_ERRSTRING("No expedited async GPs, so went with async!"); - - if (torture_cleanup_begin()) - return; - if (!cur_ops) { - torture_cleanup_end(); - return; - } - - if (reader_tasks) { - for (i = 0; i < nrealreaders; i++) - torture_stop_kthread(rcu_perf_reader, - reader_tasks[i]); - kfree(reader_tasks); - } - - if (writer_tasks) { - for (i = 0; i < nrealwriters; i++) { - torture_stop_kthread(rcu_perf_writer, - writer_tasks[i]); - if (!writer_n_durations) - continue; - j = writer_n_durations[i]; - pr_alert("%s%s writer %d gps: %d\n", - perf_type, PERF_FLAG, i, j); - ngps += j; - } - pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n", - perf_type, PERF_FLAG, - t_rcu_perf_writer_started, t_rcu_perf_writer_finished, - t_rcu_perf_writer_finished - - t_rcu_perf_writer_started, - ngps, - rcuperf_seq_diff(b_rcu_gp_test_finished, - b_rcu_gp_test_started)); - for (i = 0; i < nrealwriters; i++) { - if (!writer_durations) - break; - if (!writer_n_durations) - continue; - wdpp = writer_durations[i]; - if (!wdpp) - continue; - for (j = 0; j <= writer_n_durations[i]; j++) { - wdp = &wdpp[j]; - pr_alert("%s%s %4d writer-duration: %5d %llu\n", - perf_type, PERF_FLAG, - i, j, *wdp); - if (j % 100 == 0) - schedule_timeout_uninterruptible(1); - } - kfree(writer_durations[i]); - } - kfree(writer_tasks); - kfree(writer_durations); - kfree(writer_n_durations); - } - - /* Do torture-type-specific cleanup operations. */ - if (cur_ops->cleanup != NULL) - cur_ops->cleanup(); - - torture_cleanup_end(); -} - -/* - * Return the number if non-negative. If -1, the number of CPUs. - * If less than -1, that much less than the number of CPUs, but - * at least one. - */ -static int compute_real(int n) -{ - int nr; - - if (n >= 0) { - nr = n; - } else { - nr = num_online_cpus() + 1 + n; - if (nr <= 0) - nr = 1; - } - return nr; -} - -/* - * RCU perf shutdown kthread. Just waits to be awakened, then shuts - * down system. - */ -static int -rcu_perf_shutdown(void *arg) -{ - wait_event(shutdown_wq, - atomic_read(&n_rcu_perf_writer_finished) >= nrealwriters); - smp_mb(); /* Wake before output. */ - rcu_perf_cleanup(); - kernel_power_off(); - return -EINVAL; -} - -/* - * kfree_rcu() performance tests: Start a kfree_rcu() loop on all CPUs for number - * of iterations and measure total time and number of GP for all iterations to complete. - */ - -torture_param(int, kfree_nthreads, -1, "Number of threads running loops of kfree_rcu()."); -torture_param(int, kfree_alloc_num, 8000, "Number of allocations and frees done in an iteration."); -torture_param(int, kfree_loops, 10, "Number of loops doing kfree_alloc_num allocations and frees."); - -static struct task_struct **kfree_reader_tasks; -static int kfree_nrealthreads; -static atomic_t n_kfree_perf_thread_started; -static atomic_t n_kfree_perf_thread_ended; - -struct kfree_obj { - char kfree_obj[8]; - struct rcu_head rh; -}; - -static int -kfree_perf_thread(void *arg) -{ - int i, loop = 0; - long me = (long)arg; - struct kfree_obj *alloc_ptr; - u64 start_time, end_time; - long long mem_begin, mem_during = 0; - - VERBOSE_PERFOUT_STRING("kfree_perf_thread task started"); - set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids)); - set_user_nice(current, MAX_NICE); - - start_time = ktime_get_mono_fast_ns(); - - if (atomic_inc_return(&n_kfree_perf_thread_started) >= kfree_nrealthreads) { - if (gp_exp) - b_rcu_gp_test_started = cur_ops->exp_completed() / 2; - else - b_rcu_gp_test_started = cur_ops->get_gp_seq(); - } - - do { - if (!mem_during) { - mem_during = mem_begin = si_mem_available(); - } else if (loop % (kfree_loops / 4) == 0) { - mem_during = (mem_during + si_mem_available()) / 2; - } - - for (i = 0; i < kfree_alloc_num; i++) { - alloc_ptr = kmalloc(kfree_mult * sizeof(struct kfree_obj), GFP_KERNEL); - if (!alloc_ptr) - return -ENOMEM; - - kfree_rcu(alloc_ptr, rh); - } - - cond_resched(); - } while (!torture_must_stop() && ++loop < kfree_loops); - - if (atomic_inc_return(&n_kfree_perf_thread_ended) >= kfree_nrealthreads) { - end_time = ktime_get_mono_fast_ns(); - - if (gp_exp) - b_rcu_gp_test_finished = cur_ops->exp_completed() / 2; - else - b_rcu_gp_test_finished = cur_ops->get_gp_seq(); - - pr_alert("Total time taken by all kfree'ers: %llu ns, loops: %d, batches: %ld, memory footprint: %lldMB\n", - (unsigned long long)(end_time - start_time), kfree_loops, - rcuperf_seq_diff(b_rcu_gp_test_finished, b_rcu_gp_test_started), - (mem_begin - mem_during) >> (20 - PAGE_SHIFT)); - - if (shutdown) { - smp_mb(); /* Assign before wake. */ - wake_up(&shutdown_wq); - } - } - - torture_kthread_stopping("kfree_perf_thread"); - return 0; -} - -static void -kfree_perf_cleanup(void) -{ - int i; - - if (torture_cleanup_begin()) - return; - - if (kfree_reader_tasks) { - for (i = 0; i < kfree_nrealthreads; i++) - torture_stop_kthread(kfree_perf_thread, - kfree_reader_tasks[i]); - kfree(kfree_reader_tasks); - } - - torture_cleanup_end(); -} - -/* - * shutdown kthread. Just waits to be awakened, then shuts down system. - */ -static int -kfree_perf_shutdown(void *arg) -{ - wait_event(shutdown_wq, - atomic_read(&n_kfree_perf_thread_ended) >= kfree_nrealthreads); - - smp_mb(); /* Wake before output. */ - - kfree_perf_cleanup(); - kernel_power_off(); - return -EINVAL; -} - -static int __init -kfree_perf_init(void) -{ - long i; - int firsterr = 0; - - kfree_nrealthreads = compute_real(kfree_nthreads); - /* Start up the kthreads. */ - if (shutdown) { - init_waitqueue_head(&shutdown_wq); - firsterr = torture_create_kthread(kfree_perf_shutdown, NULL, - shutdown_task); - if (firsterr) - goto unwind; - schedule_timeout_uninterruptible(1); - } - - pr_alert("kfree object size=%zu\n", kfree_mult * sizeof(struct kfree_obj)); - - kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]), - GFP_KERNEL); - if (kfree_reader_tasks == NULL) { - firsterr = -ENOMEM; - goto unwind; - } - - for (i = 0; i < kfree_nrealthreads; i++) { - firsterr = torture_create_kthread(kfree_perf_thread, (void *)i, - kfree_reader_tasks[i]); - if (firsterr) - goto unwind; - } - - while (atomic_read(&n_kfree_perf_thread_started) < kfree_nrealthreads) - schedule_timeout_uninterruptible(1); - - torture_init_end(); - return 0; - -unwind: - torture_init_end(); - kfree_perf_cleanup(); - return firsterr; -} - -static int __init -rcu_perf_init(void) -{ - long i; - int firsterr = 0; - static struct rcu_perf_ops *perf_ops[] = { - &rcu_ops, &srcu_ops, &srcud_ops, &tasks_ops, - }; - - if (!torture_init_begin(perf_type, verbose)) - return -EBUSY; - - /* Process args and tell the world that the perf'er is on the job. */ - for (i = 0; i < ARRAY_SIZE(perf_ops); i++) { - cur_ops = perf_ops[i]; - if (strcmp(perf_type, cur_ops->name) == 0) - break; - } - if (i == ARRAY_SIZE(perf_ops)) { - pr_alert("rcu-perf: invalid perf type: \"%s\"\n", perf_type); - pr_alert("rcu-perf types:"); - for (i = 0; i < ARRAY_SIZE(perf_ops); i++) - pr_cont(" %s", perf_ops[i]->name); - pr_cont("\n"); - WARN_ON(!IS_MODULE(CONFIG_RCU_PERF_TEST)); - firsterr = -EINVAL; - cur_ops = NULL; - goto unwind; - } - if (cur_ops->init) - cur_ops->init(); - - if (kfree_rcu_test) - return kfree_perf_init(); - - nrealwriters = compute_real(nwriters); - nrealreaders = compute_real(nreaders); - atomic_set(&n_rcu_perf_reader_started, 0); - atomic_set(&n_rcu_perf_writer_started, 0); - atomic_set(&n_rcu_perf_writer_finished, 0); - rcu_perf_print_module_parms(cur_ops, "Start of test"); - - /* Start up the kthreads. */ - - if (shutdown) { - init_waitqueue_head(&shutdown_wq); - firsterr = torture_create_kthread(rcu_perf_shutdown, NULL, - shutdown_task); - if (firsterr) - goto unwind; - schedule_timeout_uninterruptible(1); - } - reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]), - GFP_KERNEL); - if (reader_tasks == NULL) { - VERBOSE_PERFOUT_ERRSTRING("out of memory"); - firsterr = -ENOMEM; - goto unwind; - } - for (i = 0; i < nrealreaders; i++) { - firsterr = torture_create_kthread(rcu_perf_reader, (void *)i, - reader_tasks[i]); - if (firsterr) - goto unwind; - } - while (atomic_read(&n_rcu_perf_reader_started) < nrealreaders) - schedule_timeout_uninterruptible(1); - writer_tasks = kcalloc(nrealwriters, sizeof(reader_tasks[0]), - GFP_KERNEL); - writer_durations = kcalloc(nrealwriters, sizeof(*writer_durations), - GFP_KERNEL); - writer_n_durations = - kcalloc(nrealwriters, sizeof(*writer_n_durations), - GFP_KERNEL); - if (!writer_tasks || !writer_durations || !writer_n_durations) { - VERBOSE_PERFOUT_ERRSTRING("out of memory"); - firsterr = -ENOMEM; - goto unwind; - } - for (i = 0; i < nrealwriters; i++) { - writer_durations[i] = - kcalloc(MAX_MEAS, sizeof(*writer_durations[i]), - GFP_KERNEL); - if (!writer_durations[i]) { - firsterr = -ENOMEM; - goto unwind; - } - firsterr = torture_create_kthread(rcu_perf_writer, (void *)i, - writer_tasks[i]); - if (firsterr) - goto unwind; - } - torture_init_end(); - return 0; - -unwind: - torture_init_end(); - rcu_perf_cleanup(); - return firsterr; -} - -module_init(rcu_perf_init); -module_exit(rcu_perf_cleanup); diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c new file mode 100644 index 000000000000..2819b95479af --- /dev/null +++ b/kernel/rcu/rcuscale.c @@ -0,0 +1,853 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Read-Copy Update module-based scalability-test facility + * + * Copyright (C) IBM Corporation, 2015 + * + * Authors: Paul E. McKenney + */ + +#define pr_fmt(fmt) fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rcu.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Paul E. McKenney "); + +#define SCALE_FLAG "-scale:" +#define SCALEOUT_STRING(s) \ + pr_alert("%s" SCALE_FLAG " %s\n", scale_type, s) +#define VERBOSE_SCALEOUT_STRING(s) \ + do { if (verbose) pr_alert("%s" SCALE_FLAG " %s\n", scale_type, s); } while (0) +#define VERBOSE_SCALEOUT_ERRSTRING(s) \ + do { if (verbose) pr_alert("%s" SCALE_FLAG "!!! %s\n", scale_type, s); } while (0) + +/* + * The intended use cases for the nreaders and nwriters module parameters + * are as follows: + * + * 1. Specify only the nr_cpus kernel boot parameter. This will + * set both nreaders and nwriters to the value specified by + * nr_cpus for a mixed reader/writer test. + * + * 2. Specify the nr_cpus kernel boot parameter, but set + * rcuscale.nreaders to zero. This will set nwriters to the + * value specified by nr_cpus for an update-only test. + * + * 3. Specify the nr_cpus kernel boot parameter, but set + * rcuscale.nwriters to zero. This will set nreaders to the + * value specified by nr_cpus for a read-only test. + * + * Various other use cases may of course be specified. + * + * Note that this test's readers are intended only as a test load for + * the writers. The reader scalability statistics will be overly + * pessimistic due to the per-critical-section interrupt disabling, + * test-end checks, and the pair of calls through pointers. + */ + +#ifdef MODULE +# define RCUSCALE_SHUTDOWN 0 +#else +# define RCUSCALE_SHUTDOWN 1 +#endif + +torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives"); +torture_param(int, gp_async_max, 1000, "Max # outstanding waits per reader"); +torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); +torture_param(int, holdoff, 10, "Holdoff time before test start (s)"); +torture_param(int, nreaders, -1, "Number of RCU reader threads"); +torture_param(int, nwriters, -1, "Number of RCU updater threads"); +torture_param(bool, shutdown, RCUSCALE_SHUTDOWN, + "Shutdown at end of scalability tests."); +torture_param(int, verbose, 1, "Enable verbose debugging printk()s"); +torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable"); +torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() scale test?"); +torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate."); + +static char *scale_type = "rcu"; +module_param(scale_type, charp, 0444); +MODULE_PARM_DESC(scale_type, "Type of RCU to scalability-test (rcu, srcu, ...)"); + +static int nrealreaders; +static int nrealwriters; +static struct task_struct **writer_tasks; +static struct task_struct **reader_tasks; +static struct task_struct *shutdown_task; + +static u64 **writer_durations; +static int *writer_n_durations; +static atomic_t n_rcu_scale_reader_started; +static atomic_t n_rcu_scale_writer_started; +static atomic_t n_rcu_scale_writer_finished; +static wait_queue_head_t shutdown_wq; +static u64 t_rcu_scale_writer_started; +static u64 t_rcu_scale_writer_finished; +static unsigned long b_rcu_gp_test_started; +static unsigned long b_rcu_gp_test_finished; +static DEFINE_PER_CPU(atomic_t, n_async_inflight); + +#define MAX_MEAS 10000 +#define MIN_MEAS 100 + +/* + * Operations vector for selecting different types of tests. + */ + +struct rcu_scale_ops { + int ptype; + void (*init)(void); + void (*cleanup)(void); + int (*readlock)(void); + void (*readunlock)(int idx); + unsigned long (*get_gp_seq)(void); + unsigned long (*gp_diff)(unsigned long new, unsigned long old); + unsigned long (*exp_completed)(void); + void (*async)(struct rcu_head *head, rcu_callback_t func); + void (*gp_barrier)(void); + void (*sync)(void); + void (*exp_sync)(void); + const char *name; +}; + +static struct rcu_scale_ops *cur_ops; + +/* + * Definitions for rcu scalability testing. + */ + +static int rcu_scale_read_lock(void) __acquires(RCU) +{ + rcu_read_lock(); + return 0; +} + +static void rcu_scale_read_unlock(int idx) __releases(RCU) +{ + rcu_read_unlock(); +} + +static unsigned long __maybe_unused rcu_no_completed(void) +{ + return 0; +} + +static void rcu_sync_scale_init(void) +{ +} + +static struct rcu_scale_ops rcu_ops = { + .ptype = RCU_FLAVOR, + .init = rcu_sync_scale_init, + .readlock = rcu_scale_read_lock, + .readunlock = rcu_scale_read_unlock, + .get_gp_seq = rcu_get_gp_seq, + .gp_diff = rcu_seq_diff, + .exp_completed = rcu_exp_batches_completed, + .async = call_rcu, + .gp_barrier = rcu_barrier, + .sync = synchronize_rcu, + .exp_sync = synchronize_rcu_expedited, + .name = "rcu" +}; + +/* + * Definitions for srcu scalability testing. + */ + +DEFINE_STATIC_SRCU(srcu_ctl_scale); +static struct srcu_struct *srcu_ctlp = &srcu_ctl_scale; + +static int srcu_scale_read_lock(void) __acquires(srcu_ctlp) +{ + return srcu_read_lock(srcu_ctlp); +} + +static void srcu_scale_read_unlock(int idx) __releases(srcu_ctlp) +{ + srcu_read_unlock(srcu_ctlp, idx); +} + +static unsigned long srcu_scale_completed(void) +{ + return srcu_batches_completed(srcu_ctlp); +} + +static void srcu_call_rcu(struct rcu_head *head, rcu_callback_t func) +{ + call_srcu(srcu_ctlp, head, func); +} + +static void srcu_rcu_barrier(void) +{ + srcu_barrier(srcu_ctlp); +} + +static void srcu_scale_synchronize(void) +{ + synchronize_srcu(srcu_ctlp); +} + +static void srcu_scale_synchronize_expedited(void) +{ + synchronize_srcu_expedited(srcu_ctlp); +} + +static struct rcu_scale_ops srcu_ops = { + .ptype = SRCU_FLAVOR, + .init = rcu_sync_scale_init, + .readlock = srcu_scale_read_lock, + .readunlock = srcu_scale_read_unlock, + .get_gp_seq = srcu_scale_completed, + .gp_diff = rcu_seq_diff, + .exp_completed = srcu_scale_completed, + .async = srcu_call_rcu, + .gp_barrier = srcu_rcu_barrier, + .sync = srcu_scale_synchronize, + .exp_sync = srcu_scale_synchronize_expedited, + .name = "srcu" +}; + +static struct srcu_struct srcud; + +static void srcu_sync_scale_init(void) +{ + srcu_ctlp = &srcud; + init_srcu_struct(srcu_ctlp); +} + +static void srcu_sync_scale_cleanup(void) +{ + cleanup_srcu_struct(srcu_ctlp); +} + +static struct rcu_scale_ops srcud_ops = { + .ptype = SRCU_FLAVOR, + .init = srcu_sync_scale_init, + .cleanup = srcu_sync_scale_cleanup, + .readlock = srcu_scale_read_lock, + .readunlock = srcu_scale_read_unlock, + .get_gp_seq = srcu_scale_completed, + .gp_diff = rcu_seq_diff, + .exp_completed = srcu_scale_completed, + .async = srcu_call_rcu, + .gp_barrier = srcu_rcu_barrier, + .sync = srcu_scale_synchronize, + .exp_sync = srcu_scale_synchronize_expedited, + .name = "srcud" +}; + +/* + * Definitions for RCU-tasks scalability testing. + */ + +static int tasks_scale_read_lock(void) +{ + return 0; +} + +static void tasks_scale_read_unlock(int idx) +{ +} + +static struct rcu_scale_ops tasks_ops = { + .ptype = RCU_TASKS_FLAVOR, + .init = rcu_sync_scale_init, + .readlock = tasks_scale_read_lock, + .readunlock = tasks_scale_read_unlock, + .get_gp_seq = rcu_no_completed, + .gp_diff = rcu_seq_diff, + .async = call_rcu_tasks, + .gp_barrier = rcu_barrier_tasks, + .sync = synchronize_rcu_tasks, + .exp_sync = synchronize_rcu_tasks, + .name = "tasks" +}; + +static unsigned long rcuscale_seq_diff(unsigned long new, unsigned long old) +{ + if (!cur_ops->gp_diff) + return new - old; + return cur_ops->gp_diff(new, old); +} + +/* + * If scalability tests complete, wait for shutdown to commence. + */ +static void rcu_scale_wait_shutdown(void) +{ + cond_resched_tasks_rcu_qs(); + if (atomic_read(&n_rcu_scale_writer_finished) < nrealwriters) + return; + while (!torture_must_stop()) + schedule_timeout_uninterruptible(1); +} + +/* + * RCU scalability reader kthread. Repeatedly does empty RCU read-side + * critical section, minimizing update-side interference. However, the + * point of this test is not to evaluate reader scalability, but instead + * to serve as a test load for update-side scalability testing. + */ +static int +rcu_scale_reader(void *arg) +{ + unsigned long flags; + int idx; + long me = (long)arg; + + VERBOSE_SCALEOUT_STRING("rcu_scale_reader task started"); + set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids)); + set_user_nice(current, MAX_NICE); + atomic_inc(&n_rcu_scale_reader_started); + + do { + local_irq_save(flags); + idx = cur_ops->readlock(); + cur_ops->readunlock(idx); + local_irq_restore(flags); + rcu_scale_wait_shutdown(); + } while (!torture_must_stop()); + torture_kthread_stopping("rcu_scale_reader"); + return 0; +} + +/* + * Callback function for asynchronous grace periods from rcu_scale_writer(). + */ +static void rcu_scale_async_cb(struct rcu_head *rhp) +{ + atomic_dec(this_cpu_ptr(&n_async_inflight)); + kfree(rhp); +} + +/* + * RCU scale writer kthread. Repeatedly does a grace period. + */ +static int +rcu_scale_writer(void *arg) +{ + int i = 0; + int i_max; + long me = (long)arg; + struct rcu_head *rhp = NULL; + bool started = false, done = false, alldone = false; + u64 t; + u64 *wdp; + u64 *wdpp = writer_durations[me]; + + VERBOSE_SCALEOUT_STRING("rcu_scale_writer task started"); + WARN_ON(!wdpp); + set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids)); + sched_set_fifo_low(current); + + if (holdoff) + schedule_timeout_uninterruptible(holdoff * HZ); + + /* + * Wait until rcu_end_inkernel_boot() is called for normal GP tests + * so that RCU is not always expedited for normal GP tests. + * The system_state test is approximate, but works well in practice. + */ + while (!gp_exp && system_state != SYSTEM_RUNNING) + schedule_timeout_uninterruptible(1); + + t = ktime_get_mono_fast_ns(); + if (atomic_inc_return(&n_rcu_scale_writer_started) >= nrealwriters) { + t_rcu_scale_writer_started = t; + if (gp_exp) { + b_rcu_gp_test_started = + cur_ops->exp_completed() / 2; + } else { + b_rcu_gp_test_started = cur_ops->get_gp_seq(); + } + } + + do { + if (writer_holdoff) + udelay(writer_holdoff); + wdp = &wdpp[i]; + *wdp = ktime_get_mono_fast_ns(); + if (gp_async) { +retry: + if (!rhp) + rhp = kmalloc(sizeof(*rhp), GFP_KERNEL); + if (rhp && atomic_read(this_cpu_ptr(&n_async_inflight)) < gp_async_max) { + atomic_inc(this_cpu_ptr(&n_async_inflight)); + cur_ops->async(rhp, rcu_scale_async_cb); + rhp = NULL; + } else if (!kthread_should_stop()) { + cur_ops->gp_barrier(); + goto retry; + } else { + kfree(rhp); /* Because we are stopping. */ + } + } else if (gp_exp) { + cur_ops->exp_sync(); + } else { + cur_ops->sync(); + } + t = ktime_get_mono_fast_ns(); + *wdp = t - *wdp; + i_max = i; + if (!started && + atomic_read(&n_rcu_scale_writer_started) >= nrealwriters) + started = true; + if (!done && i >= MIN_MEAS) { + done = true; + sched_set_normal(current, 0); + pr_alert("%s%s rcu_scale_writer %ld has %d measurements\n", + scale_type, SCALE_FLAG, me, MIN_MEAS); + if (atomic_inc_return(&n_rcu_scale_writer_finished) >= + nrealwriters) { + schedule_timeout_interruptible(10); + rcu_ftrace_dump(DUMP_ALL); + SCALEOUT_STRING("Test complete"); + t_rcu_scale_writer_finished = t; + if (gp_exp) { + b_rcu_gp_test_finished = + cur_ops->exp_completed() / 2; + } else { + b_rcu_gp_test_finished = + cur_ops->get_gp_seq(); + } + if (shutdown) { + smp_mb(); /* Assign before wake. */ + wake_up(&shutdown_wq); + } + } + } + if (done && !alldone && + atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters) + alldone = true; + if (started && !alldone && i < MAX_MEAS - 1) + i++; + rcu_scale_wait_shutdown(); + } while (!torture_must_stop()); + if (gp_async) { + cur_ops->gp_barrier(); + } + writer_n_durations[me] = i_max; + torture_kthread_stopping("rcu_scale_writer"); + return 0; +} + +static void +rcu_scale_print_module_parms(struct rcu_scale_ops *cur_ops, const char *tag) +{ + pr_alert("%s" SCALE_FLAG + "--- %s: nreaders=%d nwriters=%d verbose=%d shutdown=%d\n", + scale_type, tag, nrealreaders, nrealwriters, verbose, shutdown); +} + +static void +rcu_scale_cleanup(void) +{ + int i; + int j; + int ngps = 0; + u64 *wdp; + u64 *wdpp; + + /* + * Would like warning at start, but everything is expedited + * during the mid-boot phase, so have to wait till the end. + */ + if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp) + VERBOSE_SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!"); + if (rcu_gp_is_normal() && gp_exp) + VERBOSE_SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!"); + if (gp_exp && gp_async) + VERBOSE_SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!"); + + if (torture_cleanup_begin()) + return; + if (!cur_ops) { + torture_cleanup_end(); + return; + } + + if (reader_tasks) { + for (i = 0; i < nrealreaders; i++) + torture_stop_kthread(rcu_scale_reader, + reader_tasks[i]); + kfree(reader_tasks); + } + + if (writer_tasks) { + for (i = 0; i < nrealwriters; i++) { + torture_stop_kthread(rcu_scale_writer, + writer_tasks[i]); + if (!writer_n_durations) + continue; + j = writer_n_durations[i]; + pr_alert("%s%s writer %d gps: %d\n", + scale_type, SCALE_FLAG, i, j); + ngps += j; + } + pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n", + scale_type, SCALE_FLAG, + t_rcu_scale_writer_started, t_rcu_scale_writer_finished, + t_rcu_scale_writer_finished - + t_rcu_scale_writer_started, + ngps, + rcuscale_seq_diff(b_rcu_gp_test_finished, + b_rcu_gp_test_started)); + for (i = 0; i < nrealwriters; i++) { + if (!writer_durations) + break; + if (!writer_n_durations) + continue; + wdpp = writer_durations[i]; + if (!wdpp) + continue; + for (j = 0; j <= writer_n_durations[i]; j++) { + wdp = &wdpp[j]; + pr_alert("%s%s %4d writer-duration: %5d %llu\n", + scale_type, SCALE_FLAG, + i, j, *wdp); + if (j % 100 == 0) + schedule_timeout_uninterruptible(1); + } + kfree(writer_durations[i]); + } + kfree(writer_tasks); + kfree(writer_durations); + kfree(writer_n_durations); + } + + /* Do torture-type-specific cleanup operations. */ + if (cur_ops->cleanup != NULL) + cur_ops->cleanup(); + + torture_cleanup_end(); +} + +/* + * Return the number if non-negative. If -1, the number of CPUs. + * If less than -1, that much less than the number of CPUs, but + * at least one. + */ +static int compute_real(int n) +{ + int nr; + + if (n >= 0) { + nr = n; + } else { + nr = num_online_cpus() + 1 + n; + if (nr <= 0) + nr = 1; + } + return nr; +} + +/* + * RCU scalability shutdown kthread. Just waits to be awakened, then shuts + * down system. + */ +static int +rcu_scale_shutdown(void *arg) +{ + wait_event(shutdown_wq, + atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters); + smp_mb(); /* Wake before output. */ + rcu_scale_cleanup(); + kernel_power_off(); + return -EINVAL; +} + +/* + * kfree_rcu() scalability tests: Start a kfree_rcu() loop on all CPUs for number + * of iterations and measure total time and number of GP for all iterations to complete. + */ + +torture_param(int, kfree_nthreads, -1, "Number of threads running loops of kfree_rcu()."); +torture_param(int, kfree_alloc_num, 8000, "Number of allocations and frees done in an iteration."); +torture_param(int, kfree_loops, 10, "Number of loops doing kfree_alloc_num allocations and frees."); + +static struct task_struct **kfree_reader_tasks; +static int kfree_nrealthreads; +static atomic_t n_kfree_scale_thread_started; +static atomic_t n_kfree_scale_thread_ended; + +struct kfree_obj { + char kfree_obj[8]; + struct rcu_head rh; +}; + +static int +kfree_scale_thread(void *arg) +{ + int i, loop = 0; + long me = (long)arg; + struct kfree_obj *alloc_ptr; + u64 start_time, end_time; + long long mem_begin, mem_during = 0; + + VERBOSE_SCALEOUT_STRING("kfree_scale_thread task started"); + set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids)); + set_user_nice(current, MAX_NICE); + + start_time = ktime_get_mono_fast_ns(); + + if (atomic_inc_return(&n_kfree_scale_thread_started) >= kfree_nrealthreads) { + if (gp_exp) + b_rcu_gp_test_started = cur_ops->exp_completed() / 2; + else + b_rcu_gp_test_started = cur_ops->get_gp_seq(); + } + + do { + if (!mem_during) { + mem_during = mem_begin = si_mem_available(); + } else if (loop % (kfree_loops / 4) == 0) { + mem_during = (mem_during + si_mem_available()) / 2; + } + + for (i = 0; i < kfree_alloc_num; i++) { + alloc_ptr = kmalloc(kfree_mult * sizeof(struct kfree_obj), GFP_KERNEL); + if (!alloc_ptr) + return -ENOMEM; + + kfree_rcu(alloc_ptr, rh); + } + + cond_resched(); + } while (!torture_must_stop() && ++loop < kfree_loops); + + if (atomic_inc_return(&n_kfree_scale_thread_ended) >= kfree_nrealthreads) { + end_time = ktime_get_mono_fast_ns(); + + if (gp_exp) + b_rcu_gp_test_finished = cur_ops->exp_completed() / 2; + else + b_rcu_gp_test_finished = cur_ops->get_gp_seq(); + + pr_alert("Total time taken by all kfree'ers: %llu ns, loops: %d, batches: %ld, memory footprint: %lldMB\n", + (unsigned long long)(end_time - start_time), kfree_loops, + rcuscale_seq_diff(b_rcu_gp_test_finished, b_rcu_gp_test_started), + (mem_begin - mem_during) >> (20 - PAGE_SHIFT)); + + if (shutdown) { + smp_mb(); /* Assign before wake. */ + wake_up(&shutdown_wq); + } + } + + torture_kthread_stopping("kfree_scale_thread"); + return 0; +} + +static void +kfree_scale_cleanup(void) +{ + int i; + + if (torture_cleanup_begin()) + return; + + if (kfree_reader_tasks) { + for (i = 0; i < kfree_nrealthreads; i++) + torture_stop_kthread(kfree_scale_thread, + kfree_reader_tasks[i]); + kfree(kfree_reader_tasks); + } + + torture_cleanup_end(); +} + +/* + * shutdown kthread. Just waits to be awakened, then shuts down system. + */ +static int +kfree_scale_shutdown(void *arg) +{ + wait_event(shutdown_wq, + atomic_read(&n_kfree_scale_thread_ended) >= kfree_nrealthreads); + + smp_mb(); /* Wake before output. */ + + kfree_scale_cleanup(); + kernel_power_off(); + return -EINVAL; +} + +static int __init +kfree_scale_init(void) +{ + long i; + int firsterr = 0; + + kfree_nrealthreads = compute_real(kfree_nthreads); + /* Start up the kthreads. */ + if (shutdown) { + init_waitqueue_head(&shutdown_wq); + firsterr = torture_create_kthread(kfree_scale_shutdown, NULL, + shutdown_task); + if (firsterr) + goto unwind; + schedule_timeout_uninterruptible(1); + } + + pr_alert("kfree object size=%zu\n", kfree_mult * sizeof(struct kfree_obj)); + + kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]), + GFP_KERNEL); + if (kfree_reader_tasks == NULL) { + firsterr = -ENOMEM; + goto unwind; + } + + for (i = 0; i < kfree_nrealthreads; i++) { + firsterr = torture_create_kthread(kfree_scale_thread, (void *)i, + kfree_reader_tasks[i]); + if (firsterr) + goto unwind; + } + + while (atomic_read(&n_kfree_scale_thread_started) < kfree_nrealthreads) + schedule_timeout_uninterruptible(1); + + torture_init_end(); + return 0; + +unwind: + torture_init_end(); + kfree_scale_cleanup(); + return firsterr; +} + +static int __init +rcu_scale_init(void) +{ + long i; + int firsterr = 0; + static struct rcu_scale_ops *scale_ops[] = { + &rcu_ops, &srcu_ops, &srcud_ops, &tasks_ops, + }; + + if (!torture_init_begin(scale_type, verbose)) + return -EBUSY; + + /* Process args and announce that the scalability'er is on the job. */ + for (i = 0; i < ARRAY_SIZE(scale_ops); i++) { + cur_ops = scale_ops[i]; + if (strcmp(scale_type, cur_ops->name) == 0) + break; + } + if (i == ARRAY_SIZE(scale_ops)) { + pr_alert("rcu-scale: invalid scale type: \"%s\"\n", scale_type); + pr_alert("rcu-scale types:"); + for (i = 0; i < ARRAY_SIZE(scale_ops); i++) + pr_cont(" %s", scale_ops[i]->name); + pr_cont("\n"); + WARN_ON(!IS_MODULE(CONFIG_RCU_SCALE_TEST)); + firsterr = -EINVAL; + cur_ops = NULL; + goto unwind; + } + if (cur_ops->init) + cur_ops->init(); + + if (kfree_rcu_test) + return kfree_scale_init(); + + nrealwriters = compute_real(nwriters); + nrealreaders = compute_real(nreaders); + atomic_set(&n_rcu_scale_reader_started, 0); + atomic_set(&n_rcu_scale_writer_started, 0); + atomic_set(&n_rcu_scale_writer_finished, 0); + rcu_scale_print_module_parms(cur_ops, "Start of test"); + + /* Start up the kthreads. */ + + if (shutdown) { + init_waitqueue_head(&shutdown_wq); + firsterr = torture_create_kthread(rcu_scale_shutdown, NULL, + shutdown_task); + if (firsterr) + goto unwind; + schedule_timeout_uninterruptible(1); + } + reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]), + GFP_KERNEL); + if (reader_tasks == NULL) { + VERBOSE_SCALEOUT_ERRSTRING("out of memory"); + firsterr = -ENOMEM; + goto unwind; + } + for (i = 0; i < nrealreaders; i++) { + firsterr = torture_create_kthread(rcu_scale_reader, (void *)i, + reader_tasks[i]); + if (firsterr) + goto unwind; + } + while (atomic_read(&n_rcu_scale_reader_started) < nrealreaders) + schedule_timeout_uninterruptible(1); + writer_tasks = kcalloc(nrealwriters, sizeof(reader_tasks[0]), + GFP_KERNEL); + writer_durations = kcalloc(nrealwriters, sizeof(*writer_durations), + GFP_KERNEL); + writer_n_durations = + kcalloc(nrealwriters, sizeof(*writer_n_durations), + GFP_KERNEL); + if (!writer_tasks || !writer_durations || !writer_n_durations) { + VERBOSE_SCALEOUT_ERRSTRING("out of memory"); + firsterr = -ENOMEM; + goto unwind; + } + for (i = 0; i < nrealwriters; i++) { + writer_durations[i] = + kcalloc(MAX_MEAS, sizeof(*writer_durations[i]), + GFP_KERNEL); + if (!writer_durations[i]) { + firsterr = -ENOMEM; + goto unwind; + } + firsterr = torture_create_kthread(rcu_scale_writer, (void *)i, + writer_tasks[i]); + if (firsterr) + goto unwind; + } + torture_init_end(); + return 0; + +unwind: + torture_init_end(); + rcu_scale_cleanup(); + return firsterr; +} + +module_init(rcu_scale_init); +module_exit(rcu_scale_cleanup); diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh deleted file mode 100755 index 7d3c2be66c64..000000000000 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh +++ /dev/null @@ -1,109 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0+ -# -# Analyze a given results directory for rcuperf performance measurements, -# looking for ftrace data. Exits with 0 if data was found, analyzed, and -# printed. Intended to be invoked from kvm-recheck-rcuperf.sh after -# argument checking. -# -# Usage: kvm-recheck-rcuperf-ftrace.sh resdir -# -# Copyright (C) IBM Corporation, 2016 -# -# Authors: Paul E. McKenney - -i="$1" -. functions.sh - -if test "`grep -c 'rcu_exp_grace_period.*start' < $i/console.log`" -lt 100 -then - exit 10 -fi - -sed -e 's/^\[[^]]*]//' < $i/console.log | -grep 'us : rcu_exp_grace_period' | -sed -e 's/us : / : /' | -tr -d '\015' | -awk ' -$8 == "start" { - if (startseq != "") - nlost++; - starttask = $1; - starttime = $3; - startseq = $7; - seqtask[startseq] = starttask; -} - -$8 == "end" { - if (startseq == $7) { - curgpdur = $3 - starttime; - gptimes[++n] = curgpdur; - gptaskcnt[starttask]++; - sum += curgpdur; - if (curgpdur > 1000) - print "Long GP " starttime "us to " $3 "us (" curgpdur "us)"; - startseq = ""; - } else { - # Lost a message or some such, reset. - startseq = ""; - nlost++; - } -} - -$8 == "done" && seqtask[$7] != $1 { - piggybackcnt[$1]++; -} - -END { - newNR = asort(gptimes); - if (newNR <= 0) { - print "No ftrace records found???" - exit 10; - } - pct50 = int(newNR * 50 / 100); - if (pct50 < 1) - pct50 = 1; - pct90 = int(newNR * 90 / 100); - if (pct90 < 1) - pct90 = 1; - pct99 = int(newNR * 99 / 100); - if (pct99 < 1) - pct99 = 1; - div = 10 ** int(log(gptimes[pct90]) / log(10) + .5) / 100; - print "Histogram bucket size: " div; - last = gptimes[1] - 10; - count = 0; - for (i = 1; i <= newNR; i++) { - current = div * int(gptimes[i] / div); - if (last == current) { - count++; - } else { - if (count > 0) - print last, count; - count = 1; - last = current; - } - } - if (count > 0) - print last, count; - print "Distribution of grace periods across tasks:"; - for (i in gptaskcnt) { - print "\t" i, gptaskcnt[i]; - nbatches += gptaskcnt[i]; - } - ngps = nbatches; - print "Distribution of piggybacking across tasks:"; - for (i in piggybackcnt) { - print "\t" i, piggybackcnt[i]; - ngps += piggybackcnt[i]; - } - print "Average grace-period duration: " sum / newNR " microseconds"; - print "Minimum grace-period duration: " gptimes[1]; - print "50th percentile grace-period duration: " gptimes[pct50]; - print "90th percentile grace-period duration: " gptimes[pct90]; - print "99th percentile grace-period duration: " gptimes[pct99]; - print "Maximum grace-period duration: " gptimes[newNR]; - print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches " Lost: " nlost + 0; - print "Computed from ftrace data."; -}' -exit 0 diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh deleted file mode 100755 index db0375a57f28..000000000000 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh +++ /dev/null @@ -1,83 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0+ -# -# Analyze a given results directory for rcuperf performance measurements. -# -# Usage: kvm-recheck-rcuperf.sh resdir -# -# Copyright (C) IBM Corporation, 2016 -# -# Authors: Paul E. McKenney - -i="$1" -if test -d "$i" -a -r "$i" -then - : -else - echo Unreadable results directory: $i - exit 1 -fi -PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH -. functions.sh - -if kvm-recheck-rcuperf-ftrace.sh $i -then - # ftrace data was successfully analyzed, call it good! - exit 0 -fi - -configfile=`echo $i | sed -e 's/^.*\///'` - -sed -e 's/^\[[^]]*]//' < $i/console.log | -awk ' -/-perf: .* gps: .* batches:/ { - ngps = $9; - nbatches = $11; -} - -/-perf: .*writer-duration/ { - gptimes[++n] = $5 / 1000.; - sum += $5 / 1000.; -} - -END { - newNR = asort(gptimes); - if (newNR <= 0) { - print "No rcuperf records found???" - exit; - } - pct50 = int(newNR * 50 / 100); - if (pct50 < 1) - pct50 = 1; - pct90 = int(newNR * 90 / 100); - if (pct90 < 1) - pct90 = 1; - pct99 = int(newNR * 99 / 100); - if (pct99 < 1) - pct99 = 1; - div = 10 ** int(log(gptimes[pct90]) / log(10) + .5) / 100; - print "Histogram bucket size: " div; - last = gptimes[1] - 10; - count = 0; - for (i = 1; i <= newNR; i++) { - current = div * int(gptimes[i] / div); - if (last == current) { - count++; - } else { - if (count > 0) - print last, count; - count = 1; - last = current; - } - } - if (count > 0) - print last, count; - print "Average grace-period duration: " sum / newNR " microseconds"; - print "Minimum grace-period duration: " gptimes[1]; - print "50th percentile grace-period duration: " gptimes[pct50]; - print "90th percentile grace-period duration: " gptimes[pct90]; - print "99th percentile grace-period duration: " gptimes[pct99]; - print "Maximum grace-period duration: " gptimes[newNR]; - print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches; - print "Computed from rcuperf printk output."; -}' diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale-ftrace.sh new file mode 100755 index 000000000000..d4bec538086d --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale-ftrace.sh @@ -0,0 +1,109 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Analyze a given results directory for rcuscale performance measurements, +# looking for ftrace data. Exits with 0 if data was found, analyzed, and +# printed. Intended to be invoked from kvm-recheck-rcuscale.sh after +# argument checking. +# +# Usage: kvm-recheck-rcuscale-ftrace.sh resdir +# +# Copyright (C) IBM Corporation, 2016 +# +# Authors: Paul E. McKenney + +i="$1" +. functions.sh + +if test "`grep -c 'rcu_exp_grace_period.*start' < $i/console.log`" -lt 100 +then + exit 10 +fi + +sed -e 's/^\[[^]]*]//' < $i/console.log | +grep 'us : rcu_exp_grace_period' | +sed -e 's/us : / : /' | +tr -d '\015' | +awk ' +$8 == "start" { + if (startseq != "") + nlost++; + starttask = $1; + starttime = $3; + startseq = $7; + seqtask[startseq] = starttask; +} + +$8 == "end" { + if (startseq == $7) { + curgpdur = $3 - starttime; + gptimes[++n] = curgpdur; + gptaskcnt[starttask]++; + sum += curgpdur; + if (curgpdur > 1000) + print "Long GP " starttime "us to " $3 "us (" curgpdur "us)"; + startseq = ""; + } else { + # Lost a message or some such, reset. + startseq = ""; + nlost++; + } +} + +$8 == "done" && seqtask[$7] != $1 { + piggybackcnt[$1]++; +} + +END { + newNR = asort(gptimes); + if (newNR <= 0) { + print "No ftrace records found???" + exit 10; + } + pct50 = int(newNR * 50 / 100); + if (pct50 < 1) + pct50 = 1; + pct90 = int(newNR * 90 / 100); + if (pct90 < 1) + pct90 = 1; + pct99 = int(newNR * 99 / 100); + if (pct99 < 1) + pct99 = 1; + div = 10 ** int(log(gptimes[pct90]) / log(10) + .5) / 100; + print "Histogram bucket size: " div; + last = gptimes[1] - 10; + count = 0; + for (i = 1; i <= newNR; i++) { + current = div * int(gptimes[i] / div); + if (last == current) { + count++; + } else { + if (count > 0) + print last, count; + count = 1; + last = current; + } + } + if (count > 0) + print last, count; + print "Distribution of grace periods across tasks:"; + for (i in gptaskcnt) { + print "\t" i, gptaskcnt[i]; + nbatches += gptaskcnt[i]; + } + ngps = nbatches; + print "Distribution of piggybacking across tasks:"; + for (i in piggybackcnt) { + print "\t" i, piggybackcnt[i]; + ngps += piggybackcnt[i]; + } + print "Average grace-period duration: " sum / newNR " microseconds"; + print "Minimum grace-period duration: " gptimes[1]; + print "50th percentile grace-period duration: " gptimes[pct50]; + print "90th percentile grace-period duration: " gptimes[pct90]; + print "99th percentile grace-period duration: " gptimes[pct99]; + print "Maximum grace-period duration: " gptimes[newNR]; + print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches " Lost: " nlost + 0; + print "Computed from ftrace data."; +}' +exit 0 diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh new file mode 100755 index 000000000000..aa745152a525 --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Analyze a given results directory for rcuscale scalability measurements. +# +# Usage: kvm-recheck-rcuscale.sh resdir +# +# Copyright (C) IBM Corporation, 2016 +# +# Authors: Paul E. McKenney + +i="$1" +if test -d "$i" -a -r "$i" +then + : +else + echo Unreadable results directory: $i + exit 1 +fi +PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH +. functions.sh + +if kvm-recheck-rcuscale-ftrace.sh $i +then + # ftrace data was successfully analyzed, call it good! + exit 0 +fi + +configfile=`echo $i | sed -e 's/^.*\///'` + +sed -e 's/^\[[^]]*]//' < $i/console.log | +awk ' +/-scale: .* gps: .* batches:/ { + ngps = $9; + nbatches = $11; +} + +/-scale: .*writer-duration/ { + gptimes[++n] = $5 / 1000.; + sum += $5 / 1000.; +} + +END { + newNR = asort(gptimes); + if (newNR <= 0) { + print "No rcuscale records found???" + exit; + } + pct50 = int(newNR * 50 / 100); + if (pct50 < 1) + pct50 = 1; + pct90 = int(newNR * 90 / 100); + if (pct90 < 1) + pct90 = 1; + pct99 = int(newNR * 99 / 100); + if (pct99 < 1) + pct99 = 1; + div = 10 ** int(log(gptimes[pct90]) / log(10) + .5) / 100; + print "Histogram bucket size: " div; + last = gptimes[1] - 10; + count = 0; + for (i = 1; i <= newNR; i++) { + current = div * int(gptimes[i] / div); + if (last == current) { + count++; + } else { + if (count > 0) + print last, count; + count = 1; + last = current; + } + } + if (count > 0) + print last, count; + print "Average grace-period duration: " sum / newNR " microseconds"; + print "Minimum grace-period duration: " gptimes[1]; + print "50th percentile grace-period duration: " gptimes[pct50]; + print "90th percentile grace-period duration: " gptimes[pct90]; + print "99th percentile grace-period duration: " gptimes[pct99]; + print "Maximum grace-period duration: " gptimes[newNR]; + print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches; + print "Computed from rcuscale printk output."; +}' diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 44dfdd9be67e..0489c198a72a 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -65,7 +65,7 @@ usage () { echo " --qemu-args qemu-arguments" echo " --qemu-cmd qemu-system-..." echo " --results absolute-pathname" - echo " --torture rcu" + echo " --torture lock|rcu|rcuscale|refscale|scf" echo " --trust-make" exit 1 } @@ -184,13 +184,13 @@ do shift ;; --torture) - checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\|refscale\|scf\)$' '^--' + checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\)$' '^--' TORTURE_SUITE=$2 shift - if test "$TORTURE_SUITE" = rcuperf || test "$TORTURE_SUITE" = refscale + if test "$TORTURE_SUITE" = rcuscale || test "$TORTURE_SUITE" = refscale then # If you really want jitter for refscale or - # rcuperf, specify it after specifying the rcuperf + # rcuscale, specify it after specifying the rcuscale # or the refscale. (But why jitter in these cases?) jitter=0 fi diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 4e081a25761e..e03338091a06 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -33,8 +33,8 @@ then fi cat /dev/null > $file.diags -# Check for proper termination, except for rcuperf and refscale. -if test "$TORTURE_SUITE" != rcuperf && test "$TORTURE_SUITE" != refscale +# Check for proper termination, except for rcuscale and refscale. +if test "$TORTURE_SUITE" != rcuscale && test "$TORTURE_SUITE" != refscale then # check for abject failure diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST b/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST deleted file mode 100644 index c9f56cf20775..000000000000 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST +++ /dev/null @@ -1 +0,0 @@ -TREE diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon deleted file mode 100644 index a09816b8c0f3..000000000000 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon +++ /dev/null @@ -1,2 +0,0 @@ -CONFIG_RCU_PERF_TEST=y -CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TINY b/tools/testing/selftests/rcutorture/configs/rcuperf/TINY deleted file mode 100644 index fb05ef5279b4..000000000000 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/TINY +++ /dev/null @@ -1,16 +0,0 @@ -CONFIG_SMP=n -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TINY_RCU=y -CONFIG_HZ_PERIODIC=n -CONFIG_NO_HZ_IDLE=y -CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n -CONFIG_RCU_NOCB_CPU=n -CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_PROVE_LOCKING=n -CONFIG_RCU_BOOST=n -CONFIG_DEBUG_OBJECTS_RCU_HEAD=n -CONFIG_RCU_EXPERT=y -CONFIG_RCU_TRACE=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE deleted file mode 100644 index 721cfda76ab2..000000000000 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE +++ /dev/null @@ -1,19 +0,0 @@ -CONFIG_SMP=y -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_PREEMPT_RCU=y -CONFIG_HZ_PERIODIC=n -CONFIG_NO_HZ_IDLE=y -CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_RCU_NOCB_CPU=n -CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_PROVE_LOCKING=n -CONFIG_RCU_BOOST=n -CONFIG_DEBUG_OBJECTS_RCU_HEAD=n -CONFIG_RCU_EXPERT=y -CONFIG_RCU_TRACE=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 deleted file mode 100644 index 7629f5dd73b2..000000000000 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 +++ /dev/null @@ -1,22 +0,0 @@ -CONFIG_SMP=y -CONFIG_NR_CPUS=54 -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_PREEMPT_RCU=y -CONFIG_HZ_PERIODIC=n -CONFIG_NO_HZ_IDLE=y -CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_RCU_FANOUT=3 -CONFIG_RCU_FANOUT_LEAF=2 -CONFIG_RCU_NOCB_CPU=n -CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_PROVE_LOCKING=n -CONFIG_RCU_BOOST=n -CONFIG_DEBUG_OBJECTS_RCU_HEAD=n -CONFIG_RCU_EXPERT=y -CONFIG_RCU_TRACE=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh deleted file mode 100644 index 777d5b0c190f..000000000000 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0+ -# -# Torture-suite-dependent shell functions for the rest of the scripts. -# -# Copyright (C) IBM Corporation, 2015 -# -# Authors: Paul E. McKenney - -# per_version_boot_params bootparam-string config-file seconds -# -# Adds per-version torture-module parameters to kernels supporting them. -per_version_boot_params () { - echo $1 rcuperf.shutdown=1 \ - rcuperf.verbose=1 -} diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/CFLIST b/tools/testing/selftests/rcutorture/configs/rcuscale/CFLIST new file mode 100644 index 000000000000..c9f56cf20775 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFLIST @@ -0,0 +1 @@ +TREE diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon new file mode 100644 index 000000000000..87caa0e932c7 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon @@ -0,0 +1,2 @@ +CONFIG_RCU_SCALE_TEST=y +CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TINY b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY new file mode 100644 index 000000000000..fb05ef5279b4 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY @@ -0,0 +1,16 @@ +CONFIG_SMP=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TINY_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_FAST_NO_HZ=n +CONFIG_RCU_NOCB_CPU=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n +CONFIG_RCU_BOOST=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y +CONFIG_RCU_TRACE=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE new file mode 100644 index 000000000000..721cfda76ab2 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE @@ -0,0 +1,19 @@ +CONFIG_SMP=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_PREEMPT_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_FAST_NO_HZ=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_RCU_NOCB_CPU=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n +CONFIG_RCU_BOOST=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y +CONFIG_RCU_TRACE=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 new file mode 100644 index 000000000000..7629f5dd73b2 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 @@ -0,0 +1,22 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=54 +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_PREEMPT_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_FAST_NO_HZ=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_RCU_FANOUT=3 +CONFIG_RCU_FANOUT_LEAF=2 +CONFIG_RCU_NOCB_CPU=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n +CONFIG_RCU_BOOST=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y +CONFIG_RCU_TRACE=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh new file mode 100644 index 000000000000..0333e9b18522 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Torture-suite-dependent shell functions for the rest of the scripts. +# +# Copyright (C) IBM Corporation, 2015 +# +# Authors: Paul E. McKenney + +# per_version_boot_params bootparam-string config-file seconds +# +# Adds per-version torture-module parameters to kernels supporting them. +per_version_boot_params () { + echo $1 rcuscale.shutdown=1 \ + rcuscale.verbose=1 +} -- cgit v1.3.1 From fbb9f8531a0d6693189783d295114db4c30624ca Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 2 Jul 2020 15:59:05 -0700 Subject: torture: document --allcpus argument added to the kvm.sh script Signed-off-by: Paul Gortmaker Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/kvm.sh | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index e655983b7429..0a0846389a6e 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -46,6 +46,7 @@ jitter="-1" usage () { echo "Usage: $scriptname optional arguments:" + echo " --allcpus" echo " --bootargs kernel-boot-arguments" echo " --bootimage relative-path-to-kernel-boot-image" echo " --buildonly" -- cgit v1.3.1 From afcdf2319d11e0d68e45babd5df65f79771074b5 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 13 Jul 2020 21:37:06 +0200 Subject: rcutorture: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/doc/rcu-test-image.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt index 449cf579d6f9..cc280ba157a3 100644 --- a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt +++ b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt @@ -36,7 +36,7 @@ References: https://help.ubuntu.com/community/JeOSVMBuilder http://wiki.libvirt.org/page/UbuntuKVMWalkthrough http://www.moe.co.uk/2011/01/07/pci_add_option_rom-failed-to-find-romfile-pxe-rtl8139-bin/ -- "apt-get install kvm-pxe" - http://www.landley.net/writing/rootfs-howto.html - http://en.wikipedia.org/wiki/Initrd - http://en.wikipedia.org/wiki/Cpio + https://www.landley.net/writing/rootfs-howto.html + https://en.wikipedia.org/wiki/Initrd + https://en.wikipedia.org/wiki/Cpio http://wiki.libvirt.org/page/UbuntuKVMWalkthrough -- cgit v1.3.1 From 33595581f53011d1f0ba64a9a2f76d6fa5528f7f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 13 Jul 2020 14:18:33 -0700 Subject: torture: Update initrd documentation The rcu-test-image.txt documentation covers a very uncommon case where a real userspace environment is required. However, someone reading this document might reasonably conclude that this is in fact a prerequisite. In addition, the initrd.txt file mentions dracut, which is no longer used. This commit therefore provides the needed updates. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/doc/initrd.txt | 36 +++++----------------- .../selftests/rcutorture/doc/rcu-test-image.txt | 35 ++++++++++++++++++--- 2 files changed, 37 insertions(+), 34 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/doc/initrd.txt b/tools/testing/selftests/rcutorture/doc/initrd.txt index 933b4fd12327..41a4255865d4 100644 --- a/tools/testing/selftests/rcutorture/doc/initrd.txt +++ b/tools/testing/selftests/rcutorture/doc/initrd.txt @@ -1,12 +1,11 @@ -The rcutorture scripting tools automatically create the needed initrd -directory using dracut. Failing that, this tool will create an initrd -containing a single statically linked binary named "init" that loops -over a very long sleep() call. In both cases, this creation is done -by tools/testing/selftests/rcutorture/bin/mkinitrd.sh. +The rcutorture scripting tools automatically create an initrd containing +a single statically linked binary named "init" that loops over a +very long sleep() call. In both cases, this creation is done by +tools/testing/selftests/rcutorture/bin/mkinitrd.sh. -However, if you are attempting to run rcutorture on a system that does -not have dracut installed, and if you don't like the notion of static -linking, you might wish to press an existing initrd into service: +However, if you don't like the notion of statically linked bare-bones +userspace environments, you might wish to press an existing initrd +into service: ------------------------------------------------------------------------ cd tools/testing/selftests/rcutorture @@ -15,24 +14,3 @@ mkdir initrd cd initrd cpio -id < /tmp/initrd.img.zcat # Manually verify that initrd contains needed binaries and libraries. ------------------------------------------------------------------------- - -Interestingly enough, if you are running rcutorture, you don't really -need userspace in many cases. Running without userspace has the -advantage of allowing you to test your kernel independently of the -distro in place, the root-filesystem layout, and so on. To make this -happen, put the following script in the initrd's tree's "/init" file, -with 0755 mode. - ------------------------------------------------------------------------- -#!/bin/sh - -while : -do - sleep 10 -done ------------------------------------------------------------------------- - -This approach also allows most of the binaries and libraries in the -initrd filesystem to be dispensed with, which can save significant -space in rcutorture's "res" directory. diff --git a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt index cc280ba157a3..b2fc247976b1 100644 --- a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt +++ b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt @@ -1,8 +1,33 @@ -This document describes one way to create the rcu-test-image file -that contains the filesystem used by the guest-OS kernel. There are -probably much better ways of doing this, and this filesystem could no -doubt be smaller. It is probably also possible to simply download -an appropriate image from any number of places. +Normally, a minimal initrd is created automatically by the rcutorture +scripting. But minimal really does mean "minimal", namely just a single +root directory with a single statically linked executable named "init": + +$ size tools/testing/selftests/rcutorture/initrd/init + text data bss dec hex filename + 328 0 8 336 150 tools/testing/selftests/rcutorture/initrd/init + +Suppose you need to run some scripts, perhaps to monitor or control +some aspect of the rcutorture testing. This will require a more fully +filled-out userspace, perhaps containing libraries, executables for +the shell and other utilities, and soforth. In that case, place your +desired filesystem here: + + tools/testing/selftests/rcutorture/initrd + +For example, your tools/testing/selftests/rcutorture/initrd/init might +be a script that does any needed mount operations and starts whatever +scripts need starting to properly monitor or control your testing. +The next rcutorture build will then incorporate this filesystem into +the kernel image that is passed to qemu. + +Or maybe you need a real root filesystem for some reason, in which case +please read on! + +The remainder of this document describes one way to create the +rcu-test-image file that contains the filesystem used by the guest-OS +kernel. There are probably much better ways of doing this, and this +filesystem could no doubt be smaller. It is probably also possible to +simply download an appropriate image from any number of places. That said, here are the commands: -- cgit v1.3.1 From fc848cf4face352dce663c1fcc73717fba2d4557 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 14 Jul 2020 11:02:15 -0700 Subject: rcutorture: Add CONFIG_PROVE_RCU_LIST to TREE05 Currently, the CONFIG_PROVE_RCU_LIST=y case is untested. This commit therefore adds CONFIG_PROVE_RCU_LIST=y to rcutorture's TREE05 scenario. Cc: Madhuparna Bhowmik Cc: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/configs/rcu/TREE05 | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 index 2dde0d9964e3..4f95f8544f3f 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 @@ -16,5 +16,6 @@ CONFIG_RCU_NOCB_CPU=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y #CHECK#CONFIG_PROVE_RCU=y +CONFIG_PROVE_RCU_LIST=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y -- cgit v1.3.1 From 5461808889405de254ab3370aa7f07ac0b6cb938 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 19 Jul 2020 12:17:53 -0700 Subject: torture: Add kvm.sh --help and update help message This commit adds a --help argument (along with its synonym -h) to display the help text. While in the area, this commit also updates the help text. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/kvm.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 0a0846389a6e..fc15b527172f 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -56,17 +56,18 @@ usage () { echo " --defconfig string" echo " --dryrun sched|script" echo " --duration minutes" + echo " --help" echo " --interactive" echo " --jitter N [ maxsleep (us) [ maxspin (us) ] ]" echo " --kconfig Kconfig-options" echo " --kmake-arg kernel-make-arguments" echo " --mac nn:nn:nn:nn:nn:nn" - echo " --memory megabytes | nnnG" + echo " --memory megabytes|nnnG" echo " --no-initrd" echo " --qemu-args qemu-arguments" echo " --qemu-cmd qemu-system-..." echo " --results absolute-pathname" - echo " --torture rcu" + echo " --torture lock|rcu|rcuperf|refscale|scf" echo " --trust-make" exit 1 } @@ -127,6 +128,9 @@ do dur=$(($2*60)) shift ;; + --help|-h) + usage + ;; --interactive) TORTURE_QEMU_INTERACTIVE=1; export TORTURE_QEMU_INTERACTIVE ;; -- cgit v1.3.1 From b67a91703a29b93f5b114052b0b8e0d84e717ad3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 17 Aug 2020 16:44:48 -0700 Subject: torture: Add gdb support This commit adds a "--gdb" parameter to kvm.sh, which causes "CONFIG_DEBUG_INFO=y" to be added to the Kconfig options, "nokaslr" to be added to the boot parameters, and "-s -S" to be added to the qemu arguments. Furthermore, the scripting prints messages telling the user how to start up gdb for the run in question. Because of the interactive nature of gdb sessions, only one "--configs" scenario is permitted when "--gdb" is specified. For most torture types, this means that a "--configs" argument is required, and that argument must specify the single scenario of interest. The usual cautions about breakpoints and timing apply, for example, staring at your gdb prompt for too long will likely get you many complaints, including RCU CPU stall warnings. Omar Sandoval further suggests using gdb's "hbreak" command instead of the "break" command on systems supporting hardware breakpoints, and further using the "commands" option because the resulting non-interactive breakpoints are less likely to get you RCU CPU stall warnings. Signed-off-by: Paul E. McKenney --- .../selftests/rcutorture/bin/kvm-test-1-run.sh | 33 ++++++++++++++++------ tools/testing/selftests/rcutorture/bin/kvm.sh | 21 ++++++++++++++ 2 files changed, 46 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index e07779a62634..6dc2b49b85ea 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -66,6 +66,7 @@ config_override_param () { echo > $T/KcList config_override_param "$config_dir/CFcommon" KcList "`cat $config_dir/CFcommon 2> /dev/null`" config_override_param "$config_template" KcList "`cat $config_template 2> /dev/null`" +config_override_param "--gdb options" KcList "$TORTURE_KCONFIG_GDB_ARG" config_override_param "--kasan options" KcList "$TORTURE_KCONFIG_KASAN_ARG" config_override_param "--kcsan options" KcList "$TORTURE_KCONFIG_KCSAN_ARG" config_override_param "--kconfig argument" KcList "$TORTURE_KCONFIG_ARG" @@ -152,7 +153,11 @@ qemu_append="`identify_qemu_append "$QEMU"`" boot_args="`configfrag_boot_params "$boot_args" "$config_template"`" # Generate kernel-version-specific boot parameters boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`" -echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd +if test -n "$TORTURE_BOOT_GDB_ARG" +then + boot_args="$boot_args $TORTURE_BOOT_GDB_ARG" +fi +echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" $TORTURE_QEMU_GDB_ARG > $resdir/qemu-cmd if test -n "$TORTURE_BUILDONLY" then @@ -171,14 +176,26 @@ echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log # Attempt to run qemu ( . $T/qemu-cmd; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) & commandcompleted=0 -sleep 10 # Give qemu's pid a chance to reach the file -if test -s "$resdir/qemu_pid" +if test -z "$TORTURE_KCONFIG_GDB_ARG" then - qemu_pid=`cat "$resdir/qemu_pid"` - echo Monitoring qemu job at pid $qemu_pid -else - qemu_pid="" - echo Monitoring qemu job at yet-as-unknown pid + sleep 10 # Give qemu's pid a chance to reach the file + if test -s "$resdir/qemu_pid" + then + qemu_pid=`cat "$resdir/qemu_pid"` + echo Monitoring qemu job at pid $qemu_pid + else + qemu_pid="" + echo Monitoring qemu job at yet-as-unknown pid + fi +fi +if test -n "$TORTURE_KCONFIG_GDB_ARG" +then + echo Waiting for you to attach a debug session, for example: > /dev/tty + echo " gdb $base_resdir/vmlinux" > /dev/tty + echo 'After symbols load and the "(gdb)" prompt appears:' > /dev/tty + echo " target remote :1234" > /dev/tty + echo " continue" > /dev/tty + kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null` fi while : do diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index fc15b527172f..c30047e52b54 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -31,6 +31,9 @@ TORTURE_DEFCONFIG=defconfig TORTURE_BOOT_IMAGE="" TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD TORTURE_KCONFIG_ARG="" +TORTURE_KCONFIG_GDB_ARG="" +TORTURE_BOOT_GDB_ARG="" +TORTURE_QEMU_GDB_ARG="" TORTURE_KCONFIG_KASAN_ARG="" TORTURE_KCONFIG_KCSAN_ARG="" TORTURE_KMAKE_ARG="" @@ -56,6 +59,7 @@ usage () { echo " --defconfig string" echo " --dryrun sched|script" echo " --duration minutes" + echo " --gdb" echo " --help" echo " --interactive" echo " --jitter N [ maxsleep (us) [ maxspin (us) ] ]" @@ -128,6 +132,11 @@ do dur=$(($2*60)) shift ;; + --gdb) + TORTURE_KCONFIG_GDB_ARG="CONFIG_DEBUG_INFO=y"; export TORTURE_KCONFIG_GDB_ARG + TORTURE_BOOT_GDB_ARG="nokaslr"; export TORTURE_BOOT_GDB_ARG + TORTURE_QEMU_GDB_ARG="-s -S"; export TORTURE_QEMU_GDB_ARG + ;; --help|-h) usage ;; @@ -253,6 +262,15 @@ do done touch $T/cfgcpu configs_derep="`echo $configs_derep | sed -e "s/\/$defaultconfigs/g"`" +if test -n "$TORTURE_KCONFIG_GDB_ARG" +then + if test "`echo $configs_derep | wc -w`" -gt 1 + then + echo "The --config list is: $configs_derep." + echo "Only one --config permitted with --gdb, terminating." + exit 1 + fi +fi for CF1 in $configs_derep do if test -f "$CONFIGFRAG/$CF1" @@ -328,6 +346,9 @@ TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG TORTURE_INITRD="$TORTURE_INITRD"; export TORTURE_INITRD TORTURE_KCONFIG_ARG="$TORTURE_KCONFIG_ARG"; export TORTURE_KCONFIG_ARG +TORTURE_KCONFIG_GDB_ARG="$TORTURE_KCONFIG_GDB_ARG"; export TORTURE_KCONFIG_GDB_ARG +TORTURE_BOOT_GDB_ARG="$TORTURE_BOOT_GDB_ARG"; export TORTURE_BOOT_GDB_ARG +TORTURE_QEMU_GDB_ARG="$TORTURE_QEMU_GDB_ARG"; export TORTURE_QEMU_GDB_ARG TORTURE_KCONFIG_KASAN_ARG="$TORTURE_KCONFIG_KASAN_ARG"; export TORTURE_KCONFIG_KASAN_ARG TORTURE_KCONFIG_KCSAN_ARG="$TORTURE_KCONFIG_KCSAN_ARG"; export TORTURE_KCONFIG_KCSAN_ARG TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG -- cgit v1.3.1 From 0fcdfffe80346d015b920228203d0269284d8b13 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 24 Aug 2020 15:28:07 -0700 Subject: selftests/bpf: Enable tc verbose mode for test_sk_assign Currently test_sk_assign failed verifier with llvm11/llvm12. During debugging, I found the default verifier output is truncated like below Verifier analysis: Skipped 2200 bytes, use 'verb' option for the full verbose log. [...] off=23,r=34,imm=0) R5=inv0 R6=ctx(id=0,off=0,imm=0) R7=pkt(id=0,off=0,r=34,imm=0) R10=fp0 80: (0f) r7 += r2 last_idx 80 first_idx 21 regs=4 stack=0 before 78: (16) if w3 == 0x11 goto pc+1 when I am using "./test_progs -vv -t assign". The reason is tc verbose mode is not enabled. This patched enabled tc verbose mode and the output looks like below Verifier analysis: 0: (bf) r6 = r1 1: (b4) w0 = 2 2: (61) r1 = *(u32 *)(r6 +80) 3: (61) r7 = *(u32 *)(r6 +76) 4: (bf) r2 = r7 5: (07) r2 += 14 6: (2d) if r2 > r1 goto pc+61 R0_w=inv2 R1_w=pkt_end(id=0,off=0,imm=0) R2_w=pkt(id=0,off=14,r=14,imm=0) ... Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200824222807.100200-1-yhs@fb.com --- tools/testing/selftests/bpf/prog_tests/sk_assign.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c index d43038d2b9e1..a49a26f95a8b 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c @@ -49,7 +49,7 @@ configure_stack(void) sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf", "direct-action object-file ./test_sk_assign.o", "section classifier/sk_assign_test", - (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : ""); + (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "verbose"); if (CHECK(system(tc_cmd), "BPF load failed;", "run with -vv for more info\n")) return false; -- cgit v1.3.1 From 1f00d375af84fbcdb6dd6c79fd7c3d02d2390338 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 25 Aug 2020 20:29:13 +0200 Subject: bpf: Renames in preparation for bpf_local_storage A purely mechanical change to split the renaming from the actual generalization. Flags/consts: SK_STORAGE_CREATE_FLAG_MASK BPF_LOCAL_STORAGE_CREATE_FLAG_MASK BPF_SK_STORAGE_CACHE_SIZE BPF_LOCAL_STORAGE_CACHE_SIZE MAX_VALUE_SIZE BPF_LOCAL_STORAGE_MAX_VALUE_SIZE Structs: bucket bpf_local_storage_map_bucket bpf_sk_storage_map bpf_local_storage_map bpf_sk_storage_data bpf_local_storage_data bpf_sk_storage_elem bpf_local_storage_elem bpf_sk_storage bpf_local_storage The "sk" member in bpf_local_storage is also updated to "owner" in preparation for changing the type to void * in a subsequent patch. Functions: selem_linked_to_sk selem_linked_to_storage selem_alloc bpf_selem_alloc __selem_unlink_sk bpf_selem_unlink_storage_nolock __selem_link_sk bpf_selem_link_storage_nolock selem_unlink_sk __bpf_selem_unlink_storage sk_storage_update bpf_local_storage_update __sk_storage_lookup bpf_local_storage_lookup bpf_sk_storage_map_free bpf_local_storage_map_free bpf_sk_storage_map_alloc bpf_local_storage_map_alloc bpf_sk_storage_map_alloc_check bpf_local_storage_map_alloc_check bpf_sk_storage_map_check_btf bpf_local_storage_map_check_btf Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200825182919.1118197-2-kpsingh@chromium.org --- include/net/sock.h | 4 +- net/core/bpf_sk_storage.c | 488 ++++++++++++----------- tools/testing/selftests/bpf/progs/map_ptr_kern.c | 6 +- 3 files changed, 255 insertions(+), 243 deletions(-) (limited to 'tools') diff --git a/include/net/sock.h b/include/net/sock.h index 064637d1ddf6..18423cc9cde8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -246,7 +246,7 @@ struct sock_common { /* public: */ }; -struct bpf_sk_storage; +struct bpf_local_storage; /** * struct sock - network layer representation of sockets @@ -517,7 +517,7 @@ struct sock { void (*sk_destruct)(struct sock *sk); struct sock_reuseport __rcu *sk_reuseport_cb; #ifdef CONFIG_BPF_SYSCALL - struct bpf_sk_storage __rcu *sk_bpf_storage; + struct bpf_local_storage __rcu *sk_bpf_storage; #endif struct rcu_head sk_rcu; }; diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 281200dc0a01..f975e2d01207 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -12,33 +12,32 @@ #include #include -#define SK_STORAGE_CREATE_FLAG_MASK \ - (BPF_F_NO_PREALLOC | BPF_F_CLONE) +#define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE) -struct bucket { +struct bpf_local_storage_map_bucket { struct hlist_head list; raw_spinlock_t lock; }; -/* Thp map is not the primary owner of a bpf_sk_storage_elem. - * Instead, the sk->sk_bpf_storage is. +/* Thp map is not the primary owner of a bpf_local_storage_elem. + * Instead, the container object (eg. sk->sk_bpf_storage) is. * - * The map (bpf_sk_storage_map) is for two purposes - * 1. Define the size of the "sk local storage". It is + * The map (bpf_local_storage_map) is for two purposes + * 1. Define the size of the "local storage". It is * the map's value_size. * * 2. Maintain a list to keep track of all elems such * that they can be cleaned up during the map destruction. * * When a bpf local storage is being looked up for a - * particular sk, the "bpf_map" pointer is actually used + * particular object, the "bpf_map" pointer is actually used * as the "key" to search in the list of elem in - * sk->sk_bpf_storage. + * the respective bpf_local_storage owned by the object. * - * Hence, consider sk->sk_bpf_storage is the mini-map - * with the "bpf_map" pointer as the searching key. + * e.g. sk->sk_bpf_storage is the mini-map with the "bpf_map" pointer + * as the searching key. */ -struct bpf_sk_storage_map { +struct bpf_local_storage_map { struct bpf_map map; /* Lookup elem does not require accessing the map. * @@ -46,55 +45,57 @@ struct bpf_sk_storage_map { * link/unlink the elem from the map. Having * multiple buckets to improve contention. */ - struct bucket *buckets; + struct bpf_local_storage_map_bucket *buckets; u32 bucket_log; u16 elem_size; u16 cache_idx; }; -struct bpf_sk_storage_data { +struct bpf_local_storage_data { /* smap is used as the searching key when looking up - * from sk->sk_bpf_storage. + * from the object's bpf_local_storage. * * Put it in the same cacheline as the data to minimize * the number of cachelines access during the cache hit case. */ - struct bpf_sk_storage_map __rcu *smap; + struct bpf_local_storage_map __rcu *smap; u8 data[] __aligned(8); }; -/* Linked to bpf_sk_storage and bpf_sk_storage_map */ -struct bpf_sk_storage_elem { - struct hlist_node map_node; /* Linked to bpf_sk_storage_map */ - struct hlist_node snode; /* Linked to bpf_sk_storage */ - struct bpf_sk_storage __rcu *sk_storage; +/* Linked to bpf_local_storage and bpf_local_storage_map */ +struct bpf_local_storage_elem { + struct hlist_node map_node; /* Linked to bpf_local_storage_map */ + struct hlist_node snode; /* Linked to bpf_local_storage */ + struct bpf_local_storage __rcu *local_storage; struct rcu_head rcu; /* 8 bytes hole */ /* The data is stored in aother cacheline to minimize * the number of cachelines access during a cache hit. */ - struct bpf_sk_storage_data sdata ____cacheline_aligned; + struct bpf_local_storage_data sdata ____cacheline_aligned; }; -#define SELEM(_SDATA) container_of((_SDATA), struct bpf_sk_storage_elem, sdata) +#define SELEM(_SDATA) \ + container_of((_SDATA), struct bpf_local_storage_elem, sdata) #define SDATA(_SELEM) (&(_SELEM)->sdata) -#define BPF_SK_STORAGE_CACHE_SIZE 16 +#define BPF_LOCAL_STORAGE_CACHE_SIZE 16 static DEFINE_SPINLOCK(cache_idx_lock); -static u64 cache_idx_usage_counts[BPF_SK_STORAGE_CACHE_SIZE]; +static u64 cache_idx_usage_counts[BPF_LOCAL_STORAGE_CACHE_SIZE]; -struct bpf_sk_storage { - struct bpf_sk_storage_data __rcu *cache[BPF_SK_STORAGE_CACHE_SIZE]; - struct hlist_head list; /* List of bpf_sk_storage_elem */ - struct sock *sk; /* The sk that owns the the above "list" of - * bpf_sk_storage_elem. +struct bpf_local_storage { + struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE]; + struct hlist_head list; /* List of bpf_local_storage_elem */ + struct sock *owner; /* The object that owns the above "list" of + * bpf_local_storage_elem. */ struct rcu_head rcu; raw_spinlock_t lock; /* Protect adding/removing from the "list" */ }; -static struct bucket *select_bucket(struct bpf_sk_storage_map *smap, - struct bpf_sk_storage_elem *selem) +static struct bpf_local_storage_map_bucket * +select_bucket(struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *selem) { return &smap->buckets[hash_ptr(selem, smap->bucket_log)]; } @@ -111,21 +112,21 @@ static int omem_charge(struct sock *sk, unsigned int size) return -ENOMEM; } -static bool selem_linked_to_sk(const struct bpf_sk_storage_elem *selem) +static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem) { return !hlist_unhashed(&selem->snode); } -static bool selem_linked_to_map(const struct bpf_sk_storage_elem *selem) +static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem) { return !hlist_unhashed(&selem->map_node); } -static struct bpf_sk_storage_elem *selem_alloc(struct bpf_sk_storage_map *smap, - struct sock *sk, void *value, - bool charge_omem) +static struct bpf_local_storage_elem * +bpf_selem_alloc(struct bpf_local_storage_map *smap, struct sock *sk, + void *value, bool charge_omem) { - struct bpf_sk_storage_elem *selem; + struct bpf_local_storage_elem *selem; if (charge_omem && omem_charge(sk, smap->elem_size)) return NULL; @@ -143,89 +144,93 @@ static struct bpf_sk_storage_elem *selem_alloc(struct bpf_sk_storage_map *smap, return NULL; } -/* sk_storage->lock must be held and selem->sk_storage == sk_storage. +/* local_storage->lock must be held and selem->local_storage == local_storage. * The caller must ensure selem->smap is still valid to be * dereferenced for its smap->elem_size and smap->cache_idx. */ -static bool __selem_unlink_sk(struct bpf_sk_storage *sk_storage, - struct bpf_sk_storage_elem *selem, - bool uncharge_omem) +static bool +bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, + struct bpf_local_storage_elem *selem, + bool uncharge_omem) { - struct bpf_sk_storage_map *smap; - bool free_sk_storage; + struct bpf_local_storage_map *smap; + bool free_local_storage; struct sock *sk; smap = rcu_dereference(SDATA(selem)->smap); - sk = sk_storage->sk; + sk = local_storage->owner; - /* All uncharging on sk->sk_omem_alloc must be done first. - * sk may be freed once the last selem is unlinked from sk_storage. + /* All uncharging on the owner must be done first. + * The owner may be freed once the last selem is unlinked + * from local_storage. */ if (uncharge_omem) atomic_sub(smap->elem_size, &sk->sk_omem_alloc); - free_sk_storage = hlist_is_singular_node(&selem->snode, - &sk_storage->list); - if (free_sk_storage) { - atomic_sub(sizeof(struct bpf_sk_storage), &sk->sk_omem_alloc); - sk_storage->sk = NULL; + free_local_storage = hlist_is_singular_node(&selem->snode, + &local_storage->list); + if (free_local_storage) { + atomic_sub(sizeof(struct bpf_local_storage), &sk->sk_omem_alloc); + local_storage->owner = NULL; /* After this RCU_INIT, sk may be freed and cannot be used */ RCU_INIT_POINTER(sk->sk_bpf_storage, NULL); - /* sk_storage is not freed now. sk_storage->lock is - * still held and raw_spin_unlock_bh(&sk_storage->lock) + /* local_storage is not freed now. local_storage->lock is + * still held and raw_spin_unlock_bh(&local_storage->lock) * will be done by the caller. * * Although the unlock will be done under * rcu_read_lock(), it is more intutivie to - * read if kfree_rcu(sk_storage, rcu) is done - * after the raw_spin_unlock_bh(&sk_storage->lock). + * read if kfree_rcu(local_storage, rcu) is done + * after the raw_spin_unlock_bh(&local_storage->lock). * - * Hence, a "bool free_sk_storage" is returned + * Hence, a "bool free_local_storage" is returned * to the caller which then calls the kfree_rcu() * after unlock. */ } hlist_del_init_rcu(&selem->snode); - if (rcu_access_pointer(sk_storage->cache[smap->cache_idx]) == + if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) == SDATA(selem)) - RCU_INIT_POINTER(sk_storage->cache[smap->cache_idx], NULL); + RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); kfree_rcu(selem, rcu); - return free_sk_storage; + return free_local_storage; } -static void selem_unlink_sk(struct bpf_sk_storage_elem *selem) +static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem) { - struct bpf_sk_storage *sk_storage; - bool free_sk_storage = false; + struct bpf_local_storage *local_storage; + bool free_local_storage = false; - if (unlikely(!selem_linked_to_sk(selem))) + if (unlikely(!selem_linked_to_storage(selem))) /* selem has already been unlinked from sk */ return; - sk_storage = rcu_dereference(selem->sk_storage); - raw_spin_lock_bh(&sk_storage->lock); - if (likely(selem_linked_to_sk(selem))) - free_sk_storage = __selem_unlink_sk(sk_storage, selem, true); - raw_spin_unlock_bh(&sk_storage->lock); + local_storage = rcu_dereference(selem->local_storage); + raw_spin_lock_bh(&local_storage->lock); + if (likely(selem_linked_to_storage(selem))) + free_local_storage = + bpf_selem_unlink_storage_nolock(local_storage, selem, true); + raw_spin_unlock_bh(&local_storage->lock); - if (free_sk_storage) - kfree_rcu(sk_storage, rcu); + if (free_local_storage) + kfree_rcu(local_storage, rcu); } -static void __selem_link_sk(struct bpf_sk_storage *sk_storage, - struct bpf_sk_storage_elem *selem) +static void +bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, + struct bpf_local_storage_elem *selem) { - RCU_INIT_POINTER(selem->sk_storage, sk_storage); - hlist_add_head(&selem->snode, &sk_storage->list); + RCU_INIT_POINTER(selem->local_storage, local_storage); + hlist_add_head(&selem->snode, &local_storage->list); } -static void selem_unlink_map(struct bpf_sk_storage_elem *selem) +static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) { - struct bpf_sk_storage_map *smap; - struct bucket *b; + struct bpf_local_storage_map *smap; + struct bpf_local_storage_map_bucket *b; if (unlikely(!selem_linked_to_map(selem))) /* selem has already be unlinked from smap */ @@ -239,10 +244,10 @@ static void selem_unlink_map(struct bpf_sk_storage_elem *selem) raw_spin_unlock_bh(&b->lock); } -static void selem_link_map(struct bpf_sk_storage_map *smap, - struct bpf_sk_storage_elem *selem) +static void bpf_selem_link_map(struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *selem) { - struct bucket *b = select_bucket(smap, selem); + struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem); raw_spin_lock_bh(&b->lock); RCU_INIT_POINTER(SDATA(selem)->smap, smap); @@ -250,31 +255,31 @@ static void selem_link_map(struct bpf_sk_storage_map *smap, raw_spin_unlock_bh(&b->lock); } -static void selem_unlink(struct bpf_sk_storage_elem *selem) +static void bpf_selem_unlink(struct bpf_local_storage_elem *selem) { - /* Always unlink from map before unlinking from sk_storage + /* Always unlink from map before unlinking from local_storage * because selem will be freed after successfully unlinked from - * the sk_storage. + * the local_storage. */ - selem_unlink_map(selem); - selem_unlink_sk(selem); + bpf_selem_unlink_map(selem); + __bpf_selem_unlink_storage(selem); } -static struct bpf_sk_storage_data * -__sk_storage_lookup(struct bpf_sk_storage *sk_storage, - struct bpf_sk_storage_map *smap, - bool cacheit_lockit) +static struct bpf_local_storage_data * +bpf_local_storage_lookup(struct bpf_local_storage *local_storage, + struct bpf_local_storage_map *smap, + bool cacheit_lockit) { - struct bpf_sk_storage_data *sdata; - struct bpf_sk_storage_elem *selem; + struct bpf_local_storage_data *sdata; + struct bpf_local_storage_elem *selem; /* Fast path (cache hit) */ - sdata = rcu_dereference(sk_storage->cache[smap->cache_idx]); + sdata = rcu_dereference(local_storage->cache[smap->cache_idx]); if (sdata && rcu_access_pointer(sdata->smap) == smap) return sdata; /* Slow path (cache miss) */ - hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) + hlist_for_each_entry_rcu(selem, &local_storage->list, snode) if (rcu_access_pointer(SDATA(selem)->smap) == smap) break; @@ -286,33 +291,33 @@ __sk_storage_lookup(struct bpf_sk_storage *sk_storage, /* spinlock is needed to avoid racing with the * parallel delete. Otherwise, publishing an already * deleted sdata to the cache will become a use-after-free - * problem in the next __sk_storage_lookup(). + * problem in the next bpf_local_storage_lookup(). */ - raw_spin_lock_bh(&sk_storage->lock); - if (selem_linked_to_sk(selem)) - rcu_assign_pointer(sk_storage->cache[smap->cache_idx], + raw_spin_lock_bh(&local_storage->lock); + if (selem_linked_to_storage(selem)) + rcu_assign_pointer(local_storage->cache[smap->cache_idx], sdata); - raw_spin_unlock_bh(&sk_storage->lock); + raw_spin_unlock_bh(&local_storage->lock); } return sdata; } -static struct bpf_sk_storage_data * +static struct bpf_local_storage_data * sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit) { - struct bpf_sk_storage *sk_storage; - struct bpf_sk_storage_map *smap; + struct bpf_local_storage *sk_storage; + struct bpf_local_storage_map *smap; sk_storage = rcu_dereference(sk->sk_bpf_storage); if (!sk_storage) return NULL; - smap = (struct bpf_sk_storage_map *)map; - return __sk_storage_lookup(sk_storage, smap, cacheit_lockit); + smap = (struct bpf_local_storage_map *)map; + return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit); } -static int check_flags(const struct bpf_sk_storage_data *old_sdata, +static int check_flags(const struct bpf_local_storage_data *old_sdata, u64 map_flags) { if (old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST) @@ -327,10 +332,10 @@ static int check_flags(const struct bpf_sk_storage_data *old_sdata, } static int sk_storage_alloc(struct sock *sk, - struct bpf_sk_storage_map *smap, - struct bpf_sk_storage_elem *first_selem) + struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *first_selem) { - struct bpf_sk_storage *prev_sk_storage, *sk_storage; + struct bpf_local_storage *prev_sk_storage, *sk_storage; int err; err = omem_charge(sk, sizeof(*sk_storage)); @@ -344,10 +349,10 @@ static int sk_storage_alloc(struct sock *sk, } INIT_HLIST_HEAD(&sk_storage->list); raw_spin_lock_init(&sk_storage->lock); - sk_storage->sk = sk; + sk_storage->owner = sk; - __selem_link_sk(sk_storage, first_selem); - selem_link_map(smap, first_selem); + bpf_selem_link_storage_nolock(sk_storage, first_selem); + bpf_selem_link_map(smap, first_selem); /* Publish sk_storage to sk. sk->sk_lock cannot be acquired. * Hence, atomic ops is used to set sk->sk_bpf_storage * from NULL to the newly allocated sk_storage ptr. @@ -357,17 +362,17 @@ static int sk_storage_alloc(struct sock *sk, * the sk->sk_bpf_storage, the sk_storage->lock must * be held before setting sk->sk_bpf_storage to NULL. */ - prev_sk_storage = cmpxchg((struct bpf_sk_storage **)&sk->sk_bpf_storage, + prev_sk_storage = cmpxchg((struct bpf_local_storage **)&sk->sk_bpf_storage, NULL, sk_storage); if (unlikely(prev_sk_storage)) { - selem_unlink_map(first_selem); + bpf_selem_unlink_map(first_selem); err = -EAGAIN; goto uncharge; /* Note that even first_selem was linked to smap's * bucket->list, first_selem can be freed immediately * (instead of kfree_rcu) because - * bpf_sk_storage_map_free() does a + * bpf_local_storage_map_free() does a * synchronize_rcu() before walking the bucket->list. * Hence, no one is accessing selem from the * bucket->list under rcu_read_lock(). @@ -387,15 +392,14 @@ uncharge: * Otherwise, it will become a leak (and other memory issues * during map destruction). */ -static struct bpf_sk_storage_data *sk_storage_update(struct sock *sk, - struct bpf_map *map, - void *value, - u64 map_flags) +static struct bpf_local_storage_data * +bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value, + u64 map_flags) { - struct bpf_sk_storage_data *old_sdata = NULL; - struct bpf_sk_storage_elem *selem; - struct bpf_sk_storage *sk_storage; - struct bpf_sk_storage_map *smap; + struct bpf_local_storage_data *old_sdata = NULL; + struct bpf_local_storage_elem *selem; + struct bpf_local_storage *local_storage; + struct bpf_local_storage_map *smap; int err; /* BPF_EXIST and BPF_NOEXIST cannot be both set */ @@ -404,15 +408,15 @@ static struct bpf_sk_storage_data *sk_storage_update(struct sock *sk, unlikely((map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map))) return ERR_PTR(-EINVAL); - smap = (struct bpf_sk_storage_map *)map; - sk_storage = rcu_dereference(sk->sk_bpf_storage); - if (!sk_storage || hlist_empty(&sk_storage->list)) { - /* Very first elem for this sk */ + smap = (struct bpf_local_storage_map *)map; + local_storage = rcu_dereference(sk->sk_bpf_storage); + if (!local_storage || hlist_empty(&local_storage->list)) { + /* Very first elem for the owner */ err = check_flags(NULL, map_flags); if (err) return ERR_PTR(err); - selem = selem_alloc(smap, sk, value, true); + selem = bpf_selem_alloc(smap, sk, value, true); if (!selem) return ERR_PTR(-ENOMEM); @@ -428,25 +432,26 @@ static struct bpf_sk_storage_data *sk_storage_update(struct sock *sk, if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) { /* Hoping to find an old_sdata to do inline update - * such that it can avoid taking the sk_storage->lock + * such that it can avoid taking the local_storage->lock * and changing the lists. */ - old_sdata = __sk_storage_lookup(sk_storage, smap, false); + old_sdata = + bpf_local_storage_lookup(local_storage, smap, false); err = check_flags(old_sdata, map_flags); if (err) return ERR_PTR(err); - if (old_sdata && selem_linked_to_sk(SELEM(old_sdata))) { + if (old_sdata && selem_linked_to_storage(SELEM(old_sdata))) { copy_map_value_locked(map, old_sdata->data, value, false); return old_sdata; } } - raw_spin_lock_bh(&sk_storage->lock); + raw_spin_lock_bh(&local_storage->lock); - /* Recheck sk_storage->list under sk_storage->lock */ - if (unlikely(hlist_empty(&sk_storage->list))) { - /* A parallel del is happening and sk_storage is going + /* Recheck local_storage->list under local_storage->lock */ + if (unlikely(hlist_empty(&local_storage->list))) { + /* A parallel del is happening and local_storage is going * away. It has just been checked before, so very * unlikely. Return instead of retry to keep things * simple. @@ -455,7 +460,7 @@ static struct bpf_sk_storage_data *sk_storage_update(struct sock *sk, goto unlock_err; } - old_sdata = __sk_storage_lookup(sk_storage, smap, false); + old_sdata = bpf_local_storage_lookup(local_storage, smap, false); err = check_flags(old_sdata, map_flags); if (err) goto unlock_err; @@ -466,50 +471,52 @@ static struct bpf_sk_storage_data *sk_storage_update(struct sock *sk, goto unlock; } - /* sk_storage->lock is held. Hence, we are sure + /* local_storage->lock is held. Hence, we are sure * we can unlink and uncharge the old_sdata successfully * later. Hence, instead of charging the new selem now * and then uncharge the old selem later (which may cause * a potential but unnecessary charge failure), avoid taking * a charge at all here (the "!old_sdata" check) and the - * old_sdata will not be uncharged later during __selem_unlink_sk(). + * old_sdata will not be uncharged later during + * bpf_selem_unlink_storage_nolock(). */ - selem = selem_alloc(smap, sk, value, !old_sdata); + selem = bpf_selem_alloc(smap, sk, value, !old_sdata); if (!selem) { err = -ENOMEM; goto unlock_err; } /* First, link the new selem to the map */ - selem_link_map(smap, selem); + bpf_selem_link_map(smap, selem); - /* Second, link (and publish) the new selem to sk_storage */ - __selem_link_sk(sk_storage, selem); + /* Second, link (and publish) the new selem to local_storage */ + bpf_selem_link_storage_nolock(local_storage, selem); /* Third, remove old selem, SELEM(old_sdata) */ if (old_sdata) { - selem_unlink_map(SELEM(old_sdata)); - __selem_unlink_sk(sk_storage, SELEM(old_sdata), false); + bpf_selem_unlink_map(SELEM(old_sdata)); + bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata), + false); } unlock: - raw_spin_unlock_bh(&sk_storage->lock); + raw_spin_unlock_bh(&local_storage->lock); return SDATA(selem); unlock_err: - raw_spin_unlock_bh(&sk_storage->lock); + raw_spin_unlock_bh(&local_storage->lock); return ERR_PTR(err); } static int sk_storage_delete(struct sock *sk, struct bpf_map *map) { - struct bpf_sk_storage_data *sdata; + struct bpf_local_storage_data *sdata; sdata = sk_storage_lookup(sk, map, false); if (!sdata) return -ENOENT; - selem_unlink(SELEM(sdata)); + bpf_selem_unlink(SELEM(sdata)); return 0; } @@ -521,7 +528,7 @@ static u16 cache_idx_get(void) spin_lock(&cache_idx_lock); - for (i = 0; i < BPF_SK_STORAGE_CACHE_SIZE; i++) { + for (i = 0; i < BPF_LOCAL_STORAGE_CACHE_SIZE; i++) { if (cache_idx_usage_counts[i] < min_usage) { min_usage = cache_idx_usage_counts[i]; res = i; @@ -548,8 +555,8 @@ static void cache_idx_free(u16 idx) /* Called by __sk_destruct() & bpf_sk_storage_clone() */ void bpf_sk_storage_free(struct sock *sk) { - struct bpf_sk_storage_elem *selem; - struct bpf_sk_storage *sk_storage; + struct bpf_local_storage_elem *selem; + struct bpf_local_storage *sk_storage; bool free_sk_storage = false; struct hlist_node *n; @@ -565,7 +572,7 @@ void bpf_sk_storage_free(struct sock *sk) * Thus, no elem can be added-to or deleted-from the * sk_storage->list by the bpf_prog or by the bpf-map's syscall. * - * It is racing with bpf_sk_storage_map_free() alone + * It is racing with bpf_local_storage_map_free() alone * when unlinking elem from the sk_storage->list and * the map's bucket->list. */ @@ -574,8 +581,9 @@ void bpf_sk_storage_free(struct sock *sk) /* Always unlink from map before unlinking from * sk_storage. */ - selem_unlink_map(selem); - free_sk_storage = __selem_unlink_sk(sk_storage, selem, true); + bpf_selem_unlink_map(selem); + free_sk_storage = bpf_selem_unlink_storage_nolock(sk_storage, + selem, true); } raw_spin_unlock_bh(&sk_storage->lock); rcu_read_unlock(); @@ -584,14 +592,14 @@ void bpf_sk_storage_free(struct sock *sk) kfree_rcu(sk_storage, rcu); } -static void bpf_sk_storage_map_free(struct bpf_map *map) +static void bpf_local_storage_map_free(struct bpf_map *map) { - struct bpf_sk_storage_elem *selem; - struct bpf_sk_storage_map *smap; - struct bucket *b; + struct bpf_local_storage_elem *selem; + struct bpf_local_storage_map *smap; + struct bpf_local_storage_map_bucket *b; unsigned int i; - smap = (struct bpf_sk_storage_map *)map; + smap = (struct bpf_local_storage_map *)map; cache_idx_free(smap->cache_idx); @@ -604,10 +612,10 @@ static void bpf_sk_storage_map_free(struct bpf_map *map) /* bpf prog and the userspace can no longer access this map * now. No new selem (of this map) can be added - * to the sk->sk_bpf_storage or to the map bucket's list. + * to the owner->storage or to the map bucket's list. * * The elem of this map can be cleaned up here - * or + * or when the storage is freed e.g. * by bpf_sk_storage_free() during __sk_destruct(). */ for (i = 0; i < (1U << smap->bucket_log); i++) { @@ -615,26 +623,26 @@ static void bpf_sk_storage_map_free(struct bpf_map *map) rcu_read_lock(); /* No one is adding to b->list now */ - while ((selem = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(&b->list)), - struct bpf_sk_storage_elem, - map_node))) { - selem_unlink(selem); + while ((selem = hlist_entry_safe( + rcu_dereference_raw(hlist_first_rcu(&b->list)), + struct bpf_local_storage_elem, map_node))) { + bpf_selem_unlink(selem); cond_resched_rcu(); } rcu_read_unlock(); } - /* bpf_sk_storage_free() may still need to access the map. - * e.g. bpf_sk_storage_free() has unlinked selem from the map + /* While freeing the storage we may still need to access the map. + * + * e.g. when bpf_sk_storage_free() has unlinked selem from the map * which then made the above while((selem = ...)) loop - * exited immediately. + * exit immediately. * - * However, the bpf_sk_storage_free() still needs to access - * the smap->elem_size to do the uncharging in - * __selem_unlink_sk(). + * However, while freeing the storage one still needs to access the + * smap->elem_size to do the uncharging in + * bpf_selem_unlink_storage_nolock(). * - * Hence, wait another rcu grace period for the - * bpf_sk_storage_free() to finish. + * Hence, wait another rcu grace period for the storage to be freed. */ synchronize_rcu(); @@ -645,14 +653,15 @@ static void bpf_sk_storage_map_free(struct bpf_map *map) /* U16_MAX is much more than enough for sk local storage * considering a tcp_sock is ~2k. */ -#define MAX_VALUE_SIZE \ +#define BPF_LOCAL_STORAGE_MAX_VALUE_SIZE \ min_t(u32, \ - (KMALLOC_MAX_SIZE - MAX_BPF_STACK - sizeof(struct bpf_sk_storage_elem)), \ - (U16_MAX - sizeof(struct bpf_sk_storage_elem))) + (KMALLOC_MAX_SIZE - MAX_BPF_STACK - \ + sizeof(struct bpf_local_storage_elem)), \ + (U16_MAX - sizeof(struct bpf_local_storage_elem))) -static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr) +static int bpf_local_storage_map_alloc_check(union bpf_attr *attr) { - if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK || + if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK || !(attr->map_flags & BPF_F_NO_PREALLOC) || attr->max_entries || attr->key_size != sizeof(int) || !attr->value_size || @@ -663,15 +672,15 @@ static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr) if (!bpf_capable()) return -EPERM; - if (attr->value_size > MAX_VALUE_SIZE) + if (attr->value_size > BPF_LOCAL_STORAGE_MAX_VALUE_SIZE) return -E2BIG; return 0; } -static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) +static struct bpf_map *bpf_local_storage_map_alloc(union bpf_attr *attr) { - struct bpf_sk_storage_map *smap; + struct bpf_local_storage_map *smap; unsigned int i; u32 nbuckets; u64 cost; @@ -707,7 +716,8 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) raw_spin_lock_init(&smap->buckets[i].lock); } - smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size; + smap->elem_size = + sizeof(struct bpf_local_storage_elem) + attr->value_size; smap->cache_idx = cache_idx_get(); return &smap->map; @@ -719,10 +729,10 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, return -ENOTSUPP; } -static int bpf_sk_storage_map_check_btf(const struct bpf_map *map, - const struct btf *btf, - const struct btf_type *key_type, - const struct btf_type *value_type) +static int bpf_local_storage_map_check_btf(const struct bpf_map *map, + const struct btf *btf, + const struct btf_type *key_type, + const struct btf_type *value_type) { u32 int_data; @@ -738,7 +748,7 @@ static int bpf_sk_storage_map_check_btf(const struct bpf_map *map, static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key) { - struct bpf_sk_storage_data *sdata; + struct bpf_local_storage_data *sdata; struct socket *sock; int fd, err; @@ -756,14 +766,15 @@ static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key) static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { - struct bpf_sk_storage_data *sdata; + struct bpf_local_storage_data *sdata; struct socket *sock; int fd, err; fd = *(int *)key; sock = sockfd_lookup(fd, &err); if (sock) { - sdata = sk_storage_update(sock->sk, map, value, map_flags); + sdata = bpf_local_storage_update(sock->sk, map, value, + map_flags); sockfd_put(sock); return PTR_ERR_OR_ZERO(sdata); } @@ -787,14 +798,14 @@ static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key) return err; } -static struct bpf_sk_storage_elem * +static struct bpf_local_storage_elem * bpf_sk_storage_clone_elem(struct sock *newsk, - struct bpf_sk_storage_map *smap, - struct bpf_sk_storage_elem *selem) + struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *selem) { - struct bpf_sk_storage_elem *copy_selem; + struct bpf_local_storage_elem *copy_selem; - copy_selem = selem_alloc(smap, newsk, NULL, true); + copy_selem = bpf_selem_alloc(smap, newsk, NULL, true); if (!copy_selem) return NULL; @@ -810,9 +821,9 @@ bpf_sk_storage_clone_elem(struct sock *newsk, int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) { - struct bpf_sk_storage *new_sk_storage = NULL; - struct bpf_sk_storage *sk_storage; - struct bpf_sk_storage_elem *selem; + struct bpf_local_storage *new_sk_storage = NULL; + struct bpf_local_storage *sk_storage; + struct bpf_local_storage_elem *selem; int ret = 0; RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL); @@ -824,8 +835,8 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) goto out; hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) { - struct bpf_sk_storage_elem *copy_selem; - struct bpf_sk_storage_map *smap; + struct bpf_local_storage_elem *copy_selem; + struct bpf_local_storage_map *smap; struct bpf_map *map; smap = rcu_dereference(SDATA(selem)->smap); @@ -833,7 +844,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) continue; /* Note that for lockless listeners adding new element - * here can race with cleanup in bpf_sk_storage_map_free. + * here can race with cleanup in bpf_local_storage_map_free. * Try to grab map refcnt to make sure that it's still * alive and prevent concurrent removal. */ @@ -849,8 +860,8 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) } if (new_sk_storage) { - selem_link_map(smap, copy_selem); - __selem_link_sk(new_sk_storage, copy_selem); + bpf_selem_link_map(smap, copy_selem); + bpf_selem_link_storage_nolock(new_sk_storage, copy_selem); } else { ret = sk_storage_alloc(newsk, smap, copy_selem); if (ret) { @@ -861,7 +872,8 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) goto out; } - new_sk_storage = rcu_dereference(copy_selem->sk_storage); + new_sk_storage = + rcu_dereference(copy_selem->local_storage); } bpf_map_put(map); } @@ -879,7 +891,7 @@ out: BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk, void *, value, u64, flags) { - struct bpf_sk_storage_data *sdata; + struct bpf_local_storage_data *sdata; if (flags > BPF_SK_STORAGE_GET_F_CREATE) return (unsigned long)NULL; @@ -895,7 +907,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk, * destruction). */ refcount_inc_not_zero(&sk->sk_refcnt)) { - sdata = sk_storage_update(sk, map, value, BPF_NOEXIST); + sdata = bpf_local_storage_update(sk, map, value, BPF_NOEXIST); /* sk must be a fullsock (guaranteed by verifier), * so sock_gen_put() is unnecessary. */ @@ -922,15 +934,15 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) static int sk_storage_map_btf_id; const struct bpf_map_ops sk_storage_map_ops = { - .map_alloc_check = bpf_sk_storage_map_alloc_check, - .map_alloc = bpf_sk_storage_map_alloc, - .map_free = bpf_sk_storage_map_free, + .map_alloc_check = bpf_local_storage_map_alloc_check, + .map_alloc = bpf_local_storage_map_alloc, + .map_free = bpf_local_storage_map_free, .map_get_next_key = notsupp_get_next_key, .map_lookup_elem = bpf_fd_sk_storage_lookup_elem, .map_update_elem = bpf_fd_sk_storage_update_elem, .map_delete_elem = bpf_fd_sk_storage_delete_elem, - .map_check_btf = bpf_sk_storage_map_check_btf, - .map_btf_name = "bpf_sk_storage_map", + .map_check_btf = bpf_local_storage_map_check_btf, + .map_btf_name = "bpf_local_storage_map", .map_btf_id = &sk_storage_map_btf_id, }; @@ -1022,7 +1034,7 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs) u32 nr_maps = 0; int rem, err; - /* bpf_sk_storage_map is currently limited to CAP_SYS_ADMIN as + /* bpf_local_storage_map is currently limited to CAP_SYS_ADMIN as * the map_alloc_check() side also does. */ if (!bpf_capable()) @@ -1072,13 +1084,13 @@ err_free: } EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc); -static int diag_get(struct bpf_sk_storage_data *sdata, struct sk_buff *skb) +static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb) { struct nlattr *nla_stg, *nla_value; - struct bpf_sk_storage_map *smap; + struct bpf_local_storage_map *smap; /* It cannot exceed max nlattr's payload */ - BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < MAX_VALUE_SIZE); + BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE); nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE); if (!nla_stg) @@ -1114,9 +1126,9 @@ static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb, { /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */ unsigned int diag_size = nla_total_size(0); - struct bpf_sk_storage *sk_storage; - struct bpf_sk_storage_elem *selem; - struct bpf_sk_storage_map *smap; + struct bpf_local_storage *sk_storage; + struct bpf_local_storage_elem *selem; + struct bpf_local_storage_map *smap; struct nlattr *nla_stgs; unsigned int saved_len; int err = 0; @@ -1169,8 +1181,8 @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, { /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */ unsigned int diag_size = nla_total_size(0); - struct bpf_sk_storage *sk_storage; - struct bpf_sk_storage_data *sdata; + struct bpf_local_storage *sk_storage; + struct bpf_local_storage_data *sdata; struct nlattr *nla_stgs; unsigned int saved_len; int err = 0; @@ -1197,8 +1209,8 @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, saved_len = skb->len; for (i = 0; i < diag->nr_maps; i++) { - sdata = __sk_storage_lookup(sk_storage, - (struct bpf_sk_storage_map *)diag->maps[i], + sdata = bpf_local_storage_lookup(sk_storage, + (struct bpf_local_storage_map *)diag->maps[i], false); if (!sdata) @@ -1235,19 +1247,19 @@ struct bpf_iter_seq_sk_storage_map_info { unsigned skip_elems; }; -static struct bpf_sk_storage_elem * +static struct bpf_local_storage_elem * bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info, - struct bpf_sk_storage_elem *prev_selem) + struct bpf_local_storage_elem *prev_selem) { - struct bpf_sk_storage *sk_storage; - struct bpf_sk_storage_elem *selem; + struct bpf_local_storage *sk_storage; + struct bpf_local_storage_elem *selem; u32 skip_elems = info->skip_elems; - struct bpf_sk_storage_map *smap; + struct bpf_local_storage_map *smap; u32 bucket_id = info->bucket_id; u32 i, count, n_buckets; - struct bucket *b; + struct bpf_local_storage_map_bucket *b; - smap = (struct bpf_sk_storage_map *)info->map; + smap = (struct bpf_local_storage_map *)info->map; n_buckets = 1U << smap->bucket_log; if (bucket_id >= n_buckets) return NULL; @@ -1257,7 +1269,7 @@ bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info, count = 0; while (selem) { selem = hlist_entry_safe(selem->map_node.next, - struct bpf_sk_storage_elem, map_node); + struct bpf_local_storage_elem, map_node); if (!selem) { /* not found, unlock and go to the next bucket */ b = &smap->buckets[bucket_id++]; @@ -1265,7 +1277,7 @@ bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info, skip_elems = 0; break; } - sk_storage = rcu_dereference_raw(selem->sk_storage); + sk_storage = rcu_dereference_raw(selem->local_storage); if (sk_storage) { info->skip_elems = skip_elems + count; return selem; @@ -1278,7 +1290,7 @@ bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info, raw_spin_lock_bh(&b->lock); count = 0; hlist_for_each_entry(selem, &b->list, map_node) { - sk_storage = rcu_dereference_raw(selem->sk_storage); + sk_storage = rcu_dereference_raw(selem->local_storage); if (sk_storage && count >= skip_elems) { info->bucket_id = i; info->skip_elems = count; @@ -1297,7 +1309,7 @@ bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info, static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos) { - struct bpf_sk_storage_elem *selem; + struct bpf_local_storage_elem *selem; selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL); if (!selem) @@ -1330,11 +1342,11 @@ DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta, void *value) static int __bpf_sk_storage_map_seq_show(struct seq_file *seq, - struct bpf_sk_storage_elem *selem) + struct bpf_local_storage_elem *selem) { struct bpf_iter_seq_sk_storage_map_info *info = seq->private; struct bpf_iter__bpf_sk_storage_map ctx = {}; - struct bpf_sk_storage *sk_storage; + struct bpf_local_storage *sk_storage; struct bpf_iter_meta meta; struct bpf_prog *prog; int ret = 0; @@ -1345,8 +1357,8 @@ static int __bpf_sk_storage_map_seq_show(struct seq_file *seq, ctx.meta = &meta; ctx.map = info->map; if (selem) { - sk_storage = rcu_dereference_raw(selem->sk_storage); - ctx.sk = sk_storage->sk; + sk_storage = rcu_dereference_raw(selem->local_storage); + ctx.sk = sk_storage->owner; ctx.value = SDATA(selem)->data; } ret = bpf_iter_run_prog(prog, &ctx); @@ -1363,13 +1375,13 @@ static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v) static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v) { struct bpf_iter_seq_sk_storage_map_info *info = seq->private; - struct bpf_sk_storage_map *smap; - struct bucket *b; + struct bpf_local_storage_map *smap; + struct bpf_local_storage_map_bucket *b; if (!v) { (void)__bpf_sk_storage_map_seq_show(seq, v); } else { - smap = (struct bpf_sk_storage_map *)info->map; + smap = (struct bpf_local_storage_map *)info->map; b = &smap->buckets[info->bucket_id]; raw_spin_unlock_bh(&b->lock); } diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c index 473665cac67e..982a2d8aa844 100644 --- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -589,7 +589,7 @@ static inline int check_stack(void) return 1; } -struct bpf_sk_storage_map { +struct bpf_local_storage_map { struct bpf_map map; } __attribute__((preserve_access_index)); @@ -602,8 +602,8 @@ struct { static inline int check_sk_storage(void) { - struct bpf_sk_storage_map *sk_storage = - (struct bpf_sk_storage_map *)&m_sk_storage; + struct bpf_local_storage_map *sk_storage = + (struct bpf_local_storage_map *)&m_sk_storage; struct bpf_map *map = (struct bpf_map *)&m_sk_storage; VERIFY(check(&sk_storage->map, map, sizeof(__u32), sizeof(__u32), 0)); -- cgit v1.3.1 From f836a56e84ffc9f1a1cd73f77e10404ca46a4616 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 25 Aug 2020 20:29:15 +0200 Subject: bpf: Generalize bpf_sk_storage Refactor the functionality in bpf_sk_storage.c so that concept of storage linked to kernel objects can be extended to other objects like inode, task_struct etc. Each new local storage will still be a separate map and provide its own set of helpers. This allows for future object specific extensions and still share a lot of the underlying implementation. This includes the changes suggested by Martin in: https://lore.kernel.org/bpf/20200725013047.4006241-1-kafai@fb.com/ adding new map operations to support bpf_local_storage maps: * storages for different kernel objects to optionally have different memory charging strategy (map_local_storage_charge, map_local_storage_uncharge) * Functionality to extract the storage pointer from a pointer to the owning object (map_owner_storage_ptr) Co-developed-by: Martin KaFai Lau Signed-off-by: Martin KaFai Lau Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200825182919.1118197-4-kpsingh@chromium.org --- include/linux/bpf.h | 8 ++ include/net/bpf_sk_storage.h | 52 +++++++++ include/uapi/linux/bpf.h | 8 +- net/core/bpf_sk_storage.c | 238 +++++++++++++++++++++++++++-------------- tools/include/uapi/linux/bpf.h | 8 +- 5 files changed, 228 insertions(+), 86 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 81f38e2fda78..8c443b93ac11 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -34,6 +34,8 @@ struct btf_type; struct exception_table_entry; struct seq_operations; struct bpf_iter_aux_info; +struct bpf_local_storage; +struct bpf_local_storage_map; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -104,6 +106,12 @@ struct bpf_map_ops { __poll_t (*map_poll)(struct bpf_map *map, struct file *filp, struct poll_table_struct *pts); + /* Functions called by bpf_local_storage maps */ + int (*map_local_storage_charge)(struct bpf_local_storage_map *smap, + void *owner, u32 size); + void (*map_local_storage_uncharge)(struct bpf_local_storage_map *smap, + void *owner, u32 size); + struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner); /* BTF name and id of struct allocated by map_alloc */ const char * const map_btf_name; int *map_btf_id; diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h index 950c5aaba15e..9e631b5466e3 100644 --- a/include/net/bpf_sk_storage.h +++ b/include/net/bpf_sk_storage.h @@ -3,8 +3,15 @@ #ifndef _BPF_SK_STORAGE_H #define _BPF_SK_STORAGE_H +#include +#include +#include #include #include +#include +#include +#include +#include struct sock; @@ -13,6 +20,7 @@ void bpf_sk_storage_free(struct sock *sk); extern const struct bpf_func_proto bpf_sk_storage_get_proto; extern const struct bpf_func_proto bpf_sk_storage_delete_proto; +struct bpf_local_storage_elem; struct bpf_sk_storage_diag; struct sk_buff; struct nlattr; @@ -34,6 +42,50 @@ u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache); void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache, u16 idx); +/* Helper functions for bpf_local_storage */ +int bpf_local_storage_map_alloc_check(union bpf_attr *attr); + +struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr); + +struct bpf_local_storage_data * +bpf_local_storage_lookup(struct bpf_local_storage *local_storage, + struct bpf_local_storage_map *smap, + bool cacheit_lockit); + +void bpf_local_storage_map_free(struct bpf_local_storage_map *smap); + +int bpf_local_storage_map_check_btf(const struct bpf_map *map, + const struct btf *btf, + const struct btf_type *key_type, + const struct btf_type *value_type); + +void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, + struct bpf_local_storage_elem *selem); + +bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, + struct bpf_local_storage_elem *selem, + bool uncharge_omem); + +void bpf_selem_unlink(struct bpf_local_storage_elem *selem); + +void bpf_selem_link_map(struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *selem); + +void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem); + +struct bpf_local_storage_elem * +bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value, + bool charge_mem); + +int +bpf_local_storage_alloc(void *owner, + struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *first_selem); + +struct bpf_local_storage_data * +bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, + void *value, u64 map_flags); + #ifdef CONFIG_BPF_SYSCALL int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk); struct bpf_sk_storage_diag * diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 544b89a64918..2cbd137eed86 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3765,9 +3765,13 @@ enum { BPF_F_SYSCTL_BASE_NAME = (1ULL << 0), }; -/* BPF_FUNC_sk_storage_get flags */ +/* BPF_FUNC__storage_get flags */ enum { - BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0), + BPF_LOCAL_STORAGE_GET_F_CREATE = (1ULL << 0), + /* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility + * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead. + */ + BPF_SK_STORAGE_GET_F_CREATE = BPF_LOCAL_STORAGE_GET_F_CREATE, }; /* BPF_FUNC_read_branch_records flags. */ diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index ec61ee7c7ee4..cd8b7017913b 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -84,7 +84,7 @@ struct bpf_local_storage_elem { struct bpf_local_storage { struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE]; struct hlist_head list; /* List of bpf_local_storage_elem */ - struct sock *owner; /* The object that owns the above "list" of + void *owner; /* The object that owns the above "list" of * bpf_local_storage_elem. */ struct rcu_head rcu; @@ -110,6 +110,33 @@ static int omem_charge(struct sock *sk, unsigned int size) return -ENOMEM; } +static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size) +{ + struct bpf_map *map = &smap->map; + + if (!map->ops->map_local_storage_charge) + return 0; + + return map->ops->map_local_storage_charge(smap, owner, size); +} + +static void mem_uncharge(struct bpf_local_storage_map *smap, void *owner, + u32 size) +{ + struct bpf_map *map = &smap->map; + + if (map->ops->map_local_storage_uncharge) + map->ops->map_local_storage_uncharge(smap, owner, size); +} + +static struct bpf_local_storage __rcu ** +owner_storage(struct bpf_local_storage_map *smap, void *owner) +{ + struct bpf_map *map = &smap->map; + + return map->ops->map_owner_storage_ptr(owner); +} + static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem) { return !hlist_unhashed(&selem->snode); @@ -120,13 +147,13 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem) return !hlist_unhashed(&selem->map_node); } -static struct bpf_local_storage_elem * -bpf_selem_alloc(struct bpf_local_storage_map *smap, struct sock *sk, - void *value, bool charge_omem) +struct bpf_local_storage_elem * +bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, + void *value, bool charge_mem) { struct bpf_local_storage_elem *selem; - if (charge_omem && omem_charge(sk, smap->elem_size)) + if (charge_mem && mem_charge(smap, owner, smap->elem_size)) return NULL; selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN); @@ -136,8 +163,8 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, struct sock *sk, return selem; } - if (charge_omem) - atomic_sub(smap->elem_size, &sk->sk_omem_alloc); + if (charge_mem) + mem_uncharge(smap, owner, smap->elem_size); return NULL; } @@ -146,32 +173,32 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, struct sock *sk, * The caller must ensure selem->smap is still valid to be * dereferenced for its smap->elem_size and smap->cache_idx. */ -static bool -bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, - struct bpf_local_storage_elem *selem, - bool uncharge_omem) +bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, + struct bpf_local_storage_elem *selem, + bool uncharge_mem) { struct bpf_local_storage_map *smap; bool free_local_storage; - struct sock *sk; + void *owner; smap = rcu_dereference(SDATA(selem)->smap); - sk = local_storage->owner; + owner = local_storage->owner; /* All uncharging on the owner must be done first. * The owner may be freed once the last selem is unlinked * from local_storage. */ - if (uncharge_omem) - atomic_sub(smap->elem_size, &sk->sk_omem_alloc); + if (uncharge_mem) + mem_uncharge(smap, owner, smap->elem_size); free_local_storage = hlist_is_singular_node(&selem->snode, &local_storage->list); if (free_local_storage) { - atomic_sub(sizeof(struct bpf_local_storage), &sk->sk_omem_alloc); + mem_uncharge(smap, owner, sizeof(struct bpf_local_storage)); local_storage->owner = NULL; - /* After this RCU_INIT, sk may be freed and cannot be used */ - RCU_INIT_POINTER(sk->sk_bpf_storage, NULL); + + /* After this RCU_INIT, owner may be freed and cannot be used */ + RCU_INIT_POINTER(*owner_storage(smap, owner), NULL); /* local_storage is not freed now. local_storage->lock is * still held and raw_spin_unlock_bh(&local_storage->lock) @@ -209,23 +236,22 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem) local_storage = rcu_dereference(selem->local_storage); raw_spin_lock_bh(&local_storage->lock); if (likely(selem_linked_to_storage(selem))) - free_local_storage = - bpf_selem_unlink_storage_nolock(local_storage, selem, true); + free_local_storage = bpf_selem_unlink_storage_nolock( + local_storage, selem, true); raw_spin_unlock_bh(&local_storage->lock); if (free_local_storage) kfree_rcu(local_storage, rcu); } -static void -bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, - struct bpf_local_storage_elem *selem) +void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, + struct bpf_local_storage_elem *selem) { RCU_INIT_POINTER(selem->local_storage, local_storage); hlist_add_head(&selem->snode, &local_storage->list); } -static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) +void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) { struct bpf_local_storage_map *smap; struct bpf_local_storage_map_bucket *b; @@ -242,8 +268,8 @@ static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) raw_spin_unlock_bh(&b->lock); } -static void bpf_selem_link_map(struct bpf_local_storage_map *smap, - struct bpf_local_storage_elem *selem) +void bpf_selem_link_map(struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *selem) { struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem); @@ -253,7 +279,7 @@ static void bpf_selem_link_map(struct bpf_local_storage_map *smap, raw_spin_unlock_bh(&b->lock); } -static void bpf_selem_unlink(struct bpf_local_storage_elem *selem) +void bpf_selem_unlink(struct bpf_local_storage_elem *selem) { /* Always unlink from map before unlinking from local_storage * because selem will be freed after successfully unlinked from @@ -263,7 +289,7 @@ static void bpf_selem_unlink(struct bpf_local_storage_elem *selem) __bpf_selem_unlink_storage(selem); } -static struct bpf_local_storage_data * +struct bpf_local_storage_data * bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, bool cacheit_lockit) @@ -329,40 +355,45 @@ static int check_flags(const struct bpf_local_storage_data *old_sdata, return 0; } -static int sk_storage_alloc(struct sock *sk, +int bpf_local_storage_alloc(void *owner, struct bpf_local_storage_map *smap, struct bpf_local_storage_elem *first_selem) { - struct bpf_local_storage *prev_sk_storage, *sk_storage; + struct bpf_local_storage *prev_storage, *storage; + struct bpf_local_storage **owner_storage_ptr; int err; - err = omem_charge(sk, sizeof(*sk_storage)); + err = mem_charge(smap, owner, sizeof(*storage)); if (err) return err; - sk_storage = kzalloc(sizeof(*sk_storage), GFP_ATOMIC | __GFP_NOWARN); - if (!sk_storage) { + storage = kzalloc(sizeof(*storage), GFP_ATOMIC | __GFP_NOWARN); + if (!storage) { err = -ENOMEM; goto uncharge; } - INIT_HLIST_HEAD(&sk_storage->list); - raw_spin_lock_init(&sk_storage->lock); - sk_storage->owner = sk; - bpf_selem_link_storage_nolock(sk_storage, first_selem); + INIT_HLIST_HEAD(&storage->list); + raw_spin_lock_init(&storage->lock); + storage->owner = owner; + + bpf_selem_link_storage_nolock(storage, first_selem); bpf_selem_link_map(smap, first_selem); - /* Publish sk_storage to sk. sk->sk_lock cannot be acquired. - * Hence, atomic ops is used to set sk->sk_bpf_storage - * from NULL to the newly allocated sk_storage ptr. + + owner_storage_ptr = + (struct bpf_local_storage **)owner_storage(smap, owner); + /* Publish storage to the owner. + * Instead of using any lock of the kernel object (i.e. owner), + * cmpxchg will work with any kernel object regardless what + * the running context is, bh, irq...etc. * - * From now on, the sk->sk_bpf_storage pointer is protected - * by the sk_storage->lock. Hence, when freeing - * the sk->sk_bpf_storage, the sk_storage->lock must - * be held before setting sk->sk_bpf_storage to NULL. + * From now on, the owner->storage pointer (e.g. sk->sk_bpf_storage) + * is protected by the storage->lock. Hence, when freeing + * the owner->storage, the storage->lock must be held before + * setting owner->storage ptr to NULL. */ - prev_sk_storage = cmpxchg((struct bpf_local_storage **)&sk->sk_bpf_storage, - NULL, sk_storage); - if (unlikely(prev_sk_storage)) { + prev_storage = cmpxchg(owner_storage_ptr, NULL, storage); + if (unlikely(prev_storage)) { bpf_selem_unlink_map(first_selem); err = -EAGAIN; goto uncharge; @@ -380,8 +411,8 @@ static int sk_storage_alloc(struct sock *sk, return 0; uncharge: - kfree(sk_storage); - atomic_sub(sizeof(*sk_storage), &sk->sk_omem_alloc); + kfree(storage); + mem_uncharge(smap, owner, sizeof(*storage)); return err; } @@ -390,38 +421,37 @@ uncharge: * Otherwise, it will become a leak (and other memory issues * during map destruction). */ -static struct bpf_local_storage_data * -bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value, - u64 map_flags) +struct bpf_local_storage_data * +bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, + void *value, u64 map_flags) { struct bpf_local_storage_data *old_sdata = NULL; struct bpf_local_storage_elem *selem; struct bpf_local_storage *local_storage; - struct bpf_local_storage_map *smap; int err; /* BPF_EXIST and BPF_NOEXIST cannot be both set */ if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) || /* BPF_F_LOCK can only be used in a value with spin_lock */ - unlikely((map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map))) + unlikely((map_flags & BPF_F_LOCK) && + !map_value_has_spin_lock(&smap->map))) return ERR_PTR(-EINVAL); - smap = (struct bpf_local_storage_map *)map; - local_storage = rcu_dereference(sk->sk_bpf_storage); + local_storage = rcu_dereference(*owner_storage(smap, owner)); if (!local_storage || hlist_empty(&local_storage->list)) { /* Very first elem for the owner */ err = check_flags(NULL, map_flags); if (err) return ERR_PTR(err); - selem = bpf_selem_alloc(smap, sk, value, true); + selem = bpf_selem_alloc(smap, owner, value, true); if (!selem) return ERR_PTR(-ENOMEM); - err = sk_storage_alloc(sk, smap, selem); + err = bpf_local_storage_alloc(owner, smap, selem); if (err) { kfree(selem); - atomic_sub(smap->elem_size, &sk->sk_omem_alloc); + mem_uncharge(smap, owner, smap->elem_size); return ERR_PTR(err); } @@ -439,7 +469,7 @@ bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value, if (err) return ERR_PTR(err); if (old_sdata && selem_linked_to_storage(SELEM(old_sdata))) { - copy_map_value_locked(map, old_sdata->data, + copy_map_value_locked(&smap->map, old_sdata->data, value, false); return old_sdata; } @@ -464,7 +494,8 @@ bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value, goto unlock_err; if (old_sdata && (map_flags & BPF_F_LOCK)) { - copy_map_value_locked(map, old_sdata->data, value, false); + copy_map_value_locked(&smap->map, old_sdata->data, value, + false); selem = SELEM(old_sdata); goto unlock; } @@ -478,7 +509,7 @@ bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value, * old_sdata will not be uncharged later during * bpf_selem_unlink_storage_nolock(). */ - selem = bpf_selem_alloc(smap, sk, value, !old_sdata); + selem = bpf_selem_alloc(smap, owner, value, !old_sdata); if (!selem) { err = -ENOMEM; goto unlock_err; @@ -591,17 +622,12 @@ void bpf_sk_storage_free(struct sock *sk) kfree_rcu(sk_storage, rcu); } -static void bpf_local_storage_map_free(struct bpf_map *map) +void bpf_local_storage_map_free(struct bpf_local_storage_map *smap) { struct bpf_local_storage_elem *selem; - struct bpf_local_storage_map *smap; struct bpf_local_storage_map_bucket *b; unsigned int i; - smap = (struct bpf_local_storage_map *)map; - - bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx); - /* Note that this map might be concurrently cloned from * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone * RCU read section to finish before proceeding. New RCU @@ -646,7 +672,16 @@ static void bpf_local_storage_map_free(struct bpf_map *map) synchronize_rcu(); kvfree(smap->buckets); - kfree(map); + kfree(smap); +} + +static void sk_storage_map_free(struct bpf_map *map) +{ + struct bpf_local_storage_map *smap; + + smap = (struct bpf_local_storage_map *)map; + bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx); + bpf_local_storage_map_free(smap); } /* U16_MAX is much more than enough for sk local storage @@ -658,7 +693,7 @@ static void bpf_local_storage_map_free(struct bpf_map *map) sizeof(struct bpf_local_storage_elem)), \ (U16_MAX - sizeof(struct bpf_local_storage_elem))) -static int bpf_local_storage_map_alloc_check(union bpf_attr *attr) +int bpf_local_storage_map_alloc_check(union bpf_attr *attr) { if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK || !(attr->map_flags & BPF_F_NO_PREALLOC) || @@ -677,7 +712,7 @@ static int bpf_local_storage_map_alloc_check(union bpf_attr *attr) return 0; } -static struct bpf_map *bpf_local_storage_map_alloc(union bpf_attr *attr) +struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr) { struct bpf_local_storage_map *smap; unsigned int i; @@ -717,8 +752,19 @@ static struct bpf_map *bpf_local_storage_map_alloc(union bpf_attr *attr) smap->elem_size = sizeof(struct bpf_local_storage_elem) + attr->value_size; - smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache); + return smap; +} + +static struct bpf_map *sk_storage_map_alloc(union bpf_attr *attr) +{ + struct bpf_local_storage_map *smap; + + smap = bpf_local_storage_map_alloc(attr); + if (IS_ERR(smap)) + return ERR_CAST(smap); + + smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache); return &smap->map; } @@ -728,10 +774,10 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, return -ENOTSUPP; } -static int bpf_local_storage_map_check_btf(const struct bpf_map *map, - const struct btf *btf, - const struct btf_type *key_type, - const struct btf_type *value_type) +int bpf_local_storage_map_check_btf(const struct bpf_map *map, + const struct btf *btf, + const struct btf_type *key_type, + const struct btf_type *value_type) { u32 int_data; @@ -772,8 +818,9 @@ static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key, fd = *(int *)key; sock = sockfd_lookup(fd, &err); if (sock) { - sdata = bpf_local_storage_update(sock->sk, map, value, - map_flags); + sdata = bpf_local_storage_update( + sock->sk, (struct bpf_local_storage_map *)map, value, + map_flags); sockfd_put(sock); return PTR_ERR_OR_ZERO(sdata); } @@ -862,7 +909,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) bpf_selem_link_map(smap, copy_selem); bpf_selem_link_storage_nolock(new_sk_storage, copy_selem); } else { - ret = sk_storage_alloc(newsk, smap, copy_selem); + ret = bpf_local_storage_alloc(newsk, smap, copy_selem); if (ret) { kfree(copy_selem); atomic_sub(smap->elem_size, @@ -906,7 +953,9 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk, * destruction). */ refcount_inc_not_zero(&sk->sk_refcnt)) { - sdata = bpf_local_storage_update(sk, map, value, BPF_NOEXIST); + sdata = bpf_local_storage_update( + sk, (struct bpf_local_storage_map *)map, value, + BPF_NOEXIST); /* sk must be a fullsock (guaranteed by verifier), * so sock_gen_put() is unnecessary. */ @@ -931,11 +980,33 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) return -ENOENT; } +static int sk_storage_charge(struct bpf_local_storage_map *smap, + void *owner, u32 size) +{ + return omem_charge(owner, size); +} + +static void sk_storage_uncharge(struct bpf_local_storage_map *smap, + void *owner, u32 size) +{ + struct sock *sk = owner; + + atomic_sub(size, &sk->sk_omem_alloc); +} + +static struct bpf_local_storage __rcu ** +sk_storage_ptr(void *owner) +{ + struct sock *sk = owner; + + return &sk->sk_bpf_storage; +} + static int sk_storage_map_btf_id; const struct bpf_map_ops sk_storage_map_ops = { .map_alloc_check = bpf_local_storage_map_alloc_check, - .map_alloc = bpf_local_storage_map_alloc, - .map_free = bpf_local_storage_map_free, + .map_alloc = sk_storage_map_alloc, + .map_free = sk_storage_map_free, .map_get_next_key = notsupp_get_next_key, .map_lookup_elem = bpf_fd_sk_storage_lookup_elem, .map_update_elem = bpf_fd_sk_storage_update_elem, @@ -943,6 +1014,9 @@ const struct bpf_map_ops sk_storage_map_ops = { .map_check_btf = bpf_local_storage_map_check_btf, .map_btf_name = "bpf_local_storage_map", .map_btf_id = &sk_storage_map_btf_id, + .map_local_storage_charge = sk_storage_charge, + .map_local_storage_uncharge = sk_storage_uncharge, + .map_owner_storage_ptr = sk_storage_ptr, }; const struct bpf_func_proto bpf_sk_storage_get_proto = { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 544b89a64918..2cbd137eed86 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3765,9 +3765,13 @@ enum { BPF_F_SYSCTL_BASE_NAME = (1ULL << 0), }; -/* BPF_FUNC_sk_storage_get flags */ +/* BPF_FUNC__storage_get flags */ enum { - BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0), + BPF_LOCAL_STORAGE_GET_F_CREATE = (1ULL << 0), + /* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility + * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead. + */ + BPF_SK_STORAGE_GET_F_CREATE = BPF_LOCAL_STORAGE_GET_F_CREATE, }; /* BPF_FUNC_read_branch_records flags. */ -- cgit v1.3.1 From 8ea636848aca35b9f97c5b5dee30225cf2dd0fe6 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 25 Aug 2020 20:29:17 +0200 Subject: bpf: Implement bpf_local_storage for inodes Similar to bpf_local_storage for sockets, add local storage for inodes. The life-cycle of storage is managed with the life-cycle of the inode. i.e. the storage is destroyed along with the owning inode. The BPF LSM allocates an __rcu pointer to the bpf_local_storage in the security blob which are now stackable and can co-exist with other LSMs. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200825182919.1118197-6-kpsingh@chromium.org --- include/linux/bpf_lsm.h | 29 +++ include/linux/bpf_types.h | 3 + include/uapi/linux/bpf.h | 40 +++- kernel/bpf/Makefile | 1 + kernel/bpf/bpf_inode_storage.c | 273 ++++++++++++++++++++++++ kernel/bpf/syscall.c | 3 +- kernel/bpf/verifier.c | 10 + security/bpf/hooks.c | 6 + tools/bpf/bpftool/Documentation/bpftool-map.rst | 2 +- tools/bpf/bpftool/bash-completion/bpftool | 3 +- tools/bpf/bpftool/map.c | 3 +- tools/include/uapi/linux/bpf.h | 40 +++- tools/lib/bpf/libbpf_probes.c | 5 +- 13 files changed, 410 insertions(+), 8 deletions(-) create mode 100644 kernel/bpf/bpf_inode_storage.c (limited to 'tools') diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index af74712af585..aaacb6aafc87 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -17,9 +17,28 @@ #include #undef LSM_HOOK +struct bpf_storage_blob { + struct bpf_local_storage __rcu *storage; +}; + +extern struct lsm_blob_sizes bpf_lsm_blob_sizes; + int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog); +static inline struct bpf_storage_blob *bpf_inode( + const struct inode *inode) +{ + if (unlikely(!inode->i_security)) + return NULL; + + return inode->i_security + bpf_lsm_blob_sizes.lbs_inode; +} + +extern const struct bpf_func_proto bpf_inode_storage_get_proto; +extern const struct bpf_func_proto bpf_inode_storage_delete_proto; +void bpf_inode_storage_free(struct inode *inode); + #else /* !CONFIG_BPF_LSM */ static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, @@ -28,6 +47,16 @@ static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, return -EOPNOTSUPP; } +static inline struct bpf_storage_blob *bpf_inode( + const struct inode *inode) +{ + return NULL; +} + +static inline void bpf_inode_storage_free(struct inode *inode) +{ +} + #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index a52a5688418e..2e6f568377f1 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -107,6 +107,9 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) #endif +#ifdef CONFIG_BPF_LSM +BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops) +#endif BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) #if defined(CONFIG_XDP_SOCKETS) BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2cbd137eed86..b6bfcd085a76 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -155,6 +155,7 @@ enum bpf_map_type { BPF_MAP_TYPE_DEVMAP_HASH, BPF_MAP_TYPE_STRUCT_OPS, BPF_MAP_TYPE_RINGBUF, + BPF_MAP_TYPE_INODE_STORAGE, }; /* Note that tracing related programs such as @@ -3509,6 +3510,41 @@ union bpf_attr { * * **-EPERM** This helper cannot be used under the * current sock_ops->op. + * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags) + * Description + * Get a bpf_local_storage from an *inode*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *inode* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *inode*) except this + * helper enforces the key must be an inode and the map must also + * be a **BPF_MAP_TYPE_INODE_STORAGE**. + * + * Underneath, the value is stored locally at *inode* instead of + * the *map*. The *map* is used as the bpf-local-storage + * "type". The bpf-local-storage "type" (i.e. the *map*) is + * searched against all bpf_local_storage residing at *inode*. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * int bpf_inode_storage_delete(struct bpf_map *map, void *inode) + * Description + * Delete a bpf_local_storage from an *inode*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3655,7 +3691,9 @@ union bpf_attr { FN(get_task_stack), \ FN(load_hdr_opt), \ FN(store_hdr_opt), \ - FN(reserve_hdr_opt), + FN(reserve_hdr_opt), \ + FN(inode_storage_get), \ + FN(inode_storage_delete), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 6961ff400cba..bdc8cd1b6767 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -5,6 +5,7 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o +obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o obj-$(CONFIG_BPF_SYSCALL) += disasm.o obj-$(CONFIG_BPF_JIT) += trampoline.o obj-$(CONFIG_BPF_SYSCALL) += btf.o diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c new file mode 100644 index 000000000000..f3a44e929447 --- /dev/null +++ b/kernel/bpf/bpf_inode_storage.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2019 Facebook + * Copyright 2020 Google LLC. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_BPF_STORAGE_CACHE(inode_cache); + +static struct bpf_local_storage __rcu ** +inode_storage_ptr(void *owner) +{ + struct inode *inode = owner; + struct bpf_storage_blob *bsb; + + bsb = bpf_inode(inode); + if (!bsb) + return NULL; + return &bsb->storage; +} + +static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode, + struct bpf_map *map, + bool cacheit_lockit) +{ + struct bpf_local_storage *inode_storage; + struct bpf_local_storage_map *smap; + struct bpf_storage_blob *bsb; + + bsb = bpf_inode(inode); + if (!bsb) + return NULL; + + inode_storage = rcu_dereference(bsb->storage); + if (!inode_storage) + return NULL; + + smap = (struct bpf_local_storage_map *)map; + return bpf_local_storage_lookup(inode_storage, smap, cacheit_lockit); +} + +void bpf_inode_storage_free(struct inode *inode) +{ + struct bpf_local_storage_elem *selem; + struct bpf_local_storage *local_storage; + bool free_inode_storage = false; + struct bpf_storage_blob *bsb; + struct hlist_node *n; + + bsb = bpf_inode(inode); + if (!bsb) + return; + + rcu_read_lock(); + + local_storage = rcu_dereference(bsb->storage); + if (!local_storage) { + rcu_read_unlock(); + return; + } + + /* Netiher the bpf_prog nor the bpf-map's syscall + * could be modifying the local_storage->list now. + * Thus, no elem can be added-to or deleted-from the + * local_storage->list by the bpf_prog or by the bpf-map's syscall. + * + * It is racing with bpf_local_storage_map_free() alone + * when unlinking elem from the local_storage->list and + * the map's bucket->list. + */ + raw_spin_lock_bh(&local_storage->lock); + hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) { + /* Always unlink from map before unlinking from + * local_storage. + */ + bpf_selem_unlink_map(selem); + free_inode_storage = bpf_selem_unlink_storage_nolock( + local_storage, selem, false); + } + raw_spin_unlock_bh(&local_storage->lock); + rcu_read_unlock(); + + /* free_inoode_storage should always be true as long as + * local_storage->list was non-empty. + */ + if (free_inode_storage) + kfree_rcu(local_storage, rcu); +} + +static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_local_storage_data *sdata; + struct file *f; + int fd; + + fd = *(int *)key; + f = fget_raw(fd); + if (!f) + return NULL; + + sdata = inode_storage_lookup(f->f_inode, map, true); + fput(f); + return sdata ? sdata->data : NULL; +} + +static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) +{ + struct bpf_local_storage_data *sdata; + struct file *f; + int fd; + + fd = *(int *)key; + f = fget_raw(fd); + if (!f || !inode_storage_ptr(f->f_inode)) + return -EBADF; + + sdata = bpf_local_storage_update(f->f_inode, + (struct bpf_local_storage_map *)map, + value, map_flags); + fput(f); + return PTR_ERR_OR_ZERO(sdata); +} + +static int inode_storage_delete(struct inode *inode, struct bpf_map *map) +{ + struct bpf_local_storage_data *sdata; + + sdata = inode_storage_lookup(inode, map, false); + if (!sdata) + return -ENOENT; + + bpf_selem_unlink(SELEM(sdata)); + + return 0; +} + +static int bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key) +{ + struct file *f; + int fd, err; + + fd = *(int *)key; + f = fget_raw(fd); + if (!f) + return -EBADF; + + err = inode_storage_delete(f->f_inode, map); + fput(f); + return err; +} + +BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode, + void *, value, u64, flags) +{ + struct bpf_local_storage_data *sdata; + + if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) + return (unsigned long)NULL; + + /* explicitly check that the inode_storage_ptr is not + * NULL as inode_storage_lookup returns NULL in this case and + * bpf_local_storage_update expects the owner to have a + * valid storage pointer. + */ + if (!inode_storage_ptr(inode)) + return (unsigned long)NULL; + + sdata = inode_storage_lookup(inode, map, true); + if (sdata) + return (unsigned long)sdata->data; + + /* This helper must only called from where the inode is gurranteed + * to have a refcount and cannot be freed. + */ + if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { + sdata = bpf_local_storage_update( + inode, (struct bpf_local_storage_map *)map, value, + BPF_NOEXIST); + return IS_ERR(sdata) ? (unsigned long)NULL : + (unsigned long)sdata->data; + } + + return (unsigned long)NULL; +} + +BPF_CALL_2(bpf_inode_storage_delete, + struct bpf_map *, map, struct inode *, inode) +{ + /* This helper must only called from where the inode is gurranteed + * to have a refcount and cannot be freed. + */ + return inode_storage_delete(inode, map); +} + +static int notsupp_get_next_key(struct bpf_map *map, void *key, + void *next_key) +{ + return -ENOTSUPP; +} + +static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr) +{ + struct bpf_local_storage_map *smap; + + smap = bpf_local_storage_map_alloc(attr); + if (IS_ERR(smap)) + return ERR_CAST(smap); + + smap->cache_idx = bpf_local_storage_cache_idx_get(&inode_cache); + return &smap->map; +} + +static void inode_storage_map_free(struct bpf_map *map) +{ + struct bpf_local_storage_map *smap; + + smap = (struct bpf_local_storage_map *)map; + bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx); + bpf_local_storage_map_free(smap); +} + +static int inode_storage_map_btf_id; +const struct bpf_map_ops inode_storage_map_ops = { + .map_alloc_check = bpf_local_storage_map_alloc_check, + .map_alloc = inode_storage_map_alloc, + .map_free = inode_storage_map_free, + .map_get_next_key = notsupp_get_next_key, + .map_lookup_elem = bpf_fd_inode_storage_lookup_elem, + .map_update_elem = bpf_fd_inode_storage_update_elem, + .map_delete_elem = bpf_fd_inode_storage_delete_elem, + .map_check_btf = bpf_local_storage_map_check_btf, + .map_btf_name = "bpf_local_storage_map", + .map_btf_id = &inode_storage_map_btf_id, + .map_owner_storage_ptr = inode_storage_ptr, +}; + +BTF_ID_LIST(bpf_inode_storage_btf_ids) +BTF_ID_UNUSED +BTF_ID(struct, inode) + +const struct bpf_func_proto bpf_inode_storage_get_proto = { + .func = bpf_inode_storage_get, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, + .arg4_type = ARG_ANYTHING, + .btf_id = bpf_inode_storage_btf_ids, +}; + +const struct bpf_func_proto bpf_inode_storage_delete_proto = { + .func = bpf_inode_storage_delete, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .btf_id = bpf_inode_storage_btf_ids, +}; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b46e973faee9..5443cea86cef 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -769,7 +769,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_ARRAY && map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && - map->map_type != BPF_MAP_TYPE_SK_STORAGE) + map->map_type != BPF_MAP_TYPE_SK_STORAGE && + map->map_type != BPF_MAP_TYPE_INODE_STORAGE) return -ENOTSUPP; if (map->spin_lock_off + sizeof(struct bpf_spin_lock) > map->value_size) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dd24503ab3d3..38748794518e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4311,6 +4311,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, func_id != BPF_FUNC_sk_storage_delete) goto error; break; + case BPF_MAP_TYPE_INODE_STORAGE: + if (func_id != BPF_FUNC_inode_storage_get && + func_id != BPF_FUNC_inode_storage_delete) + goto error; + break; default: break; } @@ -4384,6 +4389,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) goto error; break; + case BPF_FUNC_inode_storage_get: + case BPF_FUNC_inode_storage_delete: + if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE) + goto error; + break; default: break; } diff --git a/security/bpf/hooks.c b/security/bpf/hooks.c index 32d32d485451..788667d582ae 100644 --- a/security/bpf/hooks.c +++ b/security/bpf/hooks.c @@ -11,6 +11,7 @@ static struct security_hook_list bpf_lsm_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(NAME, bpf_lsm_##NAME), #include #undef LSM_HOOK + LSM_HOOK_INIT(inode_free_security, bpf_inode_storage_free), }; static int __init bpf_lsm_init(void) @@ -20,7 +21,12 @@ static int __init bpf_lsm_init(void) return 0; } +struct lsm_blob_sizes bpf_lsm_blob_sizes __lsm_ro_after_init = { + .lbs_inode = sizeof(struct bpf_storage_blob), +}; + DEFINE_LSM(bpf) = { .name = "bpf", .init = bpf_lsm_init, + .blobs = &bpf_lsm_blob_sizes }; diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 41e2a74252d0..083db6c2fc67 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -49,7 +49,7 @@ MAP COMMANDS | | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps** | | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash** | | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** -| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** } +| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index f53ed2f1a4aa..7b68e3c0a5fb 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -704,7 +704,8 @@ _bpftool() lru_percpu_hash lpm_trie array_of_maps \ hash_of_maps devmap devmap_hash sockmap cpumap \ xskmap sockhash cgroup_storage reuseport_sockarray \ - percpu_cgroup_storage queue stack' -- \ + percpu_cgroup_storage queue stack sk_storage \ + struct_ops inode_storage' -- \ "$cur" ) ) return 0 ;; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 3a27d31a1856..bc0071228f88 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -50,6 +50,7 @@ const char * const map_type_name[] = { [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage", [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops", [BPF_MAP_TYPE_RINGBUF] = "ringbuf", + [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage", }; const size_t map_type_name_size = ARRAY_SIZE(map_type_name); @@ -1442,7 +1443,7 @@ static int do_help(int argc, char **argv) " lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n" " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" - " queue | stack | sk_storage | struct_ops | ringbuf }\n" + " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2]); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 2cbd137eed86..b6bfcd085a76 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -155,6 +155,7 @@ enum bpf_map_type { BPF_MAP_TYPE_DEVMAP_HASH, BPF_MAP_TYPE_STRUCT_OPS, BPF_MAP_TYPE_RINGBUF, + BPF_MAP_TYPE_INODE_STORAGE, }; /* Note that tracing related programs such as @@ -3509,6 +3510,41 @@ union bpf_attr { * * **-EPERM** This helper cannot be used under the * current sock_ops->op. + * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags) + * Description + * Get a bpf_local_storage from an *inode*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *inode* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *inode*) except this + * helper enforces the key must be an inode and the map must also + * be a **BPF_MAP_TYPE_INODE_STORAGE**. + * + * Underneath, the value is stored locally at *inode* instead of + * the *map*. The *map* is used as the bpf-local-storage + * "type". The bpf-local-storage "type" (i.e. the *map*) is + * searched against all bpf_local_storage residing at *inode*. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * int bpf_inode_storage_delete(struct bpf_map *map, void *inode) + * Description + * Delete a bpf_local_storage from an *inode*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3655,7 +3691,9 @@ union bpf_attr { FN(get_task_stack), \ FN(load_hdr_opt), \ FN(store_hdr_opt), \ - FN(reserve_hdr_opt), + FN(reserve_hdr_opt), \ + FN(inode_storage_get), \ + FN(inode_storage_delete), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 010c9a76fd2b..5482a9b7ae2d 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -170,7 +170,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, return btf_fd; } -static int load_sk_storage_btf(void) +static int load_local_storage_btf(void) { const char strs[] = "\0bpf_spin_lock\0val\0cnt\0l"; /* struct bpf_spin_lock { @@ -229,12 +229,13 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) key_size = 0; break; case BPF_MAP_TYPE_SK_STORAGE: + case BPF_MAP_TYPE_INODE_STORAGE: btf_key_type_id = 1; btf_value_type_id = 3; value_size = 8; max_entries = 0; map_flags = BPF_F_NO_PREALLOC; - btf_fd = load_sk_storage_btf(); + btf_fd = load_local_storage_btf(); if (btf_fd < 0) return false; break; -- cgit v1.3.1 From 30897832d8b97e93833fb52c0a02951db3692ed2 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 25 Aug 2020 20:29:18 +0200 Subject: bpf: Allow local storage to be used from LSM programs Adds support for both bpf_{sk, inode}_storage_{get, delete} to be used in LSM programs. These helpers are not used for tracing programs (currently) as their usage is tied to the life-cycle of the object and should only be used where the owning object won't be freed (when the owning object is passed as an argument to the LSM hook). Thus, they are safer to use in LSM hooks than tracing. Usage of local storage in tracing programs will probably follow a per function based whitelist approach. Since the UAPI helper signature for bpf_sk_storage expect a bpf_sock, it, leads to a compilation warning for LSM programs, it's also updated to accept a void * pointer instead. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200825182919.1118197-7-kpsingh@chromium.org --- include/net/bpf_sk_storage.h | 2 ++ include/uapi/linux/bpf.h | 7 +++++-- kernel/bpf/bpf_lsm.c | 21 ++++++++++++++++++++- net/core/bpf_sk_storage.c | 25 +++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 7 +++++-- 5 files changed, 57 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h index 3c516dd07caf..119f4c9c3a9c 100644 --- a/include/net/bpf_sk_storage.h +++ b/include/net/bpf_sk_storage.h @@ -20,6 +20,8 @@ void bpf_sk_storage_free(struct sock *sk); extern const struct bpf_func_proto bpf_sk_storage_get_proto; extern const struct bpf_func_proto bpf_sk_storage_delete_proto; +extern const struct bpf_func_proto sk_storage_get_btf_proto; +extern const struct bpf_func_proto sk_storage_delete_btf_proto; struct bpf_local_storage_elem; struct bpf_sk_storage_diag; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b6bfcd085a76..0e1cdf806fe1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2808,7 +2808,7 @@ union bpf_attr { * * **-ERANGE** if resulting value was out of range. * - * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags) + * void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags) * Description * Get a bpf-local-storage from a *sk*. * @@ -2824,6 +2824,9 @@ union bpf_attr { * "type". The bpf-local-storage "type" (i.e. the *map*) is * searched against all bpf-local-storages residing at *sk*. * + * *sk* is a kernel **struct sock** pointer for LSM program. + * *sk* is a **struct bpf_sock** pointer for other program types. + * * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be * used such that a new bpf-local-storage will be * created if one does not exist. *value* can be used @@ -2836,7 +2839,7 @@ union bpf_attr { * **NULL** if not found or there was an error in adding * a new bpf-local-storage. * - * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) + * long bpf_sk_storage_delete(struct bpf_map *map, void *sk) * Description * Delete a bpf-local-storage from a *sk*. * Return diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index fb278144e9fd..9cd1428c7199 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include /* For every LSM hook that allows attachment of BPF programs, declare a nop * function where a BPF program can be attached. @@ -45,10 +47,27 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, return 0; } +static const struct bpf_func_proto * +bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_inode_storage_get: + return &bpf_inode_storage_get_proto; + case BPF_FUNC_inode_storage_delete: + return &bpf_inode_storage_delete_proto; + case BPF_FUNC_sk_storage_get: + return &sk_storage_get_btf_proto; + case BPF_FUNC_sk_storage_delete: + return &sk_storage_delete_btf_proto; + default: + return tracing_prog_func_proto(func_id, prog); + } +} + const struct bpf_prog_ops lsm_prog_ops = { }; const struct bpf_verifier_ops lsm_verifier_ops = { - .get_func_proto = tracing_prog_func_proto, + .get_func_proto = bpf_lsm_func_proto, .is_valid_access = btf_ctx_access, }; diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index f29d9a9b4ea4..55fae03b4cc3 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -12,6 +12,7 @@ #include #include #include +#include DEFINE_BPF_STORAGE_CACHE(sk_cache); @@ -377,6 +378,30 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto = { .arg2_type = ARG_PTR_TO_SOCKET, }; +BTF_ID_LIST(sk_storage_btf_ids) +BTF_ID_UNUSED +BTF_ID(struct, sock) + +const struct bpf_func_proto sk_storage_get_btf_proto = { + .func = bpf_sk_storage_get, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, + .arg4_type = ARG_ANYTHING, + .btf_id = sk_storage_btf_ids, +}; + +const struct bpf_func_proto sk_storage_delete_btf_proto = { + .func = bpf_sk_storage_delete, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .btf_id = sk_storage_btf_ids, +}; + struct bpf_sk_storage_diag { u32 nr_maps; struct bpf_map *maps[]; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b6bfcd085a76..0e1cdf806fe1 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2808,7 +2808,7 @@ union bpf_attr { * * **-ERANGE** if resulting value was out of range. * - * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags) + * void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags) * Description * Get a bpf-local-storage from a *sk*. * @@ -2824,6 +2824,9 @@ union bpf_attr { * "type". The bpf-local-storage "type" (i.e. the *map*) is * searched against all bpf-local-storages residing at *sk*. * + * *sk* is a kernel **struct sock** pointer for LSM program. + * *sk* is a **struct bpf_sock** pointer for other program types. + * * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be * used such that a new bpf-local-storage will be * created if one does not exist. *value* can be used @@ -2836,7 +2839,7 @@ union bpf_attr { * **NULL** if not found or there was an error in adding * a new bpf-local-storage. * - * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) + * long bpf_sk_storage_delete(struct bpf_map *map, void *sk) * Description * Delete a bpf-local-storage from a *sk*. * Return -- cgit v1.3.1 From cd324d7abb3d850ae083bda1239f800abe21fe25 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 25 Aug 2020 20:29:19 +0200 Subject: bpf: Add selftests for local_storage inode_local_storage: * Hook to the file_open and inode_unlink LSM hooks. * Create and unlink a temporary file. * Store some information in the inode's bpf_local_storage during file_open. * Verify that this information exists when the file is unlinked. sk_local_storage: * Hook to the socket_post_create and socket_bind LSM hooks. * Open and bind a socket and set the sk_storage in the socket_post_create hook using the start_server helper. * Verify if the information is set in the socket_bind hook. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200825182919.1118197-8-kpsingh@chromium.org --- .../selftests/bpf/prog_tests/test_local_storage.c | 60 +++++++++ tools/testing/selftests/bpf/progs/local_storage.c | 140 +++++++++++++++++++++ 2 files changed, 200 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/test_local_storage.c create mode 100644 tools/testing/selftests/bpf/progs/local_storage.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c new file mode 100644 index 000000000000..91cd6f357246 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2020 Google LLC. + */ + +#include +#include + +#include "local_storage.skel.h" +#include "network_helpers.h" + +int create_and_unlink_file(void) +{ + char fname[PATH_MAX] = "/tmp/fileXXXXXX"; + int fd; + + fd = mkstemp(fname); + if (fd < 0) + return fd; + + close(fd); + unlink(fname); + return 0; +} + +void test_test_local_storage(void) +{ + struct local_storage *skel = NULL; + int err, duration = 0, serv_sk = -1; + + skel = local_storage__open_and_load(); + if (CHECK(!skel, "skel_load", "lsm skeleton failed\n")) + goto close_prog; + + err = local_storage__attach(skel); + if (CHECK(err, "attach", "lsm attach failed: %d\n", err)) + goto close_prog; + + skel->bss->monitored_pid = getpid(); + + err = create_and_unlink_file(); + if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno)) + goto close_prog; + + CHECK(skel->data->inode_storage_result != 0, "inode_storage_result", + "inode_local_storage not set\n"); + + serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); + if (CHECK(serv_sk < 0, "start_server", "failed to start server\n")) + goto close_prog; + + CHECK(skel->data->sk_storage_result != 0, "sk_storage_result", + "sk_local_storage not set\n"); + + close(serv_sk); + +close_prog: + local_storage__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c new file mode 100644 index 000000000000..0758ba229ae0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2020 Google LLC. + */ + +#include +#include +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +#define DUMMY_STORAGE_VALUE 0xdeadbeef + +int monitored_pid = 0; +int inode_storage_result = -1; +int sk_storage_result = -1; + +struct dummy_storage { + __u32 value; +}; + +struct { + __uint(type, BPF_MAP_TYPE_INODE_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct dummy_storage); +} inode_storage_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE); + __type(key, int); + __type(value, struct dummy_storage); +} sk_storage_map SEC(".maps"); + +/* TODO Use vmlinux.h once BTF pruning for embedded types is fixed. + */ +struct sock {} __attribute__((preserve_access_index)); +struct sockaddr {} __attribute__((preserve_access_index)); +struct socket { + struct sock *sk; +} __attribute__((preserve_access_index)); + +struct inode {} __attribute__((preserve_access_index)); +struct dentry { + struct inode *d_inode; +} __attribute__((preserve_access_index)); +struct file { + struct inode *f_inode; +} __attribute__((preserve_access_index)); + + +SEC("lsm/inode_unlink") +int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct dummy_storage *storage; + + if (pid != monitored_pid) + return 0; + + storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!storage) + return 0; + + if (storage->value == DUMMY_STORAGE_VALUE) + inode_storage_result = -1; + + inode_storage_result = + bpf_inode_storage_delete(&inode_storage_map, victim->d_inode); + + return 0; +} + +SEC("lsm/socket_bind") +int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, + int addrlen) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct dummy_storage *storage; + + if (pid != monitored_pid) + return 0; + + storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!storage) + return 0; + + if (storage->value == DUMMY_STORAGE_VALUE) + sk_storage_result = -1; + + sk_storage_result = bpf_sk_storage_delete(&sk_storage_map, sock->sk); + return 0; +} + +SEC("lsm/socket_post_create") +int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, + int protocol, int kern) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct dummy_storage *storage; + + if (pid != monitored_pid) + return 0; + + storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!storage) + return 0; + + storage->value = DUMMY_STORAGE_VALUE; + + return 0; +} + +SEC("lsm/file_open") +int BPF_PROG(file_open, struct file *file) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct dummy_storage *storage; + + if (pid != monitored_pid) + return 0; + + if (!file->f_inode) + return 0; + + storage = bpf_inode_storage_get(&inode_storage_map, file->f_inode, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!storage) + return 0; + + storage->value = DUMMY_STORAGE_VALUE; + return 0; +} -- cgit v1.3.1 From 193a983c5bc4c62f66d62a9a6f9084cf3f2a0db2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Aug 2020 21:21:11 +0200 Subject: tools resolve_btfids: Add size check to get_id function To make sure we don't crash on malformed symbols. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200825192124.710397-2-jolsa@kernel.org --- tools/bpf/resolve_btfids/main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 4d9ecb975862..35a172d3d80d 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -199,9 +199,16 @@ static char *get_id(const char *prefix_end) /* * __BTF_ID__func__vfs_truncate__0 * prefix_end = ^ + * pos = ^ */ - char *p, *id = strdup(prefix_end + sizeof("__") - 1); + int len = strlen(prefix_end); + int pos = sizeof("__") - 1; + char *p, *id; + if (pos >= len) + return NULL; + + id = strdup(prefix_end + pos); if (id) { /* * __BTF_ID__func__vfs_truncate__0 -- cgit v1.3.1 From a5f53b1d59c2fd35ebb55bbe47b05d596c0d466c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Aug 2020 21:21:12 +0200 Subject: tools resolve_btfids: Add support for set symbols The set symbol does not have the unique number suffix, so we need to give it a special parsing function. This was omitted in the first batch, because there was no set support yet, so it slipped in the testing. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200825192124.710397-3-jolsa@kernel.org --- tools/bpf/resolve_btfids/main.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 35a172d3d80d..6152d1356d7b 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -227,6 +227,24 @@ static char *get_id(const char *prefix_end) return id; } +static struct btf_id *add_set(struct object *obj, char *name) +{ + /* + * __BTF_ID__set__name + * name = ^ + * id = ^ + */ + char *id = name + sizeof(BTF_SET "__") - 1; + int len = strlen(name); + + if (id >= name + len) { + pr_err("FAILED to parse set name: %s\n", name); + return NULL; + } + + return btf_id__add(&obj->sets, id, true); +} + static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size) { char *id; @@ -383,7 +401,7 @@ static int symbols_collect(struct object *obj) id = add_symbol(&obj->funcs, prefix, sizeof(BTF_FUNC) - 1); /* set */ } else if (!strncmp(prefix, BTF_SET, sizeof(BTF_SET) - 1)) { - id = add_symbol(&obj->sets, prefix, sizeof(BTF_SET) - 1); + id = add_set(obj, prefix); /* * SET objects store list's count, which is encoded * in symbol's size, together with 'cnt' field hence -- cgit v1.3.1 From eae2e83e62633a2659e3bc690facba1c2fc9c45b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Aug 2020 21:21:19 +0200 Subject: bpf: Add BTF_SET_START/END macros Adding support to define sorted set of BTF ID values. Following defines sorted set of BTF ID values: BTF_SET_START(btf_allowlist_d_path) BTF_ID(func, vfs_truncate) BTF_ID(func, vfs_fallocate) BTF_ID(func, dentry_open) BTF_ID(func, vfs_getattr) BTF_ID(func, filp_close) BTF_SET_END(btf_allowlist_d_path) It defines following 'struct btf_id_set' variable to access values and count: struct btf_id_set btf_allowlist_d_path; Adding 'allowed' callback to struct bpf_func_proto, to allow verifier the check on allowed callers. Adding btf_id_set_contains function, which will be used by allowed callbacks to verify the caller's BTF ID value is within allowed set. Also removing extra '\' in __BTF_ID_LIST macro. Added BTF_SET_START_GLOBAL macro for global sets. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200825192124.710397-10-jolsa@kernel.org --- include/linux/bpf.h | 4 ++++ include/linux/btf_ids.h | 51 ++++++++++++++++++++++++++++++++++++++++++- kernel/bpf/btf.c | 14 ++++++++++++ kernel/bpf/verifier.c | 5 +++++ tools/include/linux/btf_ids.h | 51 ++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 123 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 540f5e6c3788..a6131d95e31e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -317,6 +317,7 @@ struct bpf_func_proto { * for this argument. */ int *ret_btf_id; /* return value btf_id */ + bool (*allowed)(const struct bpf_prog *prog); }; /* bpf_context is intentionally undefined structure. Pointer to bpf_context is @@ -1878,4 +1879,7 @@ enum bpf_text_poke_type { int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); +struct btf_id_set; +bool btf_id_set_contains(struct btf_id_set *set, u32 id); + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 4867d549e3c1..210b086188a3 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -3,6 +3,11 @@ #ifndef _LINUX_BTF_IDS_H #define _LINUX_BTF_IDS_H +struct btf_id_set { + u32 cnt; + u32 ids[]; +}; + #ifdef CONFIG_DEBUG_INFO_BTF #include /* for __PASTE */ @@ -62,7 +67,7 @@ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ "." #scope " " #name "; \n" \ #name ":; \n" \ -".popsection; \n"); \ +".popsection; \n"); #define BTF_ID_LIST(name) \ __BTF_ID_LIST(name, local) \ @@ -88,12 +93,56 @@ asm( \ ".zero 4 \n" \ ".popsection; \n"); +/* + * The BTF_SET_START/END macros pair defines sorted list of + * BTF IDs plus its members count, with following layout: + * + * BTF_SET_START(list) + * BTF_ID(type1, name1) + * BTF_ID(type2, name2) + * BTF_SET_END(list) + * + * __BTF_ID__set__list: + * .zero 4 + * list: + * __BTF_ID__type1__name1__3: + * .zero 4 + * __BTF_ID__type2__name2__4: + * .zero 4 + * + */ +#define __BTF_SET_START(name, scope) \ +asm( \ +".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ +"." #scope " __BTF_ID__set__" #name "; \n" \ +"__BTF_ID__set__" #name ":; \n" \ +".zero 4 \n" \ +".popsection; \n"); + +#define BTF_SET_START(name) \ +__BTF_ID_LIST(name, local) \ +__BTF_SET_START(name, local) + +#define BTF_SET_START_GLOBAL(name) \ +__BTF_ID_LIST(name, globl) \ +__BTF_SET_START(name, globl) + +#define BTF_SET_END(name) \ +asm( \ +".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ +".size __BTF_ID__set__" #name ", .-" #name " \n" \ +".popsection; \n"); \ +extern struct btf_id_set name; + #else #define BTF_ID_LIST(name) static u32 name[5]; #define BTF_ID(prefix, name) #define BTF_ID_UNUSED #define BTF_ID_LIST_GLOBAL(name) u32 name[1]; +#define BTF_SET_START(name) static struct btf_id_set name = { 0 }; +#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 }; +#define BTF_SET_END(name) #endif /* CONFIG_DEBUG_INFO_BTF */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index df966acaaeb1..f9ac6935ab3c 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include /* BTF (BPF Type Format) is the meta data format which describes @@ -4762,3 +4764,15 @@ u32 btf_id(const struct btf *btf) { return btf->id; } + +static int btf_id_cmp_func(const void *a, const void *b) +{ + const int *pa = a, *pb = b; + + return *pa - *pb; +} + +bool btf_id_set_contains(struct btf_id_set *set, u32 id) +{ + return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL; +} diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f003cee75d22..7e5908b83ec7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4859,6 +4859,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn return -EINVAL; } + if (fn->allowed && !fn->allowed(env->prog)) { + verbose(env, "helper call is not allowed in probe\n"); + return -EINVAL; + } + /* With LD_ABS/IND some JITs save/restore skb from r1. */ changes_data = bpf_helper_changes_pkt_data(fn->func); if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) { diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h index 4867d549e3c1..210b086188a3 100644 --- a/tools/include/linux/btf_ids.h +++ b/tools/include/linux/btf_ids.h @@ -3,6 +3,11 @@ #ifndef _LINUX_BTF_IDS_H #define _LINUX_BTF_IDS_H +struct btf_id_set { + u32 cnt; + u32 ids[]; +}; + #ifdef CONFIG_DEBUG_INFO_BTF #include /* for __PASTE */ @@ -62,7 +67,7 @@ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ "." #scope " " #name "; \n" \ #name ":; \n" \ -".popsection; \n"); \ +".popsection; \n"); #define BTF_ID_LIST(name) \ __BTF_ID_LIST(name, local) \ @@ -88,12 +93,56 @@ asm( \ ".zero 4 \n" \ ".popsection; \n"); +/* + * The BTF_SET_START/END macros pair defines sorted list of + * BTF IDs plus its members count, with following layout: + * + * BTF_SET_START(list) + * BTF_ID(type1, name1) + * BTF_ID(type2, name2) + * BTF_SET_END(list) + * + * __BTF_ID__set__list: + * .zero 4 + * list: + * __BTF_ID__type1__name1__3: + * .zero 4 + * __BTF_ID__type2__name2__4: + * .zero 4 + * + */ +#define __BTF_SET_START(name, scope) \ +asm( \ +".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ +"." #scope " __BTF_ID__set__" #name "; \n" \ +"__BTF_ID__set__" #name ":; \n" \ +".zero 4 \n" \ +".popsection; \n"); + +#define BTF_SET_START(name) \ +__BTF_ID_LIST(name, local) \ +__BTF_SET_START(name, local) + +#define BTF_SET_START_GLOBAL(name) \ +__BTF_ID_LIST(name, globl) \ +__BTF_SET_START(name, globl) + +#define BTF_SET_END(name) \ +asm( \ +".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ +".size __BTF_ID__set__" #name ", .-" #name " \n" \ +".popsection; \n"); \ +extern struct btf_id_set name; + #else #define BTF_ID_LIST(name) static u32 name[5]; #define BTF_ID(prefix, name) #define BTF_ID_UNUSED #define BTF_ID_LIST_GLOBAL(name) u32 name[1]; +#define BTF_SET_START(name) static struct btf_id_set name = { 0 }; +#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 }; +#define BTF_SET_END(name) #endif /* CONFIG_DEBUG_INFO_BTF */ -- cgit v1.3.1 From 6e22ab9da79343532cd3cde39df25e5a5478c692 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Aug 2020 21:21:20 +0200 Subject: bpf: Add d_path helper Adding d_path helper function that returns full path for given 'struct path' object, which needs to be the kernel BTF 'path' object. The path is returned in buffer provided 'buf' of size 'sz' and is zero terminated. bpf_d_path(&file->f_path, buf, size); The helper calls directly d_path function, so there's only limited set of function it can be called from. Adding just very modest set for the start. Updating also bpf.h tools uapi header and adding 'path' to bpf_helpers_doc.py script. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20200825192124.710397-11-jolsa@kernel.org --- include/uapi/linux/bpf.h | 14 ++++++++++++ kernel/trace/bpf_trace.c | 48 ++++++++++++++++++++++++++++++++++++++++++ scripts/bpf_helpers_doc.py | 2 ++ tools/include/uapi/linux/bpf.h | 14 ++++++++++++ 4 files changed, 78 insertions(+) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0e1cdf806fe1..0388bc0200b0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3513,6 +3513,7 @@ union bpf_attr { * * **-EPERM** This helper cannot be used under the * current sock_ops->op. + * * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags) * Description * Get a bpf_local_storage from an *inode*. @@ -3548,6 +3549,18 @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf_local_storage cannot be found. + * + * long bpf_d_path(struct path *path, char *buf, u32 sz) + * Description + * Return full path for given 'struct path' object, which + * needs to be the kernel BTF 'path' object. The path is + * returned in the provided buffer 'buf' of size 'sz' and + * is zero terminated. + * + * Return + * On success, the strictly positive length of the string, + * including the trailing NUL character. On error, a negative + * value. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3697,6 +3710,7 @@ union bpf_attr { FN(reserve_hdr_opt), \ FN(inode_storage_get), \ FN(inode_storage_delete), \ + FN(d_path), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index a8d4f253ed77..d973d891f2e2 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1098,6 +1098,52 @@ static const struct bpf_func_proto bpf_send_signal_thread_proto = { .arg1_type = ARG_ANYTHING, }; +BPF_CALL_3(bpf_d_path, struct path *, path, char *, buf, u32, sz) +{ + long len; + char *p; + + if (!sz) + return 0; + + p = d_path(path, buf, sz); + if (IS_ERR(p)) { + len = PTR_ERR(p); + } else { + len = buf + sz - p; + memmove(buf, p, len); + } + + return len; +} + +BTF_SET_START(btf_allowlist_d_path) +BTF_ID(func, vfs_truncate) +BTF_ID(func, vfs_fallocate) +BTF_ID(func, dentry_open) +BTF_ID(func, vfs_getattr) +BTF_ID(func, filp_close) +BTF_SET_END(btf_allowlist_d_path) + +static bool bpf_d_path_allowed(const struct bpf_prog *prog) +{ + return btf_id_set_contains(&btf_allowlist_d_path, prog->aux->attach_btf_id); +} + +BTF_ID_LIST(bpf_d_path_btf_ids) +BTF_ID(struct, path) + +static const struct bpf_func_proto bpf_d_path_proto = { + .func = bpf_d_path, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, + .btf_id = bpf_d_path_btf_ids, + .allowed = bpf_d_path_allowed, +}; + const struct bpf_func_proto * bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -1579,6 +1625,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return prog->expected_attach_type == BPF_TRACE_ITER ? &bpf_seq_write_proto : NULL; + case BPF_FUNC_d_path: + return &bpf_d_path_proto; default: return raw_tp_prog_func_proto(func_id, prog); } diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 5bfa448b4704..08388173973f 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -432,6 +432,7 @@ class PrinterHelpers(Printer): 'struct __sk_buff', 'struct sk_msg_md', 'struct xdp_md', + 'struct path', ] known_types = { '...', @@ -472,6 +473,7 @@ class PrinterHelpers(Printer): 'struct tcp_request_sock', 'struct udp6_sock', 'struct task_struct', + 'struct path', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0e1cdf806fe1..0388bc0200b0 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3513,6 +3513,7 @@ union bpf_attr { * * **-EPERM** This helper cannot be used under the * current sock_ops->op. + * * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags) * Description * Get a bpf_local_storage from an *inode*. @@ -3548,6 +3549,18 @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf_local_storage cannot be found. + * + * long bpf_d_path(struct path *path, char *buf, u32 sz) + * Description + * Return full path for given 'struct path' object, which + * needs to be the kernel BTF 'path' object. The path is + * returned in the provided buffer 'buf' of size 'sz' and + * is zero terminated. + * + * Return + * On success, the strictly positive length of the string, + * including the trailing NUL character. On error, a negative + * value. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3697,6 +3710,7 @@ union bpf_attr { FN(reserve_hdr_opt), \ FN(inode_storage_get), \ FN(inode_storage_delete), \ + FN(d_path), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.3.1 From 762f85156835258c85a32bf81b283300b6c6bd17 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Aug 2020 21:21:22 +0200 Subject: selftests/bpf: Add verifier test for d_path helper Adding verifier test for attaching tracing program and calling d_path helper from within and testing that it's allowed for dentry_open function and denied for 'd_path' function with appropriate error. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200825192124.710397-13-jolsa@kernel.org --- tools/testing/selftests/bpf/test_verifier.c | 19 +++++++++++++- tools/testing/selftests/bpf/verifier/d_path.c | 37 +++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/verifier/d_path.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 78a6bae56ea6..9be395d9dc64 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -114,6 +114,7 @@ struct bpf_test { bpf_testdata_struct_t retvals[MAX_TEST_RUNS]; }; enum bpf_attach_type expected_attach_type; + const char *kfunc; }; /* Note we want this to be 64 bit aligned so that the end of our array is @@ -984,8 +985,24 @@ static void do_test_single(struct bpf_test *test, bool unpriv, attr.log_level = 4; attr.prog_flags = pflags; + if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) { + attr.attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc, + attr.expected_attach_type); + if (attr.attach_btf_id < 0) { + printf("FAIL\nFailed to find BTF ID for '%s'!\n", + test->kfunc); + (*errors)++; + return; + } + } + fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog)); - if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) { + + /* BPF_PROG_TYPE_TRACING requires more setup and + * bpf_probe_prog_type won't give correct answer + */ + if (fd_prog < 0 && prog_type != BPF_PROG_TYPE_TRACING && + !bpf_probe_prog_type(prog_type, 0)) { printf("SKIP (unsupported program type %d)\n", prog_type); skips++; goto close_fds; diff --git a/tools/testing/selftests/bpf/verifier/d_path.c b/tools/testing/selftests/bpf/verifier/d_path.c new file mode 100644 index 000000000000..b988396379a7 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/d_path.c @@ -0,0 +1,37 @@ +{ + "d_path accept", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0), + BPF_LD_IMM64(BPF_REG_3, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_d_path), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .kfunc = "dentry_open", +}, +{ + "d_path reject", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0), + BPF_LD_IMM64(BPF_REG_3, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_d_path), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "helper call is not allowed in probe", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .kfunc = "d_path", +}, -- cgit v1.3.1 From e4d1af4b16f80a90d9cf3a09bee2012dcde45638 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Aug 2020 21:21:23 +0200 Subject: selftests/bpf: Add test for d_path helper Adding test for d_path helper which is pretty much copied from Wenbo Zhang's test for bpf_get_fd_path, which never made it in. The test is doing fstat/close on several fd types, and verifies we got the d_path helper working on kernel probes for vfs_getattr/filp_close functions. Original-patch-by: Wenbo Zhang Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200825192124.710397-14-jolsa@kernel.org --- tools/testing/selftests/bpf/prog_tests/d_path.c | 147 ++++++++++++++++++++++++ tools/testing/selftests/bpf/progs/test_d_path.c | 58 ++++++++++ 2 files changed, 205 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/d_path.c create mode 100644 tools/testing/selftests/bpf/progs/test_d_path.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c new file mode 100644 index 000000000000..058765da17e6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/d_path.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include +#include +#include +#include + +#define MAX_PATH_LEN 128 +#define MAX_FILES 7 + +#include "test_d_path.skel.h" + +static int duration; + +static struct { + __u32 cnt; + char paths[MAX_FILES][MAX_PATH_LEN]; +} src; + +static int set_pathname(int fd, pid_t pid) +{ + char buf[MAX_PATH_LEN]; + + snprintf(buf, MAX_PATH_LEN, "/proc/%d/fd/%d", pid, fd); + return readlink(buf, src.paths[src.cnt++], MAX_PATH_LEN); +} + +static int trigger_fstat_events(pid_t pid) +{ + int sockfd = -1, procfd = -1, devfd = -1; + int localfd = -1, indicatorfd = -1; + int pipefd[2] = { -1, -1 }; + struct stat fileStat; + int ret = -1; + + /* unmountable pseudo-filesystems */ + if (CHECK(pipe(pipefd) < 0, "trigger", "pipe failed\n")) + return ret; + /* unmountable pseudo-filesystems */ + sockfd = socket(AF_INET, SOCK_STREAM, 0); + if (CHECK(sockfd < 0, "trigger", "scoket failed\n")) + goto out_close; + /* mountable pseudo-filesystems */ + procfd = open("/proc/self/comm", O_RDONLY); + if (CHECK(procfd < 0, "trigger", "open /proc/self/comm failed\n")) + goto out_close; + devfd = open("/dev/urandom", O_RDONLY); + if (CHECK(devfd < 0, "trigger", "open /dev/urandom failed\n")) + goto out_close; + localfd = open("/tmp/d_path_loadgen.txt", O_CREAT | O_RDONLY); + if (CHECK(localfd < 0, "trigger", "open /tmp/d_path_loadgen.txt failed\n")) + goto out_close; + /* bpf_d_path will return path with (deleted) */ + remove("/tmp/d_path_loadgen.txt"); + indicatorfd = open("/tmp/", O_PATH); + if (CHECK(indicatorfd < 0, "trigger", "open /tmp/ failed\n")) + goto out_close; + + ret = set_pathname(pipefd[0], pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for pipe[0]\n")) + goto out_close; + ret = set_pathname(pipefd[1], pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for pipe[1]\n")) + goto out_close; + ret = set_pathname(sockfd, pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for socket\n")) + goto out_close; + ret = set_pathname(procfd, pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for proc\n")) + goto out_close; + ret = set_pathname(devfd, pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for dev\n")) + goto out_close; + ret = set_pathname(localfd, pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for file\n")) + goto out_close; + ret = set_pathname(indicatorfd, pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for dir\n")) + goto out_close; + + /* triggers vfs_getattr */ + fstat(pipefd[0], &fileStat); + fstat(pipefd[1], &fileStat); + fstat(sockfd, &fileStat); + fstat(procfd, &fileStat); + fstat(devfd, &fileStat); + fstat(localfd, &fileStat); + fstat(indicatorfd, &fileStat); + +out_close: + /* triggers filp_close */ + close(pipefd[0]); + close(pipefd[1]); + close(sockfd); + close(procfd); + close(devfd); + close(localfd); + close(indicatorfd); + return ret; +} + +void test_d_path(void) +{ + struct test_d_path__bss *bss; + struct test_d_path *skel; + int err; + + skel = test_d_path__open_and_load(); + if (CHECK(!skel, "setup", "d_path skeleton failed\n")) + goto cleanup; + + err = test_d_path__attach(skel); + if (CHECK(err, "setup", "attach failed: %d\n", err)) + goto cleanup; + + bss = skel->bss; + bss->my_pid = getpid(); + + err = trigger_fstat_events(bss->my_pid); + if (err < 0) + goto cleanup; + + for (int i = 0; i < MAX_FILES; i++) { + CHECK(strncmp(src.paths[i], bss->paths_stat[i], MAX_PATH_LEN), + "check", + "failed to get stat path[%d]: %s vs %s\n", + i, src.paths[i], bss->paths_stat[i]); + CHECK(strncmp(src.paths[i], bss->paths_close[i], MAX_PATH_LEN), + "check", + "failed to get close path[%d]: %s vs %s\n", + i, src.paths[i], bss->paths_close[i]); + /* The d_path helper returns size plus NUL char, hence + 1 */ + CHECK(bss->rets_stat[i] != strlen(bss->paths_stat[i]) + 1, + "check", + "failed to match stat return [%d]: %d vs %zd [%s]\n", + i, bss->rets_stat[i], strlen(bss->paths_stat[i]) + 1, + bss->paths_stat[i]); + CHECK(bss->rets_close[i] != strlen(bss->paths_stat[i]) + 1, + "check", + "failed to match stat return [%d]: %d vs %zd [%s]\n", + i, bss->rets_close[i], strlen(bss->paths_close[i]) + 1, + bss->paths_stat[i]); + } + +cleanup: + test_d_path__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_d_path.c b/tools/testing/selftests/bpf/progs/test_d_path.c new file mode 100644 index 000000000000..61f007855649 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_d_path.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include +#include + +#define MAX_PATH_LEN 128 +#define MAX_FILES 7 + +pid_t my_pid = 0; +__u32 cnt_stat = 0; +__u32 cnt_close = 0; +char paths_stat[MAX_FILES][MAX_PATH_LEN] = {}; +char paths_close[MAX_FILES][MAX_PATH_LEN] = {}; +int rets_stat[MAX_FILES] = {}; +int rets_close[MAX_FILES] = {}; + +SEC("fentry/vfs_getattr") +int BPF_PROG(prog_stat, struct path *path, struct kstat *stat, + __u32 request_mask, unsigned int query_flags) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + __u32 cnt = cnt_stat; + int ret; + + if (pid != my_pid) + return 0; + + if (cnt >= MAX_FILES) + return 0; + ret = bpf_d_path(path, paths_stat[cnt], MAX_PATH_LEN); + + rets_stat[cnt] = ret; + cnt_stat++; + return 0; +} + +SEC("fentry/filp_close") +int BPF_PROG(prog_close, struct file *file, void *id) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + __u32 cnt = cnt_close; + int ret; + + if (pid != my_pid) + return 0; + + if (cnt >= MAX_FILES) + return 0; + ret = bpf_d_path(&file->f_path, + paths_close[cnt], MAX_PATH_LEN); + + rets_close[cnt] = ret; + cnt_close++; + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.3.1 From cd04b04de119a222c83936f7e9dbd46a650cb688 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Aug 2020 21:21:24 +0200 Subject: selftests/bpf: Add set test to resolve_btfids Adding test to for sets resolve_btfids. We're checking that testing set gets properly resolved and sorted. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200825192124.710397-15-jolsa@kernel.org --- .../selftests/bpf/prog_tests/resolve_btfids.c | 39 +++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c index 3b127cab4864..8826c652adad 100644 --- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c +++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c @@ -47,6 +47,15 @@ BTF_ID(struct, S) BTF_ID(union, U) BTF_ID(func, func) +BTF_SET_START(test_set) +BTF_ID(typedef, S) +BTF_ID(typedef, T) +BTF_ID(typedef, U) +BTF_ID(struct, S) +BTF_ID(union, U) +BTF_ID(func, func) +BTF_SET_END(test_set) + static int __resolve_symbol(struct btf *btf, int type_id) { @@ -116,12 +125,40 @@ int test_resolve_btfids(void) */ for (j = 0; j < ARRAY_SIZE(test_lists); j++) { test_list = test_lists[j]; - for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) { + for (i = 0; i < ARRAY_SIZE(test_symbols); i++) { ret = CHECK(test_list[i] != test_symbols[i].id, "id_check", "wrong ID for %s (%d != %d)\n", test_symbols[i].name, test_list[i], test_symbols[i].id); + if (ret) + return ret; + } + } + + /* Check BTF_SET_START(test_set) IDs */ + for (i = 0; i < test_set.cnt; i++) { + bool found = false; + + for (j = 0; j < ARRAY_SIZE(test_symbols); j++) { + if (test_symbols[j].id != test_set.ids[i]) + continue; + found = true; + break; + } + + ret = CHECK(!found, "id_check", + "ID %d not found in test_symbols\n", + test_set.ids[i]); + if (ret) + break; + + if (i > 0) { + ret = CHECK(test_set.ids[i - 1] > test_set.ids[i], + "sort_check", + "test_set is not sorted\n"); + if (ret) + break; } } -- cgit v1.3.1 From d83971761fa278dcffa855110f024bf043ce6e6e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 26 Aug 2020 12:18:45 +0200 Subject: selftests/bpf: Fix open call in trigger_fstat_events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Alexei reported compile breakage on newer systems with following error: In file included from /usr/include/fcntl.h:290:0, 4814 from ./test_progs.h:29, 4815 from .../bpf-next/tools/testing/selftests/bpf/prog_tests/d_path.c:3: 4816In function ‘open’, 4817 inlined from ‘trigger_fstat_events’ at .../bpf-next/tools/testing/selftests/bpf/prog_tests/d_path.c:50:10, 4818 inlined from ‘test_d_path’ at .../bpf-next/tools/testing/selftests/bpf/prog_tests/d_path.c:119:6: 4819/usr/include/x86_64-linux-gnu/bits/fcntl2.h:50:4: error: call to ‘__open_missing_mode’ declared with attribute error: open with O_CREAT or O_TMPFILE in second argument needs 3 arguments 4820 __open_missing_mode (); 4821 ^~~~~~~~~~~~~~~~~~~~~~ We're missing permission bits as 3rd argument for open call with O_CREAT flag specified. Fixes: e4d1af4b16f8 ("selftests/bpf: Add test for d_path helper") Reported-by: Alexei Starovoitov Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200826101845.747617-1-jolsa@kernel.org --- tools/testing/selftests/bpf/prog_tests/d_path.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c index 058765da17e6..43ffbeacd680 100644 --- a/tools/testing/selftests/bpf/prog_tests/d_path.c +++ b/tools/testing/selftests/bpf/prog_tests/d_path.c @@ -47,7 +47,7 @@ static int trigger_fstat_events(pid_t pid) devfd = open("/dev/urandom", O_RDONLY); if (CHECK(devfd < 0, "trigger", "open /dev/urandom failed\n")) goto out_close; - localfd = open("/tmp/d_path_loadgen.txt", O_CREAT | O_RDONLY); + localfd = open("/tmp/d_path_loadgen.txt", O_CREAT | O_RDONLY, 0644); if (CHECK(localfd < 0, "trigger", "open /tmp/d_path_loadgen.txt failed\n")) goto out_close; /* bpf_d_path will return path with (deleted) */ -- cgit v1.3.1 From 7100ff7c62682c2332300ffde8706578e1098e13 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 26 Aug 2020 09:59:07 +0100 Subject: selftests/bpf: Fix spelling mistake "scoket" -> "socket" There is a spelling mistake in a check error message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200826085907.43095-1-colin.king@canonical.com --- tools/testing/selftests/bpf/prog_tests/d_path.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c index 43ffbeacd680..fc12e0d445ff 100644 --- a/tools/testing/selftests/bpf/prog_tests/d_path.c +++ b/tools/testing/selftests/bpf/prog_tests/d_path.c @@ -38,7 +38,7 @@ static int trigger_fstat_events(pid_t pid) return ret; /* unmountable pseudo-filesystems */ sockfd = socket(AF_INET, SOCK_STREAM, 0); - if (CHECK(sockfd < 0, "trigger", "scoket failed\n")) + if (CHECK(sockfd < 0, "trigger", "socket failed\n")) goto out_close; /* mountable pseudo-filesystems */ procfd = open("/proc/self/comm", O_RDONLY); -- cgit v1.3.1 From ab2dd173330a3f07142e68cd65682205036cd00f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 26 Aug 2020 10:00:45 -0700 Subject: selftests/x86/fsgsbase: Reap a forgotten child The ptrace() test forgot to reap its child. Reap it. Signed-off-by: Andy Lutomirski Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/e7700a503f30e79ab35a63103938a19893dbeff2.1598461151.git.luto@kernel.org --- tools/testing/selftests/x86/fsgsbase.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 998319553523..0056e2597f53 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -517,6 +517,9 @@ static void test_ptrace_write_gsbase(void) END: ptrace(PTRACE_CONT, child, NULL, NULL); + wait(&status); + if (!WIFEXITED(status)) + printf("[WARN]\tChild didn't exit cleanly.\n"); } int main() -- cgit v1.3.1 From 1b9abd1755ad947d7c9913e92e7837b533124c90 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 26 Aug 2020 10:00:46 -0700 Subject: selftests/x86/fsgsbase: Test PTRACE_PEEKUSER for GSBASE with invalid LDT GS This tests commit: 8ab49526b53d ("x86/fsgsbase/64: Fix NULL deref in 86_fsgsbase_read_task") Unpatched kernels will OOPS. Signed-off-by: Andy Lutomirski Signed-off-by: Ingo Molnar Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/c618ae86d1f757e01b1a8e79869f553cb88acf9a.1598461151.git.luto@kernel.org --- tools/testing/selftests/x86/fsgsbase.c | 65 ++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 0056e2597f53..7161cfc2e60b 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -443,6 +443,68 @@ static void test_unexpected_base(void) #define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r) +static void test_ptrace_write_gs_read_base(void) +{ + int status; + pid_t child = fork(); + + if (child < 0) + err(1, "fork"); + + if (child == 0) { + printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n"); + + printf("[RUN]\tARCH_SET_GS to 1\n"); + if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0) + err(1, "ARCH_SET_GS"); + + if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) + err(1, "PTRACE_TRACEME"); + + raise(SIGTRAP); + _exit(0); + } + + wait(&status); + + if (WSTOPSIG(status) == SIGTRAP) { + unsigned long base; + unsigned long gs_offset = USER_REGS_OFFSET(gs); + unsigned long base_offset = USER_REGS_OFFSET(gs_base); + + /* Read the initial base. It should be 1. */ + base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL); + if (base == 1) { + printf("[OK]\tGSBASE started at 1\n"); + } else { + nerrs++; + printf("[FAIL]\tGSBASE started at 0x%lx\n", base); + } + + printf("[RUN]\tSet GS = 0x7, read GSBASE\n"); + + /* Poke an LDT selector into GS. */ + if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0) + err(1, "PTRACE_POKEUSER"); + + /* And read the base. */ + base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL); + + if (base == 0 || base == 1) { + printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base); + } else { + nerrs++; + printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base); + } + } + + ptrace(PTRACE_CONT, child, NULL, NULL); + + wait(&status); + if (!WIFEXITED(status)) + printf("[WARN]\tChild didn't exit cleanly.\n"); +} + static void test_ptrace_write_gsbase(void) { int status; @@ -529,6 +591,9 @@ int main() shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); + /* Do these tests before we have an LDT. */ + test_ptrace_write_gs_read_base(); + /* Probe FSGSBASE */ sethandler(SIGILL, sigill, 0); if (sigsetjmp(jmpbuf, 1) == 0) { -- cgit v1.3.1 From 6dc03dc71387e1dc65cf14efb49e5cf7062a2d46 Mon Sep 17 00:00:00 2001 From: Udip Pant Date: Tue, 25 Aug 2020 16:20:01 -0700 Subject: selftests/bpf: Add test for freplace program with write access This adds a selftest that tests the behavior when a freplace target program attempts to make a write access on a packet. The expectation is that the read or write access is granted based on the program type of the linked program and not itself (which is of type, for e.g., BPF_PROG_TYPE_EXT). This test fails without the associated patch on the verifier. Signed-off-by: Udip Pant Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200825232003.2877030-3-udippant@fb.com --- .../selftests/bpf/prog_tests/fexit_bpf2bpf.c | 1 + tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c | 27 ++++++++++++++++++++++ .../testing/selftests/bpf/progs/test_pkt_access.c | 20 ++++++++++++++++ 3 files changed, 48 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index 197d0d217b56..7c7168963d52 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -123,6 +123,7 @@ static void test_func_replace(void) "freplace/get_skb_len", "freplace/get_skb_ifindex", "freplace/get_constant", + "freplace/test_pkt_write_access_subprog", }; test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o", "./test_pkt_access.o", diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c index 98e1efe14549..49a84a3a2306 100644 --- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include +#include #include #include +#include #include #include #include @@ -151,4 +153,29 @@ int new_get_constant(long val) test_get_constant = 1; return test_get_constant; /* original get_constant() returns val - 122 */ } + +__u64 test_pkt_write_access_subprog = 0; +SEC("freplace/test_pkt_write_access_subprog") +int new_test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off) +{ + + void *data = (void *)(long)skb->data; + void *data_end = (void *)(long)skb->data_end; + struct tcphdr *tcp; + + if (off > sizeof(struct ethhdr) + sizeof(struct ipv6hdr)) + return -1; + + tcp = data + off; + if (tcp + 1 > data_end) + return -1; + + /* make modifications to the packet data */ + tcp->check++; + tcp->syn = 0; + + test_pkt_write_access_subprog = 1; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c index e72eba4a93d2..852051064507 100644 --- a/tools/testing/selftests/bpf/progs/test_pkt_access.c +++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c @@ -79,6 +79,24 @@ int get_skb_ifindex(int val, struct __sk_buff *skb, int var) return skb->ifindex * val * var; } +__attribute__ ((noinline)) +int test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off) +{ + void *data = (void *)(long)skb->data; + void *data_end = (void *)(long)skb->data_end; + struct tcphdr *tcp = NULL; + + if (off > sizeof(struct ethhdr) + sizeof(struct ipv6hdr)) + return -1; + + tcp = data + off; + if (tcp + 1 > data_end) + return -1; + /* make modification to the packet data */ + tcp->check++; + return 0; +} + SEC("classifier/test_pkt_access") int test_pkt_access(struct __sk_buff *skb) { @@ -117,6 +135,8 @@ int test_pkt_access(struct __sk_buff *skb) if (test_pkt_access_subprog3(3, skb) != skb->len * 3 * skb->ifindex) return TC_ACT_SHOT; if (tcp) { + if (test_pkt_write_access_subprog(skb, (void *)tcp - data)) + return TC_ACT_SHOT; if (((void *)(tcp) + 20) > data_end || proto != 6) return TC_ACT_SHOT; barrier(); /* to force ordering of checks */ -- cgit v1.3.1 From 50d19736aff497a4c25ec7e36375195bfd8570cd Mon Sep 17 00:00:00 2001 From: Udip Pant Date: Tue, 25 Aug 2020 16:20:02 -0700 Subject: selftests/bpf: Test for checking return code for the extended prog This adds test to enforce same check for the return code for the extended prog as it is enforced for the target program. It asserts failure for a return code, which is permitted without the patch in this series, while it is restricted after the application of this patch. Signed-off-by: Udip Pant Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200825232003.2877030-4-udippant@fb.com --- .../selftests/bpf/prog_tests/fexit_bpf2bpf.c | 40 ++++++++++++++++++++++ .../selftests/bpf/progs/freplace_connect_v4_prog.c | 19 ++++++++++ 2 files changed, 59 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index 7c7168963d52..d295ca9bbf96 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -142,10 +142,50 @@ static void test_func_replace_verify(void) prog_name, false); } +static void test_func_replace_return_code(void) +{ + /* + * standalone test that asserts failure to load freplace prog + * because of invalid return code. + */ + struct bpf_object *obj = NULL, *pkt_obj; + int err, pkt_fd; + __u32 duration = 0; + const char *target_obj_file = "./connect4_prog.o"; + const char *obj_file = "./freplace_connect_v4_prog.o"; + + err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, + &pkt_obj, &pkt_fd); + /* the target prog should load fine */ + if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", + target_obj_file, err, errno)) + return; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, + .attach_prog_fd = pkt_fd, + ); + + obj = bpf_object__open_file(obj_file, &opts); + if (CHECK(IS_ERR_OR_NULL(obj), "obj_open", + "failed to open %s: %ld\n", obj_file, + PTR_ERR(obj))) + goto close_prog; + + /* It should fail to load the program */ + err = bpf_object__load(obj); + if (CHECK(!err, "bpf_obj_load should fail", "err %d\n", err)) + goto close_prog; + +close_prog: + if (!IS_ERR_OR_NULL(obj)) + bpf_object__close(obj); + bpf_object__close(pkt_obj); +} + void test_fexit_bpf2bpf(void) { test_target_no_callees(); test_target_yes_callees(); test_func_replace(); test_func_replace_verify(); + test_func_replace_return_code(); } diff --git a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c new file mode 100644 index 000000000000..544e5ac90461 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include +#include +#include +#include +#include +#include +#include + +SEC("freplace/connect_v4_prog") +int new_connect_v4_prog(struct bpf_sock_addr *ctx) +{ + // return value thats in invalid range + return 255; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.3.1 From 1410620cf20e7e23cce17983e9a81af659b28583 Mon Sep 17 00:00:00 2001 From: Udip Pant Date: Tue, 25 Aug 2020 16:20:03 -0700 Subject: selftests/bpf: Test for map update access from within EXT programs This adds further tests to ensure access permissions and restrictions are applied properly for some map types such as sock-map. It also adds another negative tests to assert static functions cannot be replaced. In the 'unreliable' mode it still fails with error 'tracing progs cannot use bpf_spin_lock yet' with the change in the verifier Signed-off-by: Udip Pant Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200825232003.2877030-5-udippant@fb.com --- .../selftests/bpf/prog_tests/fexit_bpf2bpf.c | 33 ++++++++++++++++-- .../selftests/bpf/progs/freplace_attach_probe.c | 40 ++++++++++++++++++++++ .../selftests/bpf/progs/freplace_cls_redirect.c | 34 ++++++++++++++++++ 3 files changed, 104 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/freplace_attach_probe.c create mode 100644 tools/testing/selftests/bpf/progs/freplace_cls_redirect.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index d295ca9bbf96..a550dab9ba7a 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -142,7 +142,20 @@ static void test_func_replace_verify(void) prog_name, false); } -static void test_func_replace_return_code(void) +static void test_func_sockmap_update(void) +{ + const char *prog_name[] = { + "freplace/cls_redirect", + }; + test_fexit_bpf2bpf_common("./freplace_cls_redirect.o", + "./test_cls_redirect.o", + ARRAY_SIZE(prog_name), + prog_name, false); +} + +static void test_obj_load_failure_common(const char *obj_file, + const char *target_obj_file) + { /* * standalone test that asserts failure to load freplace prog @@ -151,8 +164,6 @@ static void test_func_replace_return_code(void) struct bpf_object *obj = NULL, *pkt_obj; int err, pkt_fd; __u32 duration = 0; - const char *target_obj_file = "./connect4_prog.o"; - const char *obj_file = "./freplace_connect_v4_prog.o"; err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, &pkt_obj, &pkt_fd); @@ -181,11 +192,27 @@ close_prog: bpf_object__close(pkt_obj); } +static void test_func_replace_return_code(void) +{ + /* test invalid return code in the replaced program */ + test_obj_load_failure_common("./freplace_connect_v4_prog.o", + "./connect4_prog.o"); +} + +static void test_func_map_prog_compatibility(void) +{ + /* test with spin lock map value in the replaced program */ + test_obj_load_failure_common("./freplace_attach_probe.o", + "./test_attach_probe.o"); +} + void test_fexit_bpf2bpf(void) { test_target_no_callees(); test_target_yes_callees(); test_func_replace(); test_func_replace_verify(); + test_func_sockmap_update(); test_func_replace_return_code(); + test_func_map_prog_compatibility(); } diff --git a/tools/testing/selftests/bpf/progs/freplace_attach_probe.c b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c new file mode 100644 index 000000000000..bb2a77c5b62b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include +#include +#include +#include + +#define VAR_NUM 2 + +struct hmap_elem { + struct bpf_spin_lock lock; + int var[VAR_NUM]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct hmap_elem); +} hash_map SEC(".maps"); + +SEC("freplace/handle_kprobe") +int new_handle_kprobe(struct pt_regs *ctx) +{ + struct hmap_elem zero = {}, *val; + int key = 0; + + val = bpf_map_lookup_elem(&hash_map, &key); + if (!val) + return 1; + /* spin_lock in hash map */ + bpf_spin_lock(&val->lock); + val->var[0] = 99; + bpf_spin_unlock(&val->lock); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c new file mode 100644 index 000000000000..68a5a9db928a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include +#include +#include +#include +#include + +struct bpf_map_def SEC("maps") sock_map = { + .type = BPF_MAP_TYPE_SOCKMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 2, +}; + +SEC("freplace/cls_redirect") +int freplace_cls_redirect_test(struct __sk_buff *skb) +{ + int ret = 0; + const int zero = 0; + struct bpf_sock *sk; + + sk = bpf_map_lookup_elem(&sock_map, &zero); + if (!sk) + return TC_ACT_SHOT; + + ret = bpf_map_update_elem(&sock_map, &zero, sk, 0); + bpf_sk_release(sk); + + return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.3.1 From ef05afa66c59c2031a3798916ef3ff3778232129 Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Wed, 26 Aug 2020 00:55:49 -0700 Subject: libbpf: Fix unintentional success return code in bpf_object__load There are code paths where EINVAL is returned directly without setting errno. In that case, errno could be 0, which would mask the failure. For example, if a careless programmer set log_level to 10000 out of laziness, they would have to spend a long time trying to figure out why. Fixes: 4f33ddb4e3e2 ("libbpf: Propagate EPERM to caller on program load") Signed-off-by: Alex Gartrell Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200826075549.1858580-1-alexgartrell@gmail.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2e2523d8bb6d..8f9e7d281225 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6067,7 +6067,7 @@ retry_load: free(log_buf); goto retry_load; } - ret = -errno; + ret = errno ? -errno : -LIBBPF_ERRNO__LOAD; cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warn("load bpf program failed: %s\n", cp); pr_perm_msg(ret); -- cgit v1.3.1 From 05290a2773e798351cdc69d5373a431b4acdd184 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 26 Aug 2020 19:48:55 +0300 Subject: selftests: fib_nexthops: Test IPv6 route with group after removing IPv4 nexthops Test that an IPv6 route can not use a nexthop group with mixed IPv4 and IPv6 nexthops, but can use it after deleting the IPv4 nexthops. Output without previous patch: # ./fib_nexthops.sh -t ipv6_fcnal_runtime IPv6 functional runtime ----------------------- TEST: Route add [ OK ] TEST: Route delete [ OK ] TEST: Ping with nexthop [ OK ] TEST: Ping - multipath [ OK ] TEST: Ping - blackhole [ OK ] TEST: Ping - blackhole replaced with gateway [ OK ] TEST: Ping - gateway replaced by blackhole [ OK ] TEST: Ping - group with blackhole [ OK ] TEST: Ping - group blackhole replaced with gateways [ OK ] TEST: IPv6 route with device only nexthop [ OK ] TEST: IPv6 multipath route with nexthop mix - dev only + gw [ OK ] TEST: IPv6 route can not have a v4 gateway [ OK ] TEST: Nexthop replace - v6 route, v4 nexthop [ OK ] TEST: Nexthop replace of group entry - v6 route, v4 nexthop [ OK ] TEST: IPv6 route can not have a group with v4 and v6 gateways [ OK ] TEST: IPv6 route can not have a group with v4 and v6 gateways [ OK ] TEST: IPv6 route using a group after deleting v4 gateways [FAIL] TEST: Nexthop with default route and rpfilter [ OK ] TEST: Nexthop with multipath default route and rpfilter [ OK ] Tests passed: 18 Tests failed: 1 Output with previous patch: bash-5.0# ./fib_nexthops.sh -t ipv6_fcnal_runtime IPv6 functional runtime ----------------------- TEST: Route add [ OK ] TEST: Route delete [ OK ] TEST: Ping with nexthop [ OK ] TEST: Ping - multipath [ OK ] TEST: Ping - blackhole [ OK ] TEST: Ping - blackhole replaced with gateway [ OK ] TEST: Ping - gateway replaced by blackhole [ OK ] TEST: Ping - group with blackhole [ OK ] TEST: Ping - group blackhole replaced with gateways [ OK ] TEST: IPv6 route with device only nexthop [ OK ] TEST: IPv6 multipath route with nexthop mix - dev only + gw [ OK ] TEST: IPv6 route can not have a v4 gateway [ OK ] TEST: Nexthop replace - v6 route, v4 nexthop [ OK ] TEST: Nexthop replace of group entry - v6 route, v4 nexthop [ OK ] TEST: IPv6 route can not have a group with v4 and v6 gateways [ OK ] TEST: IPv6 route can not have a group with v4 and v6 gateways [ OK ] TEST: IPv6 route using a group after deleting v4 gateways [ OK ] TEST: Nexthop with default route and rpfilter [ OK ] TEST: Nexthop with multipath default route and rpfilter [ OK ] Tests passed: 19 Tests failed: 0 Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_nexthops.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 22dc2f3d428b..06e4f12e838d 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -739,6 +739,21 @@ ipv6_fcnal_runtime() run_cmd "$IP nexthop replace id 81 via 172.16.1.1 dev veth1" log_test $? 2 "Nexthop replace of group entry - v6 route, v4 nexthop" + run_cmd "$IP nexthop add id 86 via 2001:db8:92::2 dev veth3" + run_cmd "$IP nexthop add id 87 via 172.16.1.1 dev veth1" + run_cmd "$IP nexthop add id 88 via 172.16.1.1 dev veth1" + run_cmd "$IP nexthop add id 124 group 86/87/88" + run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124" + log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways" + + run_cmd "$IP nexthop del id 88" + run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124" + log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways" + + run_cmd "$IP nexthop del id 87" + run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124" + log_test $? 0 "IPv6 route using a group after removing v4 gateways" + $IP nexthop flush >/dev/null 2>&1 # -- cgit v1.3.1 From 041bc0dce524aa8435d1c088d718090095d1f27c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 26 Aug 2020 19:48:57 +0300 Subject: selftests: fib_nexthops: Test IPv6 route with group after replacing IPv4 nexthops Test that an IPv6 route can not use a nexthop group with mixed IPv4 and IPv6 nexthops, but can use it after replacing the IPv4 nexthops with IPv6 nexthops. Output without previous patch: # ./fib_nexthops.sh -t ipv6_fcnal_runtime IPv6 functional runtime ----------------------- TEST: Route add [ OK ] TEST: Route delete [ OK ] TEST: Ping with nexthop [ OK ] TEST: Ping - multipath [ OK ] TEST: Ping - blackhole [ OK ] TEST: Ping - blackhole replaced with gateway [ OK ] TEST: Ping - gateway replaced by blackhole [ OK ] TEST: Ping - group with blackhole [ OK ] TEST: Ping - group blackhole replaced with gateways [ OK ] TEST: IPv6 route with device only nexthop [ OK ] TEST: IPv6 multipath route with nexthop mix - dev only + gw [ OK ] TEST: IPv6 route can not have a v4 gateway [ OK ] TEST: Nexthop replace - v6 route, v4 nexthop [ OK ] TEST: Nexthop replace of group entry - v6 route, v4 nexthop [ OK ] TEST: IPv6 route can not have a group with v4 and v6 gateways [ OK ] TEST: IPv6 route can not have a group with v4 and v6 gateways [ OK ] TEST: IPv6 route using a group after removing v4 gateways [ OK ] TEST: IPv6 route can not have a group with v4 and v6 gateways [ OK ] TEST: IPv6 route can not have a group with v4 and v6 gateways [ OK ] TEST: IPv6 route using a group after replacing v4 gateways [FAIL] TEST: Nexthop with default route and rpfilter [ OK ] TEST: Nexthop with multipath default route and rpfilter [ OK ] Tests passed: 21 Tests failed: 1 Output with previous patch: # ./fib_nexthops.sh -t ipv6_fcnal_runtime IPv6 functional runtime ----------------------- TEST: Route add [ OK ] TEST: Route delete [ OK ] TEST: Ping with nexthop [ OK ] TEST: Ping - multipath [ OK ] TEST: Ping - blackhole [ OK ] TEST: Ping - blackhole replaced with gateway [ OK ] TEST: Ping - gateway replaced by blackhole [ OK ] TEST: Ping - group with blackhole [ OK ] TEST: Ping - group blackhole replaced with gateways [ OK ] TEST: IPv6 route with device only nexthop [ OK ] TEST: IPv6 multipath route with nexthop mix - dev only + gw [ OK ] TEST: IPv6 route can not have a v4 gateway [ OK ] TEST: Nexthop replace - v6 route, v4 nexthop [ OK ] TEST: Nexthop replace of group entry - v6 route, v4 nexthop [ OK ] TEST: IPv6 route can not have a group with v4 and v6 gateways [ OK ] TEST: IPv6 route can not have a group with v4 and v6 gateways [ OK ] TEST: IPv6 route using a group after removing v4 gateways [ OK ] TEST: IPv6 route can not have a group with v4 and v6 gateways [ OK ] TEST: IPv6 route can not have a group with v4 and v6 gateways [ OK ] TEST: IPv6 route using a group after replacing v4 gateways [ OK ] TEST: Nexthop with default route and rpfilter [ OK ] TEST: Nexthop with multipath default route and rpfilter [ OK ] Tests passed: 22 Tests failed: 0 Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_nexthops.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 06e4f12e838d..b74884d52913 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -754,6 +754,21 @@ ipv6_fcnal_runtime() run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124" log_test $? 0 "IPv6 route using a group after removing v4 gateways" + run_cmd "$IP ro delete 2001:db8:101::1/128" + run_cmd "$IP nexthop add id 87 via 172.16.1.1 dev veth1" + run_cmd "$IP nexthop add id 88 via 172.16.1.1 dev veth1" + run_cmd "$IP nexthop replace id 124 group 86/87/88" + run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124" + log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways" + + run_cmd "$IP nexthop replace id 88 via 2001:db8:92::2 dev veth3" + run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124" + log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways" + + run_cmd "$IP nexthop replace id 87 via 2001:db8:92::2 dev veth3" + run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124" + log_test $? 0 "IPv6 route using a group after replacing v4 gateways" + $IP nexthop flush >/dev/null 2>&1 # -- cgit v1.3.1 From f5493c514cdbcd8c3f9f3ffca4f68b97f122e60a Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 24 Aug 2020 23:46:09 -0700 Subject: selftests/bpf: Add verifier tests for xor operation Added some test_verifier bounds check test cases for xor operations. $ ./test_verifier ... #78/u bounds check for reg = 0, reg xor 1 OK #78/p bounds check for reg = 0, reg xor 1 OK #79/u bounds check for reg32 = 0, reg32 xor 1 OK #79/p bounds check for reg32 = 0, reg32 xor 1 OK #80/u bounds check for reg = 2, reg xor 3 OK #80/p bounds check for reg = 2, reg xor 3 OK #81/u bounds check for reg = any, reg xor 3 OK #81/p bounds check for reg = any, reg xor 3 OK #82/u bounds check for reg32 = any, reg32 xor 3 OK #82/p bounds check for reg32 = any, reg32 xor 3 OK #83/u bounds check for reg > 0, reg xor 3 OK #83/p bounds check for reg > 0, reg xor 3 OK #84/u bounds check for reg32 > 0, reg32 xor 3 OK #84/p bounds check for reg32 > 0, reg32 xor 3 OK ... Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Cc: John Fastabend Link: https://lore.kernel.org/bpf/20200825064609.2018077-1-yhs@fb.com --- tools/testing/selftests/bpf/verifier/bounds.c | 146 ++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c index 4d6645f2874c..dac40de3f868 100644 --- a/tools/testing/selftests/bpf/verifier/bounds.c +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -557,3 +557,149 @@ .result = ACCEPT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "bounds check for reg = 0, reg xor 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, +{ + "bounds check for reg32 = 0, reg32 xor 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 1), + BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, +{ + "bounds check for reg = 2, reg xor 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, +{ + "bounds check for reg = any, reg xor 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = REJECT, + .errstr = "invalid access to map value", + .errstr_unpriv = "invalid access to map value", +}, +{ + "bounds check for reg32 = any, reg32 xor 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 3), + BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = REJECT, + .errstr = "invalid access to map value", + .errstr_unpriv = "invalid access to map value", +}, +{ + "bounds check for reg > 0, reg xor 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JLE, BPF_REG_1, 0, 3), + BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, +{ + "bounds check for reg32 > 0, reg32 xor 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP32_IMM(BPF_JLE, BPF_REG_1, 0, 3), + BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 3), + BPF_JMP32_IMM(BPF_JGE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, -- cgit v1.3.1 From 2e80be60c465a4f8559327340eaf40845dd7797a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 26 Aug 2020 21:11:09 -0700 Subject: libbpf: Fix compilation warnings for 64-bit printf args Fix compilation warnings due to __u64 defined differently as `unsigned long` or `unsigned long long` on different architectures (e.g., ppc64le differs from x86-64). Also cast one argument to size_t to fix printf warning of similar nature. Fixes: eacaaed784e2 ("libbpf: Implement enum value-based CO-RE relocations") Fixes: 50e09460d9f8 ("libbpf: Skip well-known ELF sections when iterating ELF") Reported-by: Naresh Kamboju Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200827041109.3613090-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 8f9e7d281225..8cdb2528482e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2823,7 +2823,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) obj->efile.bss = data; obj->efile.bss_shndx = idx; } else { - pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name, sh.sh_size); + pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name, + (size_t)sh.sh_size); } } @@ -5244,7 +5245,8 @@ static int bpf_core_patch_insn(struct bpf_program *prog, if (res->validate && imm != orig_val) { pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n", bpf_program__title(prog, false), relo_idx, - insn_idx, imm, orig_val, new_val); + insn_idx, (unsigned long long)imm, + orig_val, new_val); return -EINVAL; } @@ -5252,7 +5254,7 @@ static int bpf_core_patch_insn(struct bpf_program *prog, insn[1].imm = 0; /* currently only 32-bit values are supported */ pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n", bpf_program__title(prog, false), relo_idx, insn_idx, - imm, new_val); + (unsigned long long)imm, new_val); break; } default: -- cgit v1.3.1 From 884ee754f5aedbe54406a4d308a6cc57335747ce Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Thu, 27 Aug 2020 13:07:09 +0200 Subject: selftests/livepatch: Do not check order when using "comm" for dmesg checking check_result() uses "comm" to check expected results of selftests output in dmesg. Everything works fine if timestamps in dmesg are unique. If not, like in this example [ 86.844422] test_klp_callbacks_demo: pre_unpatch_callback: test_klp_callbacks_mod -> [MODULE_STATE_LIVE] Normal state [ 86.844422] livepatch: 'test_klp_callbacks_demo': starting unpatching transition , "comm" fails with "comm: file 2 is not in sorted order". Suppress the order checking with --nocheck-order option. Fixes: 2f3f651f3756 ("selftests/livepatch: Use "comm" instead of "diff" for dmesg") Signed-off-by: Miroslav Benes Acked-by: Joe Lawrence Signed-off-by: Jiri Kosina --- tools/testing/selftests/livepatch/functions.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh index 1aba83c87ad3..846c7ed71556 100644 --- a/tools/testing/selftests/livepatch/functions.sh +++ b/tools/testing/selftests/livepatch/functions.sh @@ -278,7 +278,7 @@ function check_result { # help differentiate repeated testing runs. Remove them with a # post-comparison sed filter. - result=$(dmesg | comm -13 "$SAVED_DMESG" - | \ + result=$(dmesg | comm --nocheck-order -13 "$SAVED_DMESG" - | \ grep -e 'livepatch:' -e 'test_klp' | \ grep -v '\(tainting\|taints\) kernel' | \ sed 's/^\[[ 0-9.]*\] //') -- cgit v1.3.1 From 8ec90bfd1aeb39f44c3e704aaa9ca88c52e6866a Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Thu, 27 Aug 2020 11:09:47 -0600 Subject: selftests: mount: add nosymfollow tests Add tests for the new 'nosymfollow' mount option. We test to make sure that symlink traversal fails with ELOOP when 'nosymfollow' is set, but that readlink(2) and realpath(3) still work as expected. We also verify that statfs(2) correctly returns ST_NOSYMFOLLOW when we are mounted with the 'nosymfollow' option. Signed-off-by: Ross Zwisler Signed-off-by: Al Viro --- tools/testing/selftests/mount/.gitignore | 1 + tools/testing/selftests/mount/Makefile | 4 +- tools/testing/selftests/mount/nosymfollow-test.c | 218 +++++++++++++++++++++ tools/testing/selftests/mount/run_nosymfollow.sh | 4 + tools/testing/selftests/mount/run_tests.sh | 12 -- .../selftests/mount/run_unprivileged_remount.sh | 12 ++ 6 files changed, 237 insertions(+), 14 deletions(-) create mode 100644 tools/testing/selftests/mount/nosymfollow-test.c create mode 100755 tools/testing/selftests/mount/run_nosymfollow.sh delete mode 100755 tools/testing/selftests/mount/run_tests.sh create mode 100755 tools/testing/selftests/mount/run_unprivileged_remount.sh (limited to 'tools') diff --git a/tools/testing/selftests/mount/.gitignore b/tools/testing/selftests/mount/.gitignore index 0bc64a6d4c18..17f2d8415162 100644 --- a/tools/testing/selftests/mount/.gitignore +++ b/tools/testing/selftests/mount/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only unprivileged-remount-test +nosymfollow-test diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile index 026890744215..2d9454841644 100644 --- a/tools/testing/selftests/mount/Makefile +++ b/tools/testing/selftests/mount/Makefile @@ -3,7 +3,7 @@ CFLAGS = -Wall \ -O2 -TEST_PROGS := run_tests.sh -TEST_GEN_FILES := unprivileged-remount-test +TEST_PROGS := run_unprivileged_remount.sh run_nosymfollow.sh +TEST_GEN_FILES := unprivileged-remount-test nosymfollow-test include ../lib.mk diff --git a/tools/testing/selftests/mount/nosymfollow-test.c b/tools/testing/selftests/mount/nosymfollow-test.c new file mode 100644 index 000000000000..650d6d80a1d2 --- /dev/null +++ b/tools/testing/selftests/mount/nosymfollow-test.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef MS_NOSYMFOLLOW +# define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */ +#endif + +#ifndef ST_NOSYMFOLLOW +# define ST_NOSYMFOLLOW 0x2000 /* Do not follow symlinks */ +#endif + +#define DATA "/tmp/data" +#define LINK "/tmp/symlink" +#define TMP "/tmp" + +static void die(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + +static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, + va_list ap) +{ + ssize_t written; + char buf[4096]; + int buf_len; + int fd; + + buf_len = vsnprintf(buf, sizeof(buf), fmt, ap); + if (buf_len < 0) + die("vsnprintf failed: %s\n", strerror(errno)); + + if (buf_len >= sizeof(buf)) + die("vsnprintf output truncated\n"); + + fd = open(filename, O_WRONLY); + if (fd < 0) { + if ((errno == ENOENT) && enoent_ok) + return; + die("open of %s failed: %s\n", filename, strerror(errno)); + } + + written = write(fd, buf, buf_len); + if (written != buf_len) { + if (written >= 0) { + die("short write to %s\n", filename); + } else { + die("write to %s failed: %s\n", + filename, strerror(errno)); + } + } + + if (close(fd) != 0) + die("close of %s failed: %s\n", filename, strerror(errno)); +} + +static void maybe_write_file(char *filename, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vmaybe_write_file(true, filename, fmt, ap); + va_end(ap); +} + +static void write_file(char *filename, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vmaybe_write_file(false, filename, fmt, ap); + va_end(ap); +} + +static void create_and_enter_ns(void) +{ + uid_t uid = getuid(); + gid_t gid = getgid(); + + if (unshare(CLONE_NEWUSER) != 0) + die("unshare(CLONE_NEWUSER) failed: %s\n", strerror(errno)); + + maybe_write_file("/proc/self/setgroups", "deny"); + write_file("/proc/self/uid_map", "0 %d 1", uid); + write_file("/proc/self/gid_map", "0 %d 1", gid); + + if (setgid(0) != 0) + die("setgid(0) failed %s\n", strerror(errno)); + if (setuid(0) != 0) + die("setuid(0) failed %s\n", strerror(errno)); + + if (unshare(CLONE_NEWNS) != 0) + die("unshare(CLONE_NEWNS) failed: %s\n", strerror(errno)); +} + +static void setup_symlink(void) +{ + int data, err; + + data = creat(DATA, O_RDWR); + if (data < 0) + die("creat failed: %s\n", strerror(errno)); + + err = symlink(DATA, LINK); + if (err < 0) + die("symlink failed: %s\n", strerror(errno)); + + if (close(data) != 0) + die("close of %s failed: %s\n", DATA, strerror(errno)); +} + +static void test_link_traversal(bool nosymfollow) +{ + int link; + + link = open(LINK, 0, O_RDWR); + if (nosymfollow) { + if ((link != -1 || errno != ELOOP)) { + die("link traversal unexpected result: %d, %s\n", + link, strerror(errno)); + } + } else { + if (link < 0) + die("link traversal failed: %s\n", strerror(errno)); + + if (close(link) != 0) + die("close of link failed: %s\n", strerror(errno)); + } +} + +static void test_readlink(void) +{ + char buf[4096]; + ssize_t ret; + + bzero(buf, sizeof(buf)); + + ret = readlink(LINK, buf, sizeof(buf)); + if (ret < 0) + die("readlink failed: %s\n", strerror(errno)); + if (strcmp(buf, DATA) != 0) + die("readlink strcmp failed: '%s' '%s'\n", buf, DATA); +} + +static void test_realpath(void) +{ + char *path = realpath(LINK, NULL); + + if (!path) + die("realpath failed: %s\n", strerror(errno)); + if (strcmp(path, DATA) != 0) + die("realpath strcmp failed\n"); + + free(path); +} + +static void test_statfs(bool nosymfollow) +{ + struct statfs buf; + int ret; + + ret = statfs(TMP, &buf); + if (ret) + die("statfs failed: %s\n", strerror(errno)); + + if (nosymfollow) { + if ((buf.f_flags & ST_NOSYMFOLLOW) == 0) + die("ST_NOSYMFOLLOW not set on %s\n", TMP); + } else { + if ((buf.f_flags & ST_NOSYMFOLLOW) != 0) + die("ST_NOSYMFOLLOW set on %s\n", TMP); + } +} + +static void run_tests(bool nosymfollow) +{ + test_link_traversal(nosymfollow); + test_readlink(); + test_realpath(); + test_statfs(nosymfollow); +} + +int main(int argc, char **argv) +{ + create_and_enter_ns(); + + if (mount("testing", TMP, "ramfs", 0, NULL) != 0) + die("mount failed: %s\n", strerror(errno)); + + setup_symlink(); + run_tests(false); + + if (mount("testing", TMP, "ramfs", MS_REMOUNT|MS_NOSYMFOLLOW, NULL) != 0) + die("remount failed: %s\n", strerror(errno)); + + run_tests(true); + + return EXIT_SUCCESS; +} diff --git a/tools/testing/selftests/mount/run_nosymfollow.sh b/tools/testing/selftests/mount/run_nosymfollow.sh new file mode 100755 index 000000000000..5fbbf03043a2 --- /dev/null +++ b/tools/testing/selftests/mount/run_nosymfollow.sh @@ -0,0 +1,4 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +./nosymfollow-test diff --git a/tools/testing/selftests/mount/run_tests.sh b/tools/testing/selftests/mount/run_tests.sh deleted file mode 100755 index 4ab8f507dcba..000000000000 --- a/tools/testing/selftests/mount/run_tests.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - -# Run mount selftests -if [ -f /proc/self/uid_map ] ; then - ./unprivileged-remount-test ; -else - echo "WARN: No /proc/self/uid_map exist, test skipped." ; - exit $ksft_skip -fi diff --git a/tools/testing/selftests/mount/run_unprivileged_remount.sh b/tools/testing/selftests/mount/run_unprivileged_remount.sh new file mode 100755 index 000000000000..4ab8f507dcba --- /dev/null +++ b/tools/testing/selftests/mount/run_unprivileged_remount.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +# Run mount selftests +if [ -f /proc/self/uid_map ] ; then + ./unprivileged-remount-test ; +else + echo "WARN: No /proc/self/uid_map exist, test skipped." ; + exit $ksft_skip +fi -- cgit v1.3.1 From 661b37cd437ef49cd28444f79b9b0c71ea76e8c8 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 27 Aug 2020 10:53:36 +0200 Subject: tools, bpf/build: Cleanup feature files on make clean The system for "Auto-detecting system features" located under tools/build/ are (currently) used by perf, libbpf and bpftool. It can contain stalled feature detection files, which are not cleaned up by libbpf and bpftool on make clean (side-note: perf tool is correct). Fix this by making the users invoke the make clean target. Some details about the changes. The libbpf Makefile already had a clean-config target (which seems to be copy-pasted from perf), but this target was not "connected" (a make dependency) to clean target. Choose not to rename target as someone might be using it. Did change the output from "CLEAN config" to "CLEAN feature-detect", to make it more clear what happens. This is related to the complaint and troubleshooting in the following link: https://lore.kernel.org/lkml/20200818122007.2d1cfe2d@carbon/ Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/lkml/20200818122007.2d1cfe2d@carbon/ Link: https://lore.kernel.org/bpf/159851841661.1072907.13770213104521805592.stgit@firesoul --- tools/bpf/bpftool/Makefile | 6 +++++- tools/build/Makefile | 2 ++ tools/lib/bpf/Makefile | 4 ++-- 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 8462690a039b..02c99bc95c69 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -176,7 +176,11 @@ $(OUTPUT)bpftool: $(OBJS) $(LIBBPF) $(OUTPUT)%.o: %.c $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< -clean: $(LIBBPF)-clean +feature-detect-clean: + $(call QUIET_CLEAN, feature-detect) + $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null + +clean: $(LIBBPF)-clean feature-detect-clean $(call QUIET_CLEAN, bpftool) $(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d $(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h diff --git a/tools/build/Makefile b/tools/build/Makefile index 727050c40f09..722f1700d96a 100644 --- a/tools/build/Makefile +++ b/tools/build/Makefile @@ -38,6 +38,8 @@ clean: $(call QUIET_CLEAN, fixdep) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)rm -f $(OUTPUT)fixdep + $(call QUIET_CLEAN, feature-detect) + $(Q)$(MAKE) -C feature/ clean >/dev/null $(OUTPUT)fixdep-in.o: FORCE $(Q)$(MAKE) $(build)=fixdep diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 66b2cfadf262..adbe994610f2 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -264,10 +264,10 @@ install: install_lib install_pkgconfig install_headers ### Cleaning rules config-clean: - $(call QUIET_CLEAN, config) + $(call QUIET_CLEAN, feature-detect) $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null -clean: +clean: config-clean $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \ *~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \ $(SHARED_OBJDIR) $(STATIC_OBJDIR) \ -- cgit v1.3.1 From b0c9eb37817943840a1a82dbc998c491609a0afd Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 27 Aug 2020 22:19:22 -0700 Subject: bpf: Make bpf_link_info.iter similar to bpf_iter_link_info bpf_link_info.iter is used by link_query to return bpf_iter_link_info to user space. Fields may be different, e.g., map_fd vs. map_id, so we cannot reuse the exact structure. But make them similar, e.g., struct bpf_link_info { /* common fields */ union { struct { ... } raw_tracepoint; struct { ... } tracing; ... struct { /* common fields for iter */ union { struct { __u32 map_id; } map; /* other structs for other targets */ }; }; }; }; so the structure is extensible the same way as bpf_iter_link_info. Fixes: 6b0a249a301e ("bpf: Implement link_query for bpf iterators") Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200828051922.758950-1-yhs@fb.com --- include/uapi/linux/bpf.h | 6 ++++-- tools/include/uapi/linux/bpf.h | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0388bc0200b0..ef7af384f5ee 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4251,8 +4251,10 @@ struct bpf_link_info { __aligned_u64 target_name; /* in/out: target_name buffer ptr */ __u32 target_name_len; /* in/out: target_name buffer len */ union { - __u32 map_id; - } map; + struct { + __u32 map_id; + } map; + }; } iter; struct { __u32 netns_ino; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0388bc0200b0..ef7af384f5ee 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4251,8 +4251,10 @@ struct bpf_link_info { __aligned_u64 target_name; /* in/out: target_name buffer ptr */ __u32 target_name_len; /* in/out: target_name buffer len */ union { - __u32 map_id; - } map; + struct { + __u32 map_id; + } map; + }; } iter; struct { __u32 netns_ino; -- cgit v1.3.1 From d557ea39a5f894630c403b78703ac92b08b7dd62 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 27 Aug 2020 18:18:19 -0700 Subject: bpf: selftests: Add test for different inner map size This patch tests the inner map size can be different for reuseport_sockarray but has to be the same for arraymap. A new subtest "diff_size" is added for this. The existing test is moved to a subtest "lookup_update". Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200828011819.1970825-1-kafai@fb.com --- .../selftests/bpf/prog_tests/btf_map_in_map.c | 35 +++++++++++++++++++++- .../selftests/bpf/progs/test_btf_map_in_map.c | 31 +++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c index 6ccecbd39476..540fea4c91a5 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c @@ -53,7 +53,7 @@ static int kern_sync_rcu(void) return err; } -void test_btf_map_in_map(void) +static void test_lookup_update(void) { int err, key = 0, val, i; struct test_btf_map_in_map *skel; @@ -143,3 +143,36 @@ void test_btf_map_in_map(void) cleanup: test_btf_map_in_map__destroy(skel); } + +static void test_diff_size(void) +{ + struct test_btf_map_in_map *skel; + int err, inner_map_fd, zero = 0; + + skel = test_btf_map_in_map__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n")) + return; + + inner_map_fd = bpf_map__fd(skel->maps.sockarr_sz2); + err = bpf_map_update_elem(bpf_map__fd(skel->maps.outer_sockarr), &zero, + &inner_map_fd, 0); + CHECK(err, "outer_sockarr inner map size check", + "cannot use a different size inner_map\n"); + + inner_map_fd = bpf_map__fd(skel->maps.inner_map_sz2); + err = bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &zero, + &inner_map_fd, 0); + CHECK(!err, "outer_arr inner map size check", + "incorrectly updated with a different size inner_map\n"); + + test_btf_map_in_map__destroy(skel); +} + +void test_btf_map_in_map(void) +{ + if (test__start_subtest("lookup_update")) + test_lookup_update(); + + if (test__start_subtest("diff_size")) + test_diff_size(); +} diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c index e5093796be97..193fe0198b21 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c @@ -11,6 +11,13 @@ struct inner_map { } inner_map1 SEC(".maps"), inner_map2 SEC(".maps"); +struct inner_map_sz2 { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 2); + __type(key, int); + __type(value, int); +} inner_map_sz2 SEC(".maps"); + struct outer_arr { __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); __uint(max_entries, 3); @@ -50,6 +57,30 @@ struct outer_hash { }, }; +struct sockarr_sz1 { + __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} sockarr_sz1 SEC(".maps"); + +struct sockarr_sz2 { + __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY); + __uint(max_entries, 2); + __type(key, int); + __type(value, int); +} sockarr_sz2 SEC(".maps"); + +struct outer_sockarr_sz1 { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __array(values, struct sockarr_sz1); +} outer_sockarr SEC(".maps") = { + .values = { (void *)&sockarr_sz1 }, +}; + int input = 0; SEC("raw_tp/sys_enter") -- cgit v1.3.1 From 1e6c62a8821557720a9b2ea9617359b264f2f67c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 27 Aug 2020 15:01:11 -0700 Subject: bpf: Introduce sleepable BPF programs Introduce sleepable BPF programs that can request such property for themselves via BPF_F_SLEEPABLE flag at program load time. In such case they will be able to use helpers like bpf_copy_from_user() that might sleep. At present only fentry/fexit/fmod_ret and lsm programs can request to be sleepable and only when they are attached to kernel functions that are known to allow sleeping. The non-sleepable programs are relying on implicit rcu_read_lock() and migrate_disable() to protect life time of programs, maps that they use and per-cpu kernel structures used to pass info between bpf programs and the kernel. The sleepable programs cannot be enclosed into rcu_read_lock(). migrate_disable() maps to preempt_disable() in non-RT kernels, so the progs should not be enclosed in migrate_disable() as well. Therefore rcu_read_lock_trace is used to protect the life time of sleepable progs. There are many networking and tracing program types. In many cases the 'struct bpf_prog *' pointer itself is rcu protected within some other kernel data structure and the kernel code is using rcu_dereference() to load that program pointer and call BPF_PROG_RUN() on it. All these cases are not touched. Instead sleepable bpf programs are allowed with bpf trampoline only. The program pointers are hard-coded into generated assembly of bpf trampoline and synchronize_rcu_tasks_trace() is used to protect the life time of the program. The same trampoline can hold both sleepable and non-sleepable progs. When rcu_read_lock_trace is held it means that some sleepable bpf program is running from bpf trampoline. Those programs can use bpf arrays and preallocated hash/lru maps. These map types are waiting on programs to complete via synchronize_rcu_tasks_trace(); Updates to trampoline now has to do synchronize_rcu_tasks_trace() and synchronize_rcu_tasks() to wait for sleepable progs to finish and for trampoline assembly to finish. This is the first step of introducing sleepable progs. Eventually dynamically allocated hash maps can be allowed and networking program types can become sleepable too. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Reviewed-by: Josef Bacik Acked-by: Andrii Nakryiko Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20200827220114.69225-3-alexei.starovoitov@gmail.com --- arch/x86/net/bpf_jit_comp.c | 32 +++++++++++------ include/linux/bpf.h | 3 ++ include/uapi/linux/bpf.h | 8 +++++ init/Kconfig | 1 + kernel/bpf/arraymap.c | 1 + kernel/bpf/hashtab.c | 12 +++---- kernel/bpf/syscall.c | 13 +++++-- kernel/bpf/trampoline.c | 28 +++++++++++++-- kernel/bpf/verifier.c | 81 ++++++++++++++++++++++++++++++++++++++++-- tools/include/uapi/linux/bpf.h | 8 +++++ 10 files changed, 162 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 42b6709e6dc7..7d9ea7b41c71 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1379,10 +1379,15 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, u8 *prog = *pprog; int cnt = 0; - if (emit_call(&prog, __bpf_prog_enter, prog)) - return -EINVAL; - /* remember prog start time returned by __bpf_prog_enter */ - emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); + if (p->aux->sleepable) { + if (emit_call(&prog, __bpf_prog_enter_sleepable, prog)) + return -EINVAL; + } else { + if (emit_call(&prog, __bpf_prog_enter, prog)) + return -EINVAL; + /* remember prog start time returned by __bpf_prog_enter */ + emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); + } /* arg1: lea rdi, [rbp - stack_size] */ EMIT4(0x48, 0x8D, 0x7D, -stack_size); @@ -1402,13 +1407,18 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, if (mod_ret) emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); - /* arg1: mov rdi, progs[i] */ - emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, - (u32) (long) p); - /* arg2: mov rsi, rbx <- start time in nsec */ - emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); - if (emit_call(&prog, __bpf_prog_exit, prog)) - return -EINVAL; + if (p->aux->sleepable) { + if (emit_call(&prog, __bpf_prog_exit_sleepable, prog)) + return -EINVAL; + } else { + /* arg1: mov rdi, progs[i] */ + emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, + (u32) (long) p); + /* arg2: mov rsi, rbx <- start time in nsec */ + emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); + if (emit_call(&prog, __bpf_prog_exit, prog)) + return -EINVAL; + } *pprog = prog; return 0; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index dbba82a80087..4dd7e927621d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -539,6 +539,8 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, /* these two functions are called from generated trampoline */ u64 notrace __bpf_prog_enter(void); void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); +void notrace __bpf_prog_enter_sleepable(void); +void notrace __bpf_prog_exit_sleepable(void); struct bpf_ksym { unsigned long start; @@ -734,6 +736,7 @@ struct bpf_prog_aux { bool offload_requested; bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ bool func_proto_unreliable; + bool sleepable; enum bpf_tramp_prog_type trampoline_prog_type; struct bpf_trampoline *trampoline; struct hlist_node tramp_hlist; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ef7af384f5ee..6e8b706aeb05 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -346,6 +346,14 @@ enum bpf_link_type { /* The verifier internal test flag. Behavior is undefined */ #define BPF_F_TEST_STATE_FREQ (1U << 3) +/* If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will + * restrict map and helper usage for such programs. Sleepable BPF programs can + * only be attached to hooks where kernel execution context allows sleeping. + * Such programs are allowed to use helpers that may sleep like + * bpf_copy_from_user(). + */ +#define BPF_F_SLEEPABLE (1U << 4) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * two extensions: * diff --git a/init/Kconfig b/init/Kconfig index fc10f7ede5f6..6ecc00e130ff 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1691,6 +1691,7 @@ config BPF_SYSCALL bool "Enable bpf() system call" select BPF select IRQ_WORK + select TASKS_TRACE_RCU default n help Enable the bpf() system call that allows to manipulate eBPF diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index d851ebbcf302..e046fb7d17cd 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "map_in_map.h" diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index ad80f45774e7..fe0e06284d33 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "percpu_freelist.h" #include "bpf_lru_list.h" #include "map_in_map.h" @@ -577,8 +578,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) struct htab_elem *l; u32 hash, key_size; - /* Must be called with rcu_read_lock. */ - WARN_ON_ONCE(!rcu_read_lock_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held()); key_size = map->key_size; @@ -941,7 +941,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, /* unknown flags */ return -EINVAL; - WARN_ON_ONCE(!rcu_read_lock_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held()); key_size = map->key_size; @@ -1032,7 +1032,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, /* unknown flags */ return -EINVAL; - WARN_ON_ONCE(!rcu_read_lock_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held()); key_size = map->key_size; @@ -1220,7 +1220,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) u32 hash, key_size; int ret = -ENOENT; - WARN_ON_ONCE(!rcu_read_lock_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held()); key_size = map->key_size; @@ -1252,7 +1252,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) u32 hash, key_size; int ret = -ENOENT; - WARN_ON_ONCE(!rcu_read_lock_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held()); key_size = map->key_size; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b86b1155b748..4108ef3b828b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -29,6 +29,7 @@ #include #include #include +#include #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ @@ -1731,10 +1732,14 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) btf_put(prog->aux->btf); bpf_prog_free_linfo(prog); - if (deferred) - call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); - else + if (deferred) { + if (prog->aux->sleepable) + call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu); + else + call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); + } else { __bpf_prog_put_rcu(&prog->aux->rcu); + } } static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) @@ -2104,6 +2109,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | BPF_F_ANY_ALIGNMENT | BPF_F_TEST_STATE_FREQ | + BPF_F_SLEEPABLE | BPF_F_TEST_RND_HI32)) return -EINVAL; @@ -2159,6 +2165,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) } prog->aux->offload_requested = !!attr->prog_ifindex; + prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; err = security_bpf_prog_alloc(prog->aux); if (err) diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 9be85aa4ec5f..c2b76545153c 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -7,6 +7,8 @@ #include #include #include +#include +#include /* dummy _ops. The verifier will operate on target program's ops. */ const struct bpf_verifier_ops bpf_extension_verifier_ops = { @@ -210,9 +212,12 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) * updates to trampoline would change the code from underneath the * preempted task. Hence wait for tasks to voluntarily schedule or go * to userspace. + * The same trampoline can hold both sleepable and non-sleepable progs. + * synchronize_rcu_tasks_trace() is needed to make sure all sleepable + * programs finish executing. + * Wait for these two grace periods together. */ - - synchronize_rcu_tasks(); + synchronize_rcu_mult(call_rcu_tasks, call_rcu_tasks_trace); err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2, &tr->func.model, flags, tprogs, @@ -344,7 +349,14 @@ void bpf_trampoline_put(struct bpf_trampoline *tr) if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT]))) goto out; bpf_image_ksym_del(&tr->ksym); - /* wait for tasks to get out of trampoline before freeing it */ + /* This code will be executed when all bpf progs (both sleepable and + * non-sleepable) went through + * bpf_prog_put()->call_rcu[_tasks_trace]()->bpf_prog_free_deferred(). + * Hence no need for another synchronize_rcu_tasks_trace() here, + * but synchronize_rcu_tasks() is still needed, since trampoline + * may not have had any sleepable programs and we need to wait + * for tasks to get out of trampoline code before freeing it. + */ synchronize_rcu_tasks(); bpf_jit_free_exec(tr->image); hlist_del(&tr->hlist); @@ -394,6 +406,16 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start) rcu_read_unlock(); } +void notrace __bpf_prog_enter_sleepable(void) +{ + rcu_read_lock_trace(); +} + +void notrace __bpf_prog_exit_sleepable(void) +{ + rcu_read_unlock_trace(); +} + int __weak arch_prepare_bpf_trampoline(void *image, void *image_end, const struct btf_func_model *m, u32 flags, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6f5a9f51cc03..3ebfdb7bd427 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "disasm.h" @@ -9367,6 +9368,23 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, return -EINVAL; } + if (prog->aux->sleepable) + switch (map->map_type) { + case BPF_MAP_TYPE_HASH: + case BPF_MAP_TYPE_LRU_HASH: + case BPF_MAP_TYPE_ARRAY: + if (!is_preallocated_map(map)) { + verbose(env, + "Sleepable programs can only use preallocated hash maps\n"); + return -EINVAL; + } + break; + default: + verbose(env, + "Sleepable programs can only use array and hash maps\n"); + return -EINVAL; + } + return 0; } @@ -10985,6 +11003,36 @@ static int check_attach_modify_return(struct bpf_prog *prog, unsigned long addr) return -EINVAL; } +/* non exhaustive list of sleepable bpf_lsm_*() functions */ +BTF_SET_START(btf_sleepable_lsm_hooks) +#ifdef CONFIG_BPF_LSM +BTF_ID(func, bpf_lsm_file_mprotect) +BTF_ID(func, bpf_lsm_bprm_committed_creds) +#endif +BTF_SET_END(btf_sleepable_lsm_hooks) + +static int check_sleepable_lsm_hook(u32 btf_id) +{ + return btf_id_set_contains(&btf_sleepable_lsm_hooks, btf_id); +} + +/* list of non-sleepable functions that are otherwise on + * ALLOW_ERROR_INJECTION list + */ +BTF_SET_START(btf_non_sleepable_error_inject) +/* Three functions below can be called from sleepable and non-sleepable context. + * Assume non-sleepable from bpf safety point of view. + */ +BTF_ID(func, __add_to_page_cache_locked) +BTF_ID(func, should_fail_alloc_page) +BTF_ID(func, should_failslab) +BTF_SET_END(btf_non_sleepable_error_inject) + +static int check_non_sleepable_error_inject(u32 btf_id) +{ + return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id); +} + static int check_attach_btf_id(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog; @@ -11002,6 +11050,12 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) long addr; u64 key; + if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING && + prog->type != BPF_PROG_TYPE_LSM) { + verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n"); + return -EINVAL; + } + if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) return check_struct_ops_btf_id(env); @@ -11210,13 +11264,36 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) } } - if (prog->expected_attach_type == BPF_MODIFY_RETURN) { + if (prog->aux->sleepable) { + ret = -EINVAL; + switch (prog->type) { + case BPF_PROG_TYPE_TRACING: + /* fentry/fexit/fmod_ret progs can be sleepable only if they are + * attached to ALLOW_ERROR_INJECTION and are not in denylist. + */ + if (!check_non_sleepable_error_inject(btf_id) && + within_error_injection_list(addr)) + ret = 0; + break; + case BPF_PROG_TYPE_LSM: + /* LSM progs check that they are attached to bpf_lsm_*() funcs. + * Only some of them are sleepable. + */ + if (check_sleepable_lsm_hook(btf_id)) + ret = 0; + break; + default: + break; + } + if (ret) + verbose(env, "%s is not sleepable\n", + prog->aux->attach_func_name); + } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) { ret = check_attach_modify_return(prog, addr); if (ret) verbose(env, "%s() is not modifiable\n", prog->aux->attach_func_name); } - if (ret) goto out; tr->func.addr = (void *)addr; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index ef7af384f5ee..6e8b706aeb05 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -346,6 +346,14 @@ enum bpf_link_type { /* The verifier internal test flag. Behavior is undefined */ #define BPF_F_TEST_STATE_FREQ (1U << 3) +/* If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will + * restrict map and helper usage for such programs. Sleepable BPF programs can + * only be attached to hooks where kernel execution context allows sleeping. + * Such programs are allowed to use helpers that may sleep like + * bpf_copy_from_user(). + */ +#define BPF_F_SLEEPABLE (1U << 4) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * two extensions: * -- cgit v1.3.1 From 07be4c4a3e7a0db148e44b16c5190e753d1c8569 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 27 Aug 2020 15:01:12 -0700 Subject: bpf: Add bpf_copy_from_user() helper. Sleepable BPF programs can now use copy_from_user() to access user memory. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20200827220114.69225-4-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 8 ++++++++ kernel/bpf/helpers.c | 22 ++++++++++++++++++++++ kernel/trace/bpf_trace.c | 2 ++ tools/include/uapi/linux/bpf.h | 8 ++++++++ 5 files changed, 41 insertions(+) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4dd7e927621d..c6d9f2c444f4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1784,6 +1784,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; +extern const struct bpf_func_proto bpf_copy_from_user_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6e8b706aeb05..a613750d5515 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3569,6 +3569,13 @@ union bpf_attr { * On success, the strictly positive length of the string, * including the trailing NUL character. On error, a negative * value. + * + * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr) + * Description + * Read *size* bytes from user space address *user_ptr* and store + * the data in *dst*. This is a wrapper of copy_from_user(). + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3719,6 +3726,7 @@ union bpf_attr { FN(inode_storage_get), \ FN(inode_storage_delete), \ FN(d_path), \ + FN(copy_from_user), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index be43ab3e619f..5cc7425ee476 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -601,6 +601,28 @@ const struct bpf_func_proto bpf_event_output_data_proto = { .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; +BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size, + const void __user *, user_ptr) +{ + int ret = copy_from_user(dst, user_ptr, size); + + if (unlikely(ret)) { + memset(dst, 0, size); + ret = -EFAULT; + } + + return ret; +} + +const struct bpf_func_proto bpf_copy_from_user_proto = { + .func = bpf_copy_from_user, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, + .arg3_type = ARG_ANYTHING, +}; + const struct bpf_func_proto bpf_get_current_task_proto __weak; const struct bpf_func_proto bpf_probe_read_user_proto __weak; const struct bpf_func_proto bpf_probe_read_user_str_proto __weak; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index d973d891f2e2..b2a5380eb187 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1228,6 +1228,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_jiffies64_proto; case BPF_FUNC_get_task_stack: return &bpf_get_task_stack_proto; + case BPF_FUNC_copy_from_user: + return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL; default: return NULL; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6e8b706aeb05..a613750d5515 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3569,6 +3569,13 @@ union bpf_attr { * On success, the strictly positive length of the string, * including the trailing NUL character. On error, a negative * value. + * + * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr) + * Description + * Read *size* bytes from user space address *user_ptr* and store + * the data in *dst*. This is a wrapper of copy_from_user(). + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3719,6 +3726,7 @@ union bpf_attr { FN(inode_storage_get), \ FN(inode_storage_delete), \ FN(d_path), \ + FN(copy_from_user), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.3.1 From 2b288740a1072235166b2706e139f2725b5ea26f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 27 Aug 2020 15:01:13 -0700 Subject: libbpf: Support sleepable progs Pass request to load program as sleepable via ".s" suffix in the section name. If it happens in the future that all map types and helpers are allowed with BPF_F_SLEEPABLE flag "fmod_ret/" and "lsm/" can be aliased to "fmod_ret.s/" and "lsm.s/" to make all lsm and fmod_ret programs sleepable by default. The fentry and fexit programs would always need to have sleepable vs non-sleepable distinction, since not all fentry/fexit progs will be attached to sleepable kernel functions. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: KP Singh Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200827220114.69225-5-alexei.starovoitov@gmail.com --- tools/lib/bpf/libbpf.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 8cdb2528482e..b688aadf09c5 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -208,6 +208,7 @@ struct bpf_sec_def { bool is_exp_attach_type_optional; bool is_attachable; bool is_attach_btf; + bool is_sleepable; attach_fn_t attach_fn; }; @@ -6291,6 +6292,8 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, /* couldn't guess, but user might manually specify */ continue; + if (prog->sec_def->is_sleepable) + prog->prog_flags |= BPF_F_SLEEPABLE; bpf_program__set_type(prog, prog->sec_def->prog_type); bpf_program__set_expected_attach_type(prog, prog->sec_def->expected_attach_type); @@ -7559,6 +7562,21 @@ static const struct bpf_sec_def section_defs[] = { .expected_attach_type = BPF_TRACE_FEXIT, .is_attach_btf = true, .attach_fn = attach_trace), + SEC_DEF("fentry.s/", TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .is_attach_btf = true, + .is_sleepable = true, + .attach_fn = attach_trace), + SEC_DEF("fmod_ret.s/", TRACING, + .expected_attach_type = BPF_MODIFY_RETURN, + .is_attach_btf = true, + .is_sleepable = true, + .attach_fn = attach_trace), + SEC_DEF("fexit.s/", TRACING, + .expected_attach_type = BPF_TRACE_FEXIT, + .is_attach_btf = true, + .is_sleepable = true, + .attach_fn = attach_trace), SEC_DEF("freplace/", EXT, .is_attach_btf = true, .attach_fn = attach_trace), @@ -7566,6 +7584,11 @@ static const struct bpf_sec_def section_defs[] = { .is_attach_btf = true, .expected_attach_type = BPF_LSM_MAC, .attach_fn = attach_lsm), + SEC_DEF("lsm.s/", LSM, + .is_attach_btf = true, + .is_sleepable = true, + .expected_attach_type = BPF_LSM_MAC, + .attach_fn = attach_lsm), SEC_DEF("iter/", TRACING, .expected_attach_type = BPF_TRACE_ITER, .is_attach_btf = true, @@ -8288,7 +8311,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, prog->prog_ifindex = attr->ifindex; prog->log_level = attr->log_level; - prog->prog_flags = attr->prog_flags; + prog->prog_flags |= attr->prog_flags; if (!first_prog) first_prog = prog; } -- cgit v1.3.1 From e68a144547fc7a956952260539cb7b8bb9afbcc0 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 27 Aug 2020 15:01:14 -0700 Subject: selftests/bpf: Add sleepable tests Modify few tests to sanity test sleepable bpf functionality. Running 'bench trig-fentry-sleep' vs 'bench trig-fentry' and 'perf report': sleepable with SRCU: 3.86% bench [k] __srcu_read_unlock 3.22% bench [k] __srcu_read_lock 0.92% bench [k] bpf_prog_740d4210cdcd99a3_bench_trigger_fentry_sleep 0.50% bench [k] bpf_trampoline_10297 0.26% bench [k] __bpf_prog_exit_sleepable 0.21% bench [k] __bpf_prog_enter_sleepable sleepable with RCU_TRACE: 0.79% bench [k] bpf_prog_740d4210cdcd99a3_bench_trigger_fentry_sleep 0.72% bench [k] bpf_trampoline_10381 0.31% bench [k] __bpf_prog_exit_sleepable 0.29% bench [k] __bpf_prog_enter_sleepable non-sleepable with RCU: 0.88% bench [k] bpf_prog_740d4210cdcd99a3_bench_trigger_fentry 0.84% bench [k] bpf_trampoline_10297 0.13% bench [k] __bpf_prog_enter 0.12% bench [k] __bpf_prog_exit Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20200827220114.69225-6-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/bench.c | 2 + tools/testing/selftests/bpf/benchs/bench_trigger.c | 17 ++++++ tools/testing/selftests/bpf/prog_tests/test_lsm.c | 9 +++ tools/testing/selftests/bpf/progs/lsm.c | 66 +++++++++++++++++++++- tools/testing/selftests/bpf/progs/trigger_bench.c | 7 +++ 5 files changed, 99 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 944ad4721c83..1a427685a8a8 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -317,6 +317,7 @@ extern const struct bench bench_trig_tp; extern const struct bench bench_trig_rawtp; extern const struct bench bench_trig_kprobe; extern const struct bench bench_trig_fentry; +extern const struct bench bench_trig_fentry_sleep; extern const struct bench bench_trig_fmodret; extern const struct bench bench_rb_libbpf; extern const struct bench bench_rb_custom; @@ -338,6 +339,7 @@ static const struct bench *benchs[] = { &bench_trig_rawtp, &bench_trig_kprobe, &bench_trig_fentry, + &bench_trig_fentry_sleep, &bench_trig_fmodret, &bench_rb_libbpf, &bench_rb_custom, diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 49c22832f216..2a0b6c9885a4 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -90,6 +90,12 @@ static void trigger_fentry_setup() attach_bpf(ctx.skel->progs.bench_trigger_fentry); } +static void trigger_fentry_sleep_setup() +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep); +} + static void trigger_fmodret_setup() { setup_ctx(); @@ -155,6 +161,17 @@ const struct bench bench_trig_fentry = { .report_final = hits_drops_report_final, }; +const struct bench bench_trig_fentry_sleep = { + .name = "trig-fentry-sleep", + .validate = trigger_validate, + .setup = trigger_fentry_sleep_setup, + .producer_thread = trigger_producer, + .consumer_thread = trigger_consumer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + const struct bench bench_trig_fmodret = { .name = "trig-fmodret", .validate = trigger_validate, diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c index b17eb2045c1d..6ab29226c99b 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c +++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "lsm.skel.h" @@ -55,6 +56,7 @@ void test_test_lsm(void) { struct lsm *skel = NULL; int err, duration = 0; + int buf = 1234; skel = lsm__open_and_load(); if (CHECK(!skel, "skel_load", "lsm skeleton failed\n")) @@ -81,6 +83,13 @@ void test_test_lsm(void) CHECK(skel->bss->mprotect_count != 1, "mprotect_count", "mprotect_count = %d\n", skel->bss->mprotect_count); + syscall(__NR_setdomainname, &buf, -2L); + syscall(__NR_setdomainname, 0, -3L); + syscall(__NR_setdomainname, ~0L, -4L); + + CHECK(skel->bss->copy_test != 3, "copy_test", + "copy_test = %d\n", skel->bss->copy_test); + close_prog: lsm__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c index b4598d4bc4f7..49fa6ca99755 100644 --- a/tools/testing/selftests/bpf/progs/lsm.c +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -9,16 +9,41 @@ #include #include +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} array SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} hash SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} lru_hash SEC(".maps"); + char _license[] SEC("license") = "GPL"; int monitored_pid = 0; int mprotect_count = 0; int bprm_count = 0; -SEC("lsm/file_mprotect") +SEC("lsm.s/file_mprotect") int BPF_PROG(test_int_hook, struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot, int ret) { + char args[64]; + __u32 key = 0; + __u64 *value; + if (ret != 0) return ret; @@ -28,6 +53,18 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma, is_stack = (vma->vm_start <= vma->vm_mm->start_stack && vma->vm_end >= vma->vm_mm->start_stack); + bpf_copy_from_user(args, sizeof(args), (void *)vma->vm_mm->arg_start); + + value = bpf_map_lookup_elem(&array, &key); + if (value) + *value = 0; + value = bpf_map_lookup_elem(&hash, &key); + if (value) + *value = 0; + value = bpf_map_lookup_elem(&lru_hash, &key); + if (value) + *value = 0; + if (is_stack && monitored_pid == pid) { mprotect_count++; ret = -EPERM; @@ -36,7 +73,7 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma, return ret; } -SEC("lsm/bprm_committed_creds") +SEC("lsm.s/bprm_committed_creds") int BPF_PROG(test_void_hook, struct linux_binprm *bprm) { __u32 pid = bpf_get_current_pid_tgid() >> 32; @@ -46,3 +83,28 @@ int BPF_PROG(test_void_hook, struct linux_binprm *bprm) return 0; } +SEC("lsm/task_free") /* lsm/ is ok, lsm.s/ fails */ +int BPF_PROG(test_task_free, struct task_struct *task) +{ + return 0; +} + +int copy_test = 0; + +SEC("fentry.s/__x64_sys_setdomainname") +int BPF_PROG(test_sys_setdomainname, struct pt_regs *regs) +{ + void *ptr = (void *)PT_REGS_PARM1(regs); + int len = PT_REGS_PARM2(regs); + int buf = 0; + long ret; + + ret = bpf_copy_from_user(&buf, sizeof(buf), ptr); + if (len == -2 && ret == 0 && buf == 1234) + copy_test++; + if (len == -3 && ret == -EFAULT) + copy_test++; + if (len == -4 && ret == -EFAULT) + copy_test++; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index 8b36b6640e7e..9a4d09590b3d 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -39,6 +39,13 @@ int bench_trigger_fentry(void *ctx) return 0; } +SEC("fentry.s/__x64_sys_getpgid") +int bench_trigger_fentry_sleep(void *ctx) +{ + __sync_add_and_fetch(&hits, 1); + return 0; +} + SEC("fmod_ret/__x64_sys_getpgid") int bench_trigger_fmodret(void *ctx) { -- cgit v1.3.1 From 2f6324a3937f8517967d94daef2ba0bdceceece1 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 28 Aug 2020 10:26:27 +0200 Subject: libbpf: Support shared umems between queues and devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for shared umems between hardware queues and devices to the AF_XDP part of libbpf. This so that zero-copy can be achieved in applications that want to send and receive packets between HW queues on one device or between different devices/netdevs. In order to create sockets that share a umem between hardware queues and devices, a new function has been added called xsk_socket__create_shared(). It takes the same arguments as xsk_socket_create() plus references to a fill ring and a completion ring. So for every socket that share a umem, you need to have one more set of fill and completion rings. This in order to maintain the single-producer single-consumer semantics of the rings. You can create all the sockets via the new xsk_socket__create_shared() call, or create the first one with xsk_socket__create() and the rest with xsk_socket__create_shared(). Both methods work. Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/1598603189-32145-14-git-send-email-magnus.karlsson@intel.com --- tools/lib/bpf/libbpf.map | 1 + tools/lib/bpf/xsk.c | 376 ++++++++++++++++++++++++++++++----------------- tools/lib/bpf/xsk.h | 9 ++ 3 files changed, 254 insertions(+), 132 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 66a6286d0716..3fedcdc4ae2f 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -306,4 +306,5 @@ LIBBPF_0.2.0 { perf_buffer__buffer_fd; perf_buffer__epoll_fd; perf_buffer__consume_buffer; + xsk_socket__create_shared; } LIBBPF_0.1.0; diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index a9b02103767b..49c324594792 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -45,26 +46,35 @@ #endif struct xsk_umem { - struct xsk_ring_prod *fill; - struct xsk_ring_cons *comp; + struct xsk_ring_prod *fill_save; + struct xsk_ring_cons *comp_save; char *umem_area; struct xsk_umem_config config; int fd; int refcount; + struct list_head ctx_list; +}; + +struct xsk_ctx { + struct xsk_ring_prod *fill; + struct xsk_ring_cons *comp; + __u32 queue_id; + struct xsk_umem *umem; + int refcount; + int ifindex; + struct list_head list; + int prog_fd; + int xsks_map_fd; + char ifname[IFNAMSIZ]; }; struct xsk_socket { struct xsk_ring_cons *rx; struct xsk_ring_prod *tx; __u64 outstanding_tx; - struct xsk_umem *umem; + struct xsk_ctx *ctx; struct xsk_socket_config config; int fd; - int ifindex; - int prog_fd; - int xsks_map_fd; - __u32 queue_id; - char ifname[IFNAMSIZ]; }; struct xsk_nl_info { @@ -200,15 +210,73 @@ static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off) return -EINVAL; } +static int xsk_create_umem_rings(struct xsk_umem *umem, int fd, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp) +{ + struct xdp_mmap_offsets off; + void *map; + int err; + + err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, + &umem->config.fill_size, + sizeof(umem->config.fill_size)); + if (err) + return -errno; + + err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, + &umem->config.comp_size, + sizeof(umem->config.comp_size)); + if (err) + return -errno; + + err = xsk_get_mmap_offsets(fd, &off); + if (err) + return -errno; + + map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, + XDP_UMEM_PGOFF_FILL_RING); + if (map == MAP_FAILED) + return -errno; + + fill->mask = umem->config.fill_size - 1; + fill->size = umem->config.fill_size; + fill->producer = map + off.fr.producer; + fill->consumer = map + off.fr.consumer; + fill->flags = map + off.fr.flags; + fill->ring = map + off.fr.desc; + fill->cached_cons = umem->config.fill_size; + + map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, + XDP_UMEM_PGOFF_COMPLETION_RING); + if (map == MAP_FAILED) { + err = -errno; + goto out_mmap; + } + + comp->mask = umem->config.comp_size - 1; + comp->size = umem->config.comp_size; + comp->producer = map + off.cr.producer; + comp->consumer = map + off.cr.consumer; + comp->flags = map + off.cr.flags; + comp->ring = map + off.cr.desc; + + return 0; + +out_mmap: + munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64)); + return err; +} + int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, struct xsk_ring_prod *fill, struct xsk_ring_cons *comp, const struct xsk_umem_config *usr_config) { - struct xdp_mmap_offsets off; struct xdp_umem_reg mr; struct xsk_umem *umem; - void *map; int err; if (!umem_area || !umem_ptr || !fill || !comp) @@ -227,6 +295,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, } umem->umem_area = umem_area; + INIT_LIST_HEAD(&umem->ctx_list); xsk_set_umem_config(&umem->config, usr_config); memset(&mr, 0, sizeof(mr)); @@ -241,71 +310,16 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, err = -errno; goto out_socket; } - err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_FILL_RING, - &umem->config.fill_size, - sizeof(umem->config.fill_size)); - if (err) { - err = -errno; - goto out_socket; - } - err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, - &umem->config.comp_size, - sizeof(umem->config.comp_size)); - if (err) { - err = -errno; - goto out_socket; - } - err = xsk_get_mmap_offsets(umem->fd, &off); - if (err) { - err = -errno; - goto out_socket; - } - - map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd, - XDP_UMEM_PGOFF_FILL_RING); - if (map == MAP_FAILED) { - err = -errno; + err = xsk_create_umem_rings(umem, umem->fd, fill, comp); + if (err) goto out_socket; - } - - umem->fill = fill; - fill->mask = umem->config.fill_size - 1; - fill->size = umem->config.fill_size; - fill->producer = map + off.fr.producer; - fill->consumer = map + off.fr.consumer; - fill->flags = map + off.fr.flags; - fill->ring = map + off.fr.desc; - fill->cached_prod = *fill->producer; - /* cached_cons is "size" bigger than the real consumer pointer - * See xsk_prod_nb_free - */ - fill->cached_cons = *fill->consumer + umem->config.fill_size; - - map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd, - XDP_UMEM_PGOFF_COMPLETION_RING); - if (map == MAP_FAILED) { - err = -errno; - goto out_mmap; - } - - umem->comp = comp; - comp->mask = umem->config.comp_size - 1; - comp->size = umem->config.comp_size; - comp->producer = map + off.cr.producer; - comp->consumer = map + off.cr.consumer; - comp->flags = map + off.cr.flags; - comp->ring = map + off.cr.desc; - comp->cached_prod = *comp->producer; - comp->cached_cons = *comp->consumer; + umem->fill_save = fill; + umem->comp_save = comp; *umem_ptr = umem; return 0; -out_mmap: - munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64)); out_socket: close(umem->fd); out_umem_alloc: @@ -339,6 +353,7 @@ DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4) static int xsk_load_xdp_prog(struct xsk_socket *xsk) { static const int log_buf_size = 16 * 1024; + struct xsk_ctx *ctx = xsk->ctx; char log_buf[log_buf_size]; int err, prog_fd; @@ -366,7 +381,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) /* *(u32 *)(r10 - 4) = r2 */ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4), /* r1 = xskmap[] */ - BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), /* r3 = XDP_PASS */ BPF_MOV64_IMM(BPF_REG_3, 2), /* call bpf_redirect_map */ @@ -378,7 +393,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) /* r2 += -4 */ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r1 = xskmap[] */ - BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), /* call bpf_map_lookup_elem */ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), /* r1 = r0 */ @@ -390,7 +405,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) /* r2 = *(u32 *)(r10 - 4) */ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4), /* r1 = xskmap[] */ - BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), /* r3 = 0 */ BPF_MOV64_IMM(BPF_REG_3, 0), /* call bpf_redirect_map */ @@ -408,19 +423,21 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) return prog_fd; } - err = bpf_set_link_xdp_fd(xsk->ifindex, prog_fd, xsk->config.xdp_flags); + err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, prog_fd, + xsk->config.xdp_flags); if (err) { close(prog_fd); return err; } - xsk->prog_fd = prog_fd; + ctx->prog_fd = prog_fd; return 0; } static int xsk_get_max_queues(struct xsk_socket *xsk) { struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; + struct xsk_ctx *ctx = xsk->ctx; struct ifreq ifr = {}; int fd, err, ret; @@ -429,7 +446,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk) return -errno; ifr.ifr_data = (void *)&channels; - memcpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1); + memcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ - 1); ifr.ifr_name[IFNAMSIZ - 1] = '\0'; err = ioctl(fd, SIOCETHTOOL, &ifr); if (err && errno != EOPNOTSUPP) { @@ -457,6 +474,7 @@ out: static int xsk_create_bpf_maps(struct xsk_socket *xsk) { + struct xsk_ctx *ctx = xsk->ctx; int max_queues; int fd; @@ -469,15 +487,17 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk) if (fd < 0) return fd; - xsk->xsks_map_fd = fd; + ctx->xsks_map_fd = fd; return 0; } static void xsk_delete_bpf_maps(struct xsk_socket *xsk) { - bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id); - close(xsk->xsks_map_fd); + struct xsk_ctx *ctx = xsk->ctx; + + bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id); + close(ctx->xsks_map_fd); } static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) @@ -485,10 +505,11 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info); __u32 map_len = sizeof(struct bpf_map_info); struct bpf_prog_info prog_info = {}; + struct xsk_ctx *ctx = xsk->ctx; struct bpf_map_info map_info; int fd, err; - err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); + err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len); if (err) return err; @@ -502,11 +523,11 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) prog_info.nr_map_ids = num_maps; prog_info.map_ids = (__u64)(unsigned long)map_ids; - err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); + err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len); if (err) goto out_map_ids; - xsk->xsks_map_fd = -1; + ctx->xsks_map_fd = -1; for (i = 0; i < prog_info.nr_map_ids; i++) { fd = bpf_map_get_fd_by_id(map_ids[i]); @@ -520,7 +541,7 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) } if (!strcmp(map_info.name, "xsks_map")) { - xsk->xsks_map_fd = fd; + ctx->xsks_map_fd = fd; continue; } @@ -528,7 +549,7 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) } err = 0; - if (xsk->xsks_map_fd == -1) + if (ctx->xsks_map_fd == -1) err = -ENOENT; out_map_ids: @@ -538,16 +559,19 @@ out_map_ids: static int xsk_set_bpf_maps(struct xsk_socket *xsk) { - return bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, + struct xsk_ctx *ctx = xsk->ctx; + + return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id, &xsk->fd, 0); } static int xsk_setup_xdp_prog(struct xsk_socket *xsk) { + struct xsk_ctx *ctx = xsk->ctx; __u32 prog_id = 0; int err; - err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id, + err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags); if (err) return err; @@ -563,12 +587,12 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) return err; } } else { - xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id); - if (xsk->prog_fd < 0) + ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id); + if (ctx->prog_fd < 0) return -errno; err = xsk_lookup_bpf_maps(xsk); if (err) { - close(xsk->prog_fd); + close(ctx->prog_fd); return err; } } @@ -577,25 +601,110 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) err = xsk_set_bpf_maps(xsk); if (err) { xsk_delete_bpf_maps(xsk); - close(xsk->prog_fd); + close(ctx->prog_fd); return err; } return 0; } -int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, - __u32 queue_id, struct xsk_umem *umem, - struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, - const struct xsk_socket_config *usr_config) +static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex, + __u32 queue_id) +{ + struct xsk_ctx *ctx; + + if (list_empty(&umem->ctx_list)) + return NULL; + + list_for_each_entry(ctx, &umem->ctx_list, list) { + if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) { + ctx->refcount++; + return ctx; + } + } + + return NULL; +} + +static void xsk_put_ctx(struct xsk_ctx *ctx) +{ + struct xsk_umem *umem = ctx->umem; + struct xdp_mmap_offsets off; + int err; + + if (--ctx->refcount == 0) { + err = xsk_get_mmap_offsets(umem->fd, &off); + if (!err) { + munmap(ctx->fill->ring - off.fr.desc, + off.fr.desc + umem->config.fill_size * + sizeof(__u64)); + munmap(ctx->comp->ring - off.cr.desc, + off.cr.desc + umem->config.comp_size * + sizeof(__u64)); + } + + list_del(&ctx->list); + free(ctx); + } +} + +static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, + struct xsk_umem *umem, int ifindex, + const char *ifname, __u32 queue_id, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp) +{ + struct xsk_ctx *ctx; + int err; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) + return NULL; + + if (!umem->fill_save) { + err = xsk_create_umem_rings(umem, xsk->fd, fill, comp); + if (err) { + free(ctx); + return NULL; + } + } else if (umem->fill_save != fill || umem->comp_save != comp) { + /* Copy over rings to new structs. */ + memcpy(fill, umem->fill_save, sizeof(*fill)); + memcpy(comp, umem->comp_save, sizeof(*comp)); + } + + ctx->ifindex = ifindex; + ctx->refcount = 1; + ctx->umem = umem; + ctx->queue_id = queue_id; + memcpy(ctx->ifname, ifname, IFNAMSIZ - 1); + ctx->ifname[IFNAMSIZ - 1] = '\0'; + + umem->fill_save = NULL; + umem->comp_save = NULL; + ctx->fill = fill; + ctx->comp = comp; + list_add(&ctx->list, &umem->ctx_list); + return ctx; +} + +int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, + const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_socket_config *usr_config) { void *rx_map = NULL, *tx_map = NULL; struct sockaddr_xdp sxdp = {}; struct xdp_mmap_offsets off; struct xsk_socket *xsk; - int err; + struct xsk_ctx *ctx; + int err, ifindex; - if (!umem || !xsk_ptr || !(rx || tx)) + if (!umem || !xsk_ptr || !(rx || tx) || !fill || !comp) return -EFAULT; xsk = calloc(1, sizeof(*xsk)); @@ -606,10 +715,10 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, if (err) goto out_xsk_alloc; - if (umem->refcount && - !(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { - pr_warn("Error: shared umems not supported by libbpf supplied XDP program.\n"); - err = -EBUSY; + xsk->outstanding_tx = 0; + ifindex = if_nametoindex(ifname); + if (!ifindex) { + err = -errno; goto out_xsk_alloc; } @@ -623,16 +732,16 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, xsk->fd = umem->fd; } - xsk->outstanding_tx = 0; - xsk->queue_id = queue_id; - xsk->umem = umem; - xsk->ifindex = if_nametoindex(ifname); - if (!xsk->ifindex) { - err = -errno; - goto out_socket; + ctx = xsk_get_ctx(umem, ifindex, queue_id); + if (!ctx) { + ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id, + fill, comp); + if (!ctx) { + err = -ENOMEM; + goto out_socket; + } } - memcpy(xsk->ifname, ifname, IFNAMSIZ - 1); - xsk->ifname[IFNAMSIZ - 1] = '\0'; + xsk->ctx = ctx; if (rx) { err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, @@ -640,7 +749,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, sizeof(xsk->config.rx_size)); if (err) { err = -errno; - goto out_socket; + goto out_put_ctx; } } if (tx) { @@ -649,14 +758,14 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, sizeof(xsk->config.tx_size)); if (err) { err = -errno; - goto out_socket; + goto out_put_ctx; } } err = xsk_get_mmap_offsets(xsk->fd, &off); if (err) { err = -errno; - goto out_socket; + goto out_put_ctx; } if (rx) { @@ -666,7 +775,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, xsk->fd, XDP_PGOFF_RX_RING); if (rx_map == MAP_FAILED) { err = -errno; - goto out_socket; + goto out_put_ctx; } rx->mask = xsk->config.rx_size - 1; @@ -705,10 +814,10 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, xsk->tx = tx; sxdp.sxdp_family = PF_XDP; - sxdp.sxdp_ifindex = xsk->ifindex; - sxdp.sxdp_queue_id = xsk->queue_id; + sxdp.sxdp_ifindex = ctx->ifindex; + sxdp.sxdp_queue_id = ctx->queue_id; if (umem->refcount > 1) { - sxdp.sxdp_flags = XDP_SHARED_UMEM; + sxdp.sxdp_flags |= XDP_SHARED_UMEM; sxdp.sxdp_shared_umem_fd = umem->fd; } else { sxdp.sxdp_flags = xsk->config.bind_flags; @@ -720,7 +829,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, goto out_mmap_tx; } - xsk->prog_fd = -1; + ctx->prog_fd = -1; if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { err = xsk_setup_xdp_prog(xsk); @@ -739,6 +848,8 @@ out_mmap_rx: if (rx) munmap(rx_map, off.rx.desc + xsk->config.rx_size * sizeof(struct xdp_desc)); +out_put_ctx: + xsk_put_ctx(ctx); out_socket: if (--umem->refcount) close(xsk->fd); @@ -747,25 +858,24 @@ out_xsk_alloc: return err; } -int xsk_umem__delete(struct xsk_umem *umem) +int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, + const struct xsk_socket_config *usr_config) { - struct xdp_mmap_offsets off; - int err; + return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem, + rx, tx, umem->fill_save, + umem->comp_save, usr_config); +} +int xsk_umem__delete(struct xsk_umem *umem) +{ if (!umem) return 0; if (umem->refcount) return -EBUSY; - err = xsk_get_mmap_offsets(umem->fd, &off); - if (!err) { - munmap(umem->fill->ring - off.fr.desc, - off.fr.desc + umem->config.fill_size * sizeof(__u64)); - munmap(umem->comp->ring - off.cr.desc, - off.cr.desc + umem->config.comp_size * sizeof(__u64)); - } - close(umem->fd); free(umem); @@ -775,15 +885,16 @@ int xsk_umem__delete(struct xsk_umem *umem) void xsk_socket__delete(struct xsk_socket *xsk) { size_t desc_sz = sizeof(struct xdp_desc); + struct xsk_ctx *ctx = xsk->ctx; struct xdp_mmap_offsets off; int err; if (!xsk) return; - if (xsk->prog_fd != -1) { + if (ctx->prog_fd != -1) { xsk_delete_bpf_maps(xsk); - close(xsk->prog_fd); + close(ctx->prog_fd); } err = xsk_get_mmap_offsets(xsk->fd, &off); @@ -796,14 +907,15 @@ void xsk_socket__delete(struct xsk_socket *xsk) munmap(xsk->tx->ring - off.tx.desc, off.tx.desc + xsk->config.tx_size * desc_sz); } - } - xsk->umem->refcount--; + xsk_put_ctx(ctx); + + ctx->umem->refcount--; /* Do not close an fd that also has an associated umem connected * to it. */ - if (xsk->fd != xsk->umem->fd) + if (xsk->fd != ctx->umem->fd) close(xsk->fd); free(xsk); } diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index 584f6820a639..1069c46364ff 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -234,6 +234,15 @@ LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk, struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, const struct xsk_socket_config *config); +LIBBPF_API int +xsk_socket__create_shared(struct xsk_socket **xsk_ptr, + const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_socket_config *config); /* Returns 0 for success and -EBUSY if the umem is still in use. */ LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem); -- cgit v1.3.1 From 75fa677260be6acf326afc5c466c211b07aee92f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 17 Aug 2020 17:09:46 +0200 Subject: selftests: use "$(MAKE)" instead of "make" for headers_install If top make invocation uses -j4 or larger, this patch reduces "make headers_install" subtask run time from 30 to 7 seconds. CC: Shuah Khan CC: Shuah Khan CC: linux-kselftest@vger.kernel.org CC: linux-kernel@vger.kernel.org Signed-off-by: Denys Vlasenko Signed-off-by: Shuah Khan --- tools/testing/selftests/lib.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 7a17ea815736..51124b962d56 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -47,9 +47,9 @@ ARCH ?= $(SUBARCH) khdr: ifndef KSFT_KHDR_INSTALL_DONE ifeq (1,$(DEFAULT_INSTALL_HDR_PATH)) - make --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install + $(MAKE) --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install else - make --no-builtin-rules INSTALL_HDR_PATH=$$OUTPUT/usr \ + $(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$$OUTPUT/usr \ ARCH=$(ARCH) -C $(top_srcdir) headers_install endif endif -- cgit v1.3.1 From f56407fa6e69499a06bf1e0543fa93be6922acba Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 31 Aug 2020 13:16:51 -0700 Subject: bpf: Remove bpf_lsm_file_mprotect from sleepable list. Technically the bpf programs can sleep while attached to bpf_lsm_file_mprotect, but such programs need to access user memory. So they're in might_fault() category. Which means they cannot be called from file_mprotect lsm hook that takes write lock on mm->mmap_lock. Adjust the test accordingly. Also add might_fault() to __bpf_prog_enter_sleepable() to catch such deadlocks early. Fixes: 1e6c62a88215 ("bpf: Introduce sleepable BPF programs") Fixes: e68a144547fc ("selftests/bpf: Add sleepable tests") Reported-by: Yonghong Song Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200831201651.82447-1-alexei.starovoitov@gmail.com --- kernel/bpf/trampoline.c | 1 + kernel/bpf/verifier.c | 1 - tools/testing/selftests/bpf/progs/lsm.c | 34 ++++++++++++++++----------------- 3 files changed, 18 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index c2b76545153c..7dd523a7e32d 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -409,6 +409,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start) void notrace __bpf_prog_enter_sleepable(void) { rcu_read_lock_trace(); + might_fault(); } void notrace __bpf_prog_exit_sleepable(void) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b4c22b5ce5a2..b4e9c56b8b32 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11006,7 +11006,6 @@ static int check_attach_modify_return(struct bpf_prog *prog, unsigned long addr) /* non exhaustive list of sleepable bpf_lsm_*() functions */ BTF_SET_START(btf_sleepable_lsm_hooks) #ifdef CONFIG_BPF_LSM -BTF_ID(func, bpf_lsm_file_mprotect) BTF_ID(func, bpf_lsm_bprm_committed_creds) #else BTF_ID_UNUSED diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c index 49fa6ca99755..ff4d343b94b5 100644 --- a/tools/testing/selftests/bpf/progs/lsm.c +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -36,14 +36,10 @@ int monitored_pid = 0; int mprotect_count = 0; int bprm_count = 0; -SEC("lsm.s/file_mprotect") +SEC("lsm/file_mprotect") int BPF_PROG(test_int_hook, struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot, int ret) { - char args[64]; - __u32 key = 0; - __u64 *value; - if (ret != 0) return ret; @@ -53,18 +49,6 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma, is_stack = (vma->vm_start <= vma->vm_mm->start_stack && vma->vm_end >= vma->vm_mm->start_stack); - bpf_copy_from_user(args, sizeof(args), (void *)vma->vm_mm->arg_start); - - value = bpf_map_lookup_elem(&array, &key); - if (value) - *value = 0; - value = bpf_map_lookup_elem(&hash, &key); - if (value) - *value = 0; - value = bpf_map_lookup_elem(&lru_hash, &key); - if (value) - *value = 0; - if (is_stack && monitored_pid == pid) { mprotect_count++; ret = -EPERM; @@ -77,10 +61,26 @@ SEC("lsm.s/bprm_committed_creds") int BPF_PROG(test_void_hook, struct linux_binprm *bprm) { __u32 pid = bpf_get_current_pid_tgid() >> 32; + char args[64]; + __u32 key = 0; + __u64 *value; if (monitored_pid == pid) bprm_count++; + bpf_copy_from_user(args, sizeof(args), (void *)bprm->vma->vm_mm->arg_start); + bpf_copy_from_user(args, sizeof(args), (void *)bprm->mm->arg_start); + + value = bpf_map_lookup_elem(&array, &key); + if (value) + *value = 0; + value = bpf_map_lookup_elem(&hash, &key); + if (value) + *value = 0; + value = bpf_map_lookup_elem(&lru_hash, &key); + if (value) + *value = 0; + return 0; } SEC("lsm/task_free") /* lsm/ is ok, lsm.s/ fails */ -- cgit v1.3.1 From f69237e1e954b469175de4af8487c303d36c5467 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Tue, 28 Jul 2020 00:32:41 -0700 Subject: selftests: more general make nesting support selftests can be built from the toplevel kernel makefile (e.g. make kselftest-all) or directly (make -C tools/testing/selftests all). The toplevel kernel makefile explicitly disables implicit rules with "MAKEFLAGS += -rR", which is passed to tools/testing/selftests. Some selftest makefiles require implicit make rules, which is why commit 67d8712dcc70 ("selftests: Fix build failures when invoked from kselftest target") reenables implicit rules by clearing MAKEFLAGS if MAKELEVEL=1. So far so good. However, if the toplevel makefile is called from an outer makefile then MAKELEVEL will be elevated, which breaks the MAKELEVEL equality test. Example wrapped makefile error: $ cat ~/Makefile all: $(MAKE) defconfig $(MAKE) kselftest-all $ make -sf ~/Makefile futex_wait_timeout.c /src/tools/testing/selftests/kselftest_harness.h /src/tools/testing/selftests/kselftest.h ../include/futextest.h ../include/atomic.h ../include/logging.h -lpthread -lrt -o /src/tools/testing/selftests/futex/functional/futex_wait_timeout make[4]: futex_wait_timeout.c: Command not found Rather than checking $(MAKELEVEL), check for $(LINK.c), which is a more direct side effect of "make -R". This enables arbitrary makefile nesting. Signed-off-by: Greg Thelen Signed-off-by: Shuah Khan --- tools/testing/selftests/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 9018f45d631d..15c1c1359c50 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -88,10 +88,10 @@ endif # of the targets gets built. FORCE_TARGETS ?= -# Clear LDFLAGS and MAKEFLAGS if called from main -# Makefile to avoid test build failures when test -# Makefile doesn't have explicit build rules. -ifeq (1,$(MAKELEVEL)) +# Clear LDFLAGS and MAKEFLAGS when implicit rules are missing. This provides +# implicit rules to sub-test Makefiles which avoids build failures in test +# Makefile that don't have explicit build rules. +ifeq (,$(LINK.c)) override LDFLAGS = override MAKEFLAGS = endif -- cgit v1.3.1 From 5578d008d9e06bb531fb3e62dd17096d9fd9c853 Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Tue, 11 Aug 2020 14:27:55 -0700 Subject: kunit: tool: fix running kunit_tool from outside kernel tree Currently kunit_tool does not work correctly when executed from a path outside of the kernel tree, so make sure that the current working directory is correct and the kunit_dir is properly initialized before running. Signed-off-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 425ef40067e7..e2caf4e24ecb 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -237,9 +237,13 @@ def main(argv, linux=None): cli_args = parser.parse_args(argv) + if get_kernel_root_path(): + os.chdir(get_kernel_root_path()) + if cli_args.subcommand == 'run': if not os.path.exists(cli_args.build_dir): os.mkdir(cli_args.build_dir) + create_default_kunitconfig() if not linux: linux = kunit_kernel.LinuxSourceTree() @@ -257,6 +261,7 @@ def main(argv, linux=None): if cli_args.build_dir: if not os.path.exists(cli_args.build_dir): os.mkdir(cli_args.build_dir) + create_default_kunitconfig() if not linux: linux = kunit_kernel.LinuxSourceTree() @@ -270,10 +275,6 @@ def main(argv, linux=None): if result.status != KunitStatus.SUCCESS: sys.exit(1) elif cli_args.subcommand == 'build': - if cli_args.build_dir: - if not os.path.exists(cli_args.build_dir): - os.mkdir(cli_args.build_dir) - if not linux: linux = kunit_kernel.LinuxSourceTree() @@ -288,10 +289,6 @@ def main(argv, linux=None): if result.status != KunitStatus.SUCCESS: sys.exit(1) elif cli_args.subcommand == 'exec': - if cli_args.build_dir: - if not os.path.exists(cli_args.build_dir): - os.mkdir(cli_args.build_dir) - if not linux: linux = kunit_kernel.LinuxSourceTree() -- cgit v1.3.1 From 21a6d1780d5bbfca0ce9b8104ca6233502fcbf86 Mon Sep 17 00:00:00 2001 From: Heidi Fahim Date: Tue, 11 Aug 2020 14:27:56 -0700 Subject: kunit: tool: allow generating test results in JSON Add a --json flag, which when specified generates JSON formatted test results conforming to the KernelCI API test_group spec[1]. The user can use the new flag to specify a filename to print the json formatted results to. Link[1]: https://api.kernelci.org/schema-test-group.html#post Signed-off-by: Heidi Fahim Signed-off-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit.py | 35 +++++++++++++++---- tools/testing/kunit/kunit_json.py | 63 ++++++++++++++++++++++++++++++++++ tools/testing/kunit/kunit_tool_test.py | 33 ++++++++++++++++++ 3 files changed, 125 insertions(+), 6 deletions(-) create mode 100644 tools/testing/kunit/kunit_json.py (limited to 'tools') diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index e2caf4e24ecb..3c95a0eb0d04 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -17,6 +17,7 @@ from collections import namedtuple from enum import Enum, auto import kunit_config +import kunit_json import kunit_kernel import kunit_parser @@ -30,9 +31,9 @@ KunitBuildRequest = namedtuple('KunitBuildRequest', KunitExecRequest = namedtuple('KunitExecRequest', ['timeout', 'build_dir', 'alltests']) KunitParseRequest = namedtuple('KunitParseRequest', - ['raw_output', 'input_data']) + ['raw_output', 'input_data', 'build_dir', 'json']) KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs', - 'build_dir', 'alltests', + 'build_dir', 'alltests', 'json', 'make_options']) KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0] @@ -113,12 +114,22 @@ def parse_tests(request: KunitParseRequest) -> KunitResult: test_result = kunit_parser.TestResult(kunit_parser.TestStatus.SUCCESS, [], 'Tests not Parsed.') + if request.raw_output: kunit_parser.raw_output(request.input_data) else: test_result = kunit_parser.parse_run_tests(request.input_data) parse_end = time.time() + if request.json: + json_obj = kunit_json.get_json_result( + test_result=test_result, + def_config='kunit_defconfig', + build_dir=request.build_dir, + json_path=request.json) + if request.json == 'stdout': + print(json_obj) + if test_result.status != kunit_parser.TestStatus.SUCCESS: return KunitResult(KunitStatus.TEST_FAILURE, test_result, parse_end - parse_start) @@ -151,7 +162,9 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree, return exec_result parse_request = KunitParseRequest(request.raw_output, - exec_result.result) + exec_result.result, + request.build_dir, + request.json) parse_result = parse_tests(parse_request) run_end = time.time() @@ -195,7 +208,12 @@ def add_exec_opts(parser): def add_parse_opts(parser): parser.add_argument('--raw_output', help='don\'t format output from kernel', action='store_true') - + parser.add_argument('--json', + nargs='?', + help='Stores test results in a JSON, and either ' + 'prints to stdout or saves to file if a ' + 'filename is specified', + type=str, const='stdout', default=None) def main(argv, linux=None): parser = argparse.ArgumentParser( @@ -253,6 +271,7 @@ def main(argv, linux=None): cli_args.jobs, cli_args.build_dir, cli_args.alltests, + cli_args.json, cli_args.make_options) result = run_tests(linux, request) if result.status != KunitStatus.SUCCESS: @@ -297,7 +316,9 @@ def main(argv, linux=None): cli_args.alltests) exec_result = exec_tests(linux, exec_request) parse_request = KunitParseRequest(cli_args.raw_output, - exec_result.result) + exec_result.result, + cli_args.build_dir, + cli_args.json) result = parse_tests(parse_request) kunit_parser.print_with_timestamp(( 'Elapsed time: %.3fs\n') % ( @@ -311,7 +332,9 @@ def main(argv, linux=None): with open(cli_args.file, 'r') as f: kunit_output = f.read().splitlines() request = KunitParseRequest(cli_args.raw_output, - kunit_output) + kunit_output, + cli_args.build_dir, + cli_args.json) result = parse_tests(request) if result.status != KunitStatus.SUCCESS: sys.exit(1) diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py new file mode 100644 index 000000000000..624b31b2dbd6 --- /dev/null +++ b/tools/testing/kunit/kunit_json.py @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Generates JSON from KUnit results according to +# KernelCI spec: https://github.com/kernelci/kernelci-doc/wiki/Test-API +# +# Copyright (C) 2020, Google LLC. +# Author: Heidi Fahim + +import json +import os + +import kunit_parser + +from kunit_parser import TestStatus + +def get_json_result(test_result, def_config, build_dir, json_path): + sub_groups = [] + + # Each test suite is mapped to a KernelCI sub_group + for test_suite in test_result.suites: + sub_group = { + "name": test_suite.name, + "arch": "UM", + "defconfig": def_config, + "build_environment": build_dir, + "test_cases": [], + "lab_name": None, + "kernel": None, + "job": None, + "git_branch": "kselftest", + } + test_cases = [] + # TODO: Add attachments attribute in test_case with detailed + # failure message, see https://api.kernelci.org/schema-test-case.html#get + for case in test_suite.cases: + test_case = {"name": case.name, "status": "FAIL"} + if case.status == TestStatus.SUCCESS: + test_case["status"] = "PASS" + elif case.status == TestStatus.TEST_CRASHED: + test_case["status"] = "ERROR" + test_cases.append(test_case) + sub_group["test_cases"] = test_cases + sub_groups.append(sub_group) + test_group = { + "name": "KUnit Test Group", + "arch": "UM", + "defconfig": def_config, + "build_environment": build_dir, + "sub_groups": sub_groups, + "lab_name": None, + "kernel": None, + "job": None, + "git_branch": "kselftest", + } + json_obj = json.dumps(test_group, indent=4) + if json_path != 'stdout': + with open(json_path, 'w') as result_path: + result_path.write(json_obj) + root = __file__.split('tools/testing/kunit/')[0] + kunit_parser.print_with_timestamp( + "Test results stored in %s" % + os.path.join(root, result_path.name)) + return json_obj diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 287c74d821c3..99c3c5671ea4 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -11,11 +11,13 @@ from unittest import mock import tempfile, shutil # Handling test_tmpdir +import json import os import kunit_config import kunit_parser import kunit_kernel +import kunit_json import kunit test_tmpdir = '' @@ -230,6 +232,37 @@ class KUnitParserTest(unittest.TestCase): result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual('kunit-resource-test', result.suites[0].name) +class KUnitJsonTest(unittest.TestCase): + + def _json_for(self, log_file): + with(open(get_absolute_path(log_file))) as file: + test_result = kunit_parser.parse_run_tests(file) + json_obj = kunit_json.get_json_result( + test_result=test_result, + def_config='kunit_defconfig', + build_dir=None, + json_path='stdout') + return json.loads(json_obj) + + def test_failed_test_json(self): + result = self._json_for( + 'test_data/test_is_test_passed-failure.log') + self.assertEqual( + {'name': 'example_simple_test', 'status': 'FAIL'}, + result["sub_groups"][1]["test_cases"][0]) + + def test_crashed_test_json(self): + result = self._json_for( + 'test_data/test_is_test_passed-crash.log') + self.assertEqual( + {'name': 'example_simple_test', 'status': 'ERROR'}, + result["sub_groups"][1]["test_cases"][0]) + + def test_no_tests_json(self): + result = self._json_for( + 'test_data/test_is_test_passed-no_tests_run.log') + self.assertEqual(0, len(result['sub_groups'])) + class StrContains(str): def __eq__(self, other): return self in other -- cgit v1.3.1 From 70d932985757fbe978024db313001218e9f8fe5c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:36 +0200 Subject: notifier: Fix broken error handling pattern The current notifiers have the following error handling pattern all over the place: int err, nr; err = __foo_notifier_call_chain(&chain, val_up, v, -1, &nr); if (err & NOTIFIER_STOP_MASK) __foo_notifier_call_chain(&chain, val_down, v, nr-1, NULL) And aside from the endless repetition thereof, it is broken. Consider blocking notifiers; both calls take and drop the rwsem, this means that the notifier list can change in between the two calls, making @nr meaningless. Fix this by replacing all the __foo_notifier_call_chain() functions with foo_notifier_call_chain_robust() that embeds the above pattern, but ensures it is inside a single lock region. Note: I switched atomic_notifier_call_chain_robust() to use the spinlock, since RCU cannot provide the guarantee required for the recovery. Note: software_resume() error handling was broken afaict. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20200818135804.325626653@infradead.org --- include/linux/notifier.h | 15 ++-- kernel/cpu_pm.c | 48 +++++-------- kernel/notifier.c | 144 ++++++++++++++++++++++--------------- kernel/power/hibernate.c | 39 +++++----- kernel/power/main.c | 8 +-- kernel/power/power.h | 3 +- kernel/power/suspend.c | 14 ++-- kernel/power/user.c | 14 ++-- tools/power/pm-graph/sleepgraph.py | 2 +- 9 files changed, 147 insertions(+), 140 deletions(-) (limited to 'tools') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 018947611483..2fb373a5c1ed 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -161,20 +161,19 @@ extern int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh, extern int atomic_notifier_call_chain(struct atomic_notifier_head *nh, unsigned long val, void *v); -extern int __atomic_notifier_call_chain(struct atomic_notifier_head *nh, - unsigned long val, void *v, int nr_to_call, int *nr_calls); extern int blocking_notifier_call_chain(struct blocking_notifier_head *nh, unsigned long val, void *v); -extern int __blocking_notifier_call_chain(struct blocking_notifier_head *nh, - unsigned long val, void *v, int nr_to_call, int *nr_calls); extern int raw_notifier_call_chain(struct raw_notifier_head *nh, unsigned long val, void *v); -extern int __raw_notifier_call_chain(struct raw_notifier_head *nh, - unsigned long val, void *v, int nr_to_call, int *nr_calls); extern int srcu_notifier_call_chain(struct srcu_notifier_head *nh, unsigned long val, void *v); -extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, - unsigned long val, void *v, int nr_to_call, int *nr_calls); + +extern int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh, + unsigned long val_up, unsigned long val_down, void *v); +extern int blocking_notifier_call_chain_robust(struct blocking_notifier_head *nh, + unsigned long val_up, unsigned long val_down, void *v); +extern int raw_notifier_call_chain_robust(struct raw_notifier_head *nh, + unsigned long val_up, unsigned long val_down, void *v); #define NOTIFY_DONE 0x0000 /* Don't care */ #define NOTIFY_OK 0x0001 /* Suits me */ diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c index 44a259338e33..f7e1d0eccdbc 100644 --- a/kernel/cpu_pm.c +++ b/kernel/cpu_pm.c @@ -15,18 +15,28 @@ static ATOMIC_NOTIFIER_HEAD(cpu_pm_notifier_chain); -static int cpu_pm_notify(enum cpu_pm_event event, int nr_to_call, int *nr_calls) +static int cpu_pm_notify(enum cpu_pm_event event) { int ret; /* - * __atomic_notifier_call_chain has a RCU read critical section, which + * atomic_notifier_call_chain has a RCU read critical section, which * could be disfunctional in cpu idle. Copy RCU_NONIDLE code to let * RCU know this. */ rcu_irq_enter_irqson(); - ret = __atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL, - nr_to_call, nr_calls); + ret = atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL); + rcu_irq_exit_irqson(); + + return notifier_to_errno(ret); +} + +static int cpu_pm_notify_robust(enum cpu_pm_event event_up, enum cpu_pm_event event_down) +{ + int ret; + + rcu_irq_enter_irqson(); + ret = atomic_notifier_call_chain_robust(&cpu_pm_notifier_chain, event_up, event_down, NULL); rcu_irq_exit_irqson(); return notifier_to_errno(ret); @@ -80,18 +90,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier); */ int cpu_pm_enter(void) { - int nr_calls = 0; - int ret = 0; - - ret = cpu_pm_notify(CPU_PM_ENTER, -1, &nr_calls); - if (ret) - /* - * Inform listeners (nr_calls - 1) about failure of CPU PM - * PM entry who are notified earlier to prepare for it. - */ - cpu_pm_notify(CPU_PM_ENTER_FAILED, nr_calls - 1, NULL); - - return ret; + return cpu_pm_notify_robust(CPU_PM_ENTER, CPU_PM_ENTER_FAILED); } EXPORT_SYMBOL_GPL(cpu_pm_enter); @@ -109,7 +108,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_enter); */ int cpu_pm_exit(void) { - return cpu_pm_notify(CPU_PM_EXIT, -1, NULL); + return cpu_pm_notify(CPU_PM_EXIT); } EXPORT_SYMBOL_GPL(cpu_pm_exit); @@ -131,18 +130,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_exit); */ int cpu_cluster_pm_enter(void) { - int nr_calls = 0; - int ret = 0; - - ret = cpu_pm_notify(CPU_CLUSTER_PM_ENTER, -1, &nr_calls); - if (ret) - /* - * Inform listeners (nr_calls - 1) about failure of CPU cluster - * PM entry who are notified earlier to prepare for it. - */ - cpu_pm_notify(CPU_CLUSTER_PM_ENTER_FAILED, nr_calls - 1, NULL); - - return ret; + return cpu_pm_notify_robust(CPU_CLUSTER_PM_ENTER, CPU_CLUSTER_PM_ENTER_FAILED); } EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter); @@ -163,7 +151,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter); */ int cpu_cluster_pm_exit(void) { - return cpu_pm_notify(CPU_CLUSTER_PM_EXIT, -1, NULL); + return cpu_pm_notify(CPU_CLUSTER_PM_EXIT); } EXPORT_SYMBOL_GPL(cpu_cluster_pm_exit); diff --git a/kernel/notifier.c b/kernel/notifier.c index 84c987dfbe03..1b019cbca594 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -94,6 +94,34 @@ static int notifier_call_chain(struct notifier_block **nl, } NOKPROBE_SYMBOL(notifier_call_chain); +/** + * notifier_call_chain_robust - Inform the registered notifiers about an event + * and rollback on error. + * @nl: Pointer to head of the blocking notifier chain + * @val_up: Value passed unmodified to the notifier function + * @val_down: Value passed unmodified to the notifier function when recovering + * from an error on @val_up + * @v Pointer passed unmodified to the notifier function + * + * NOTE: It is important the @nl chain doesn't change between the two + * invocations of notifier_call_chain() such that we visit the + * exact same notifier callbacks; this rules out any RCU usage. + * + * Returns: the return value of the @val_up call. + */ +static int notifier_call_chain_robust(struct notifier_block **nl, + unsigned long val_up, unsigned long val_down, + void *v) +{ + int ret, nr = 0; + + ret = notifier_call_chain(nl, val_up, v, -1, &nr); + if (ret & NOTIFY_STOP_MASK) + notifier_call_chain(nl, val_down, v, nr-1, NULL); + + return ret; +} + /* * Atomic notifier chain routines. Registration and unregistration * use a spinlock, and call_chain is synchronized by RCU (no locks). @@ -144,13 +172,30 @@ int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, } EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister); +int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh, + unsigned long val_up, unsigned long val_down, void *v) +{ + unsigned long flags; + int ret; + + /* + * Musn't use RCU; because then the notifier list can + * change between the up and down traversal. + */ + spin_lock_irqsave(&nh->lock, flags); + ret = notifier_call_chain_robust(&nh->head, val_up, val_down, v); + spin_unlock_irqrestore(&nh->lock, flags); + + return ret; +} +EXPORT_SYMBOL_GPL(atomic_notifier_call_chain_robust); +NOKPROBE_SYMBOL(atomic_notifier_call_chain_robust); + /** - * __atomic_notifier_call_chain - Call functions in an atomic notifier chain + * atomic_notifier_call_chain - Call functions in an atomic notifier chain * @nh: Pointer to head of the atomic notifier chain * @val: Value passed unmodified to notifier function * @v: Pointer passed unmodified to notifier function - * @nr_to_call: See the comment for notifier_call_chain. - * @nr_calls: See the comment for notifier_call_chain. * * Calls each function in a notifier chain in turn. The functions * run in an atomic context, so they must not block. @@ -163,24 +208,16 @@ EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister); * Otherwise the return value is the return value * of the last notifier function called. */ -int __atomic_notifier_call_chain(struct atomic_notifier_head *nh, - unsigned long val, void *v, - int nr_to_call, int *nr_calls) +int atomic_notifier_call_chain(struct atomic_notifier_head *nh, + unsigned long val, void *v) { int ret; rcu_read_lock(); - ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls); + ret = notifier_call_chain(&nh->head, val, v, -1, NULL); rcu_read_unlock(); - return ret; -} -EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain); -NOKPROBE_SYMBOL(__atomic_notifier_call_chain); -int atomic_notifier_call_chain(struct atomic_notifier_head *nh, - unsigned long val, void *v) -{ - return __atomic_notifier_call_chain(nh, val, v, -1, NULL); + return ret; } EXPORT_SYMBOL_GPL(atomic_notifier_call_chain); NOKPROBE_SYMBOL(atomic_notifier_call_chain); @@ -250,13 +287,30 @@ int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, } EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister); +int blocking_notifier_call_chain_robust(struct blocking_notifier_head *nh, + unsigned long val_up, unsigned long val_down, void *v) +{ + int ret = NOTIFY_DONE; + + /* + * We check the head outside the lock, but if this access is + * racy then it does not matter what the result of the test + * is, we re-check the list after having taken the lock anyway: + */ + if (rcu_access_pointer(nh->head)) { + down_read(&nh->rwsem); + ret = notifier_call_chain_robust(&nh->head, val_up, val_down, v); + up_read(&nh->rwsem); + } + return ret; +} +EXPORT_SYMBOL_GPL(blocking_notifier_call_chain_robust); + /** - * __blocking_notifier_call_chain - Call functions in a blocking notifier chain + * blocking_notifier_call_chain - Call functions in a blocking notifier chain * @nh: Pointer to head of the blocking notifier chain * @val: Value passed unmodified to notifier function * @v: Pointer passed unmodified to notifier function - * @nr_to_call: See comment for notifier_call_chain. - * @nr_calls: See comment for notifier_call_chain. * * Calls each function in a notifier chain in turn. The functions * run in a process context, so they are allowed to block. @@ -268,9 +322,8 @@ EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister); * Otherwise the return value is the return value * of the last notifier function called. */ -int __blocking_notifier_call_chain(struct blocking_notifier_head *nh, - unsigned long val, void *v, - int nr_to_call, int *nr_calls) +int blocking_notifier_call_chain(struct blocking_notifier_head *nh, + unsigned long val, void *v) { int ret = NOTIFY_DONE; @@ -281,19 +334,11 @@ int __blocking_notifier_call_chain(struct blocking_notifier_head *nh, */ if (rcu_access_pointer(nh->head)) { down_read(&nh->rwsem); - ret = notifier_call_chain(&nh->head, val, v, nr_to_call, - nr_calls); + ret = notifier_call_chain(&nh->head, val, v, -1, NULL); up_read(&nh->rwsem); } return ret; } -EXPORT_SYMBOL_GPL(__blocking_notifier_call_chain); - -int blocking_notifier_call_chain(struct blocking_notifier_head *nh, - unsigned long val, void *v) -{ - return __blocking_notifier_call_chain(nh, val, v, -1, NULL); -} EXPORT_SYMBOL_GPL(blocking_notifier_call_chain); /* @@ -335,13 +380,18 @@ int raw_notifier_chain_unregister(struct raw_notifier_head *nh, } EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister); +int raw_notifier_call_chain_robust(struct raw_notifier_head *nh, + unsigned long val_up, unsigned long val_down, void *v) +{ + return notifier_call_chain_robust(&nh->head, val_up, val_down, v); +} +EXPORT_SYMBOL_GPL(raw_notifier_call_chain_robust); + /** - * __raw_notifier_call_chain - Call functions in a raw notifier chain + * raw_notifier_call_chain - Call functions in a raw notifier chain * @nh: Pointer to head of the raw notifier chain * @val: Value passed unmodified to notifier function * @v: Pointer passed unmodified to notifier function - * @nr_to_call: See comment for notifier_call_chain. - * @nr_calls: See comment for notifier_call_chain * * Calls each function in a notifier chain in turn. The functions * run in an undefined context. @@ -354,18 +404,10 @@ EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister); * Otherwise the return value is the return value * of the last notifier function called. */ -int __raw_notifier_call_chain(struct raw_notifier_head *nh, - unsigned long val, void *v, - int nr_to_call, int *nr_calls) -{ - return notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls); -} -EXPORT_SYMBOL_GPL(__raw_notifier_call_chain); - int raw_notifier_call_chain(struct raw_notifier_head *nh, unsigned long val, void *v) { - return __raw_notifier_call_chain(nh, val, v, -1, NULL); + return notifier_call_chain(&nh->head, val, v, -1, NULL); } EXPORT_SYMBOL_GPL(raw_notifier_call_chain); @@ -437,12 +479,10 @@ int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh, EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister); /** - * __srcu_notifier_call_chain - Call functions in an SRCU notifier chain + * srcu_notifier_call_chain - Call functions in an SRCU notifier chain * @nh: Pointer to head of the SRCU notifier chain * @val: Value passed unmodified to notifier function * @v: Pointer passed unmodified to notifier function - * @nr_to_call: See comment for notifier_call_chain. - * @nr_calls: See comment for notifier_call_chain * * Calls each function in a notifier chain in turn. The functions * run in a process context, so they are allowed to block. @@ -454,25 +494,17 @@ EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister); * Otherwise the return value is the return value * of the last notifier function called. */ -int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, - unsigned long val, void *v, - int nr_to_call, int *nr_calls) +int srcu_notifier_call_chain(struct srcu_notifier_head *nh, + unsigned long val, void *v) { int ret; int idx; idx = srcu_read_lock(&nh->srcu); - ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls); + ret = notifier_call_chain(&nh->head, val, v, -1, NULL); srcu_read_unlock(&nh->srcu, idx); return ret; } -EXPORT_SYMBOL_GPL(__srcu_notifier_call_chain); - -int srcu_notifier_call_chain(struct srcu_notifier_head *nh, - unsigned long val, void *v) -{ - return __srcu_notifier_call_chain(nh, val, v, -1, NULL); -} EXPORT_SYMBOL_GPL(srcu_notifier_call_chain); /** diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index e7aa57fb2fdc..1dee70815f3c 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -706,8 +706,8 @@ static int load_image_and_restore(void) */ int hibernate(void) { - int error, nr_calls = 0; bool snapshot_test = false; + int error; if (!hibernation_available()) { pm_pr_dbg("Hibernation not available.\n"); @@ -723,11 +723,9 @@ int hibernate(void) pr_info("hibernation entry\n"); pm_prepare_console(); - error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls); - if (error) { - nr_calls--; - goto Exit; - } + error = pm_notifier_call_chain_robust(PM_HIBERNATION_PREPARE, PM_POST_HIBERNATION); + if (error) + goto Restore; ksys_sync_helper(); @@ -785,7 +783,8 @@ int hibernate(void) /* Don't bother checking whether freezer_test_done is true */ freezer_test_done = false; Exit: - __pm_notifier_call_chain(PM_POST_HIBERNATION, nr_calls, NULL); + pm_notifier_call_chain(PM_POST_HIBERNATION); + Restore: pm_restore_console(); hibernate_release(); Unlock: @@ -804,7 +803,7 @@ int hibernate(void) */ int hibernate_quiet_exec(int (*func)(void *data), void *data) { - int error, nr_calls = 0; + int error; lock_system_sleep(); @@ -815,11 +814,9 @@ int hibernate_quiet_exec(int (*func)(void *data), void *data) pm_prepare_console(); - error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls); - if (error) { - nr_calls--; - goto exit; - } + error = pm_notifier_call_chain_robust(PM_HIBERNATION_PREPARE, PM_POST_HIBERNATION); + if (error) + goto restore; error = freeze_processes(); if (error) @@ -880,8 +877,9 @@ thaw: thaw_processes(); exit: - __pm_notifier_call_chain(PM_POST_HIBERNATION, nr_calls, NULL); + pm_notifier_call_chain(PM_POST_HIBERNATION); +restore: pm_restore_console(); hibernate_release(); @@ -910,7 +908,7 @@ EXPORT_SYMBOL_GPL(hibernate_quiet_exec); */ static int software_resume(void) { - int error, nr_calls = 0; + int error; /* * If the user said "noresume".. bail out early. @@ -997,11 +995,9 @@ static int software_resume(void) pr_info("resume from hibernation\n"); pm_prepare_console(); - error = __pm_notifier_call_chain(PM_RESTORE_PREPARE, -1, &nr_calls); - if (error) { - nr_calls--; - goto Close_Finish; - } + error = pm_notifier_call_chain_robust(PM_RESTORE_PREPARE, PM_POST_RESTORE); + if (error) + goto Restore; pm_pr_dbg("Preparing processes for hibernation restore.\n"); error = freeze_processes(); @@ -1017,7 +1013,8 @@ static int software_resume(void) error = load_image_and_restore(); thaw_processes(); Finish: - __pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL); + pm_notifier_call_chain(PM_POST_RESTORE); + Restore: pm_restore_console(); pr_info("resume failed (%d)\n", error); hibernate_release(); diff --git a/kernel/power/main.c b/kernel/power/main.c index 40f86ec4ab30..0aefd6f57e0a 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -80,18 +80,18 @@ int unregister_pm_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_pm_notifier); -int __pm_notifier_call_chain(unsigned long val, int nr_to_call, int *nr_calls) +int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down) { int ret; - ret = __blocking_notifier_call_chain(&pm_chain_head, val, NULL, - nr_to_call, nr_calls); + ret = blocking_notifier_call_chain_robust(&pm_chain_head, val_up, val_down, NULL); return notifier_to_errno(ret); } + int pm_notifier_call_chain(unsigned long val) { - return __pm_notifier_call_chain(val, -1, NULL); + return blocking_notifier_call_chain(&pm_chain_head, val, NULL); } /* If set, devices may be suspended and resumed asynchronously. */ diff --git a/kernel/power/power.h b/kernel/power/power.h index 32fc89ac96c3..24f12d534515 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -210,8 +210,7 @@ static inline void suspend_test_finish(const char *label) {} #ifdef CONFIG_PM_SLEEP /* kernel/power/main.c */ -extern int __pm_notifier_call_chain(unsigned long val, int nr_to_call, - int *nr_calls); +extern int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down); extern int pm_notifier_call_chain(unsigned long val); #endif diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 8b1bb5ee7e5d..32391acc806b 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -342,18 +342,16 @@ static int suspend_test(int level) */ static int suspend_prepare(suspend_state_t state) { - int error, nr_calls = 0; + int error; if (!sleep_state_supported(state)) return -EPERM; pm_prepare_console(); - error = __pm_notifier_call_chain(PM_SUSPEND_PREPARE, -1, &nr_calls); - if (error) { - nr_calls--; - goto Finish; - } + error = pm_notifier_call_chain_robust(PM_SUSPEND_PREPARE, PM_POST_SUSPEND); + if (error) + goto Restore; trace_suspend_resume(TPS("freeze_processes"), 0, true); error = suspend_freeze_processes(); @@ -363,8 +361,8 @@ static int suspend_prepare(suspend_state_t state) suspend_stats.failed_freeze++; dpm_save_failed_step(SUSPEND_FREEZE); - Finish: - __pm_notifier_call_chain(PM_POST_SUSPEND, nr_calls, NULL); + pm_notifier_call_chain(PM_POST_SUSPEND); + Restore: pm_restore_console(); return error; } diff --git a/kernel/power/user.c b/kernel/power/user.c index d5eedc2baa2a..047f598f89a5 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -46,7 +46,7 @@ int is_hibernate_resume_dev(const struct inode *bd_inode) static int snapshot_open(struct inode *inode, struct file *filp) { struct snapshot_data *data; - int error, nr_calls = 0; + int error; if (!hibernation_available()) return -EPERM; @@ -73,9 +73,7 @@ static int snapshot_open(struct inode *inode, struct file *filp) swap_type_of(swsusp_resume_device, 0, NULL) : -1; data->mode = O_RDONLY; data->free_bitmaps = false; - error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls); - if (error) - __pm_notifier_call_chain(PM_POST_HIBERNATION, --nr_calls, NULL); + error = pm_notifier_call_chain_robust(PM_HIBERNATION_PREPARE, PM_POST_HIBERNATION); } else { /* * Resuming. We may need to wait for the image device to @@ -85,15 +83,11 @@ static int snapshot_open(struct inode *inode, struct file *filp) data->swap = -1; data->mode = O_WRONLY; - error = __pm_notifier_call_chain(PM_RESTORE_PREPARE, -1, &nr_calls); + error = pm_notifier_call_chain_robust(PM_RESTORE_PREPARE, PM_POST_RESTORE); if (!error) { error = create_basic_memory_bitmaps(); data->free_bitmaps = !error; - } else - nr_calls--; - - if (error) - __pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL); + } } if (error) hibernate_release(); diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 46ff97e909c6..1bc36a1db14f 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -171,7 +171,7 @@ class SystemValues: tracefuncs = { 'sys_sync': {}, 'ksys_sync': {}, - '__pm_notifier_call_chain': {}, + 'pm_notifier_call_chain_robust': {}, 'pm_prepare_console': {}, 'pm_notifier_call_chain': {}, 'freeze_processes': {}, -- cgit v1.3.1 From 1e7e47883830aae5e8246a22ca2fc6883c61acdf Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 18 Aug 2020 15:57:45 +0200 Subject: x86/static_call: Add inline static call implementation for x86-64 Add the inline static call implementation for x86-64. The generated code is identical to the out-of-line case, except we move the trampoline into it's own section. Objtool uses the trampoline naming convention to detect all the call sites. It then annotates those call sites in the .static_call_sites section. During boot (and module init), the call sites are patched to call directly into the destination function. The temporary trampoline is then no longer used. [peterz: merged trampolines, put trampoline in section] Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135804.864271425@infradead.org --- arch/x86/Kconfig | 3 +- arch/x86/include/asm/static_call.h | 13 +++- arch/x86/kernel/static_call.c | 3 + arch/x86/kernel/vmlinux.lds.S | 1 + include/asm-generic/vmlinux.lds.h | 6 ++ tools/include/linux/static_call_types.h | 28 +++++++ tools/objtool/check.c | 130 ++++++++++++++++++++++++++++++++ tools/objtool/check.h | 1 + tools/objtool/elf.c | 8 +- tools/objtool/elf.h | 3 +- tools/objtool/objtool.h | 1 + tools/objtool/orc_gen.c | 4 +- tools/objtool/sync-check.sh | 1 + 13 files changed, 193 insertions(+), 9 deletions(-) create mode 100644 tools/include/linux/static_call_types.h (limited to 'tools') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 595c06b32b3a..8a48d3eedb84 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -216,6 +216,7 @@ config X86 select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR select HAVE_STACK_VALIDATION if X86_64 select HAVE_STATIC_CALL + select HAVE_STATIC_CALL_INLINE if HAVE_STACK_VALIDATION select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK @@ -231,6 +232,7 @@ config X86 select RTC_MC146818_LIB select SPARSE_IRQ select SRCU + select STACK_VALIDATION if HAVE_STACK_VALIDATION && (HAVE_STATIC_CALL_INLINE || RETPOLINE) select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select USER_STACKTRACE_SUPPORT @@ -452,7 +454,6 @@ config GOLDFISH config RETPOLINE bool "Avoid speculative indirect branches in kernel" default y - select STACK_VALIDATION if HAVE_STACK_VALIDATION help Compile kernel with the retpoline compiler options to guard against kernel-to-user data leaks by avoiding speculative indirect diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index 07aa8791cbfe..33469ae3612c 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -5,12 +5,23 @@ #include /* + * For CONFIG_HAVE_STATIC_CALL_INLINE, this is a temporary trampoline which + * uses the current value of the key->func pointer to do an indirect jump to + * the function. This trampoline is only used during boot, before the call + * sites get patched by static_call_update(). The name of this trampoline has + * a magical aspect: objtool uses it to find static call sites so it can create + * the .static_call_sites section. + * * For CONFIG_HAVE_STATIC_CALL, this is a permanent trampoline which * does a direct jump to the function. The direct jump gets patched by * static_call_update(). + * + * Having the trampoline in a special section forces GCC to emit a JMP.d32 when + * it does tail-call optimization on the call; since you cannot compute the + * relative displacement across sections. */ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ - asm(".pushsection .text, \"ax\" \n" \ + asm(".pushsection .static_call.text, \"ax\" \n" \ ".align 4 \n" \ ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \ STATIC_CALL_TRAMP_STR(name) ": \n" \ diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 0565825970af..5ff2b639a1a6 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -26,6 +26,9 @@ void arch_static_call_transform(void *site, void *tramp, void *func) if (tramp) __static_call_transform(tramp, JMP32_INSN_OPCODE, func); + if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) + __static_call_transform(site, CALL_INSN_OPCODE, func); + mutex_unlock(&text_mutex); } EXPORT_SYMBOL_GPL(arch_static_call_transform); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 9a03e5b23135..2568f4cdcbd1 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -136,6 +136,7 @@ SECTIONS ENTRY_TEXT ALIGN_ENTRY_TEXT_END SOFTIRQENTRY_TEXT + STATIC_CALL_TEXT *(.fixup) *(.gnu.warning) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 0088a5cd6a40..0502087654d7 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -642,6 +642,12 @@ *(.softirqentry.text) \ __softirqentry_text_end = .; +#define STATIC_CALL_TEXT \ + ALIGN_FUNCTION(); \ + __static_call_text_start = .; \ + *(.static_call.text) \ + __static_call_text_end = .; + /* Section used for early init (in .S files) */ #define HEAD_TEXT KEEP(*(.head.text)) diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h new file mode 100644 index 000000000000..408d345d83e1 --- /dev/null +++ b/tools/include/linux/static_call_types.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _STATIC_CALL_TYPES_H +#define _STATIC_CALL_TYPES_H + +#include +#include + +#define STATIC_CALL_KEY_PREFIX __SCK__ +#define STATIC_CALL_KEY_PREFIX_STR __stringify(STATIC_CALL_KEY_PREFIX) +#define STATIC_CALL_KEY_PREFIX_LEN (sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1) +#define STATIC_CALL_KEY(name) __PASTE(STATIC_CALL_KEY_PREFIX, name) + +#define STATIC_CALL_TRAMP_PREFIX __SCT__ +#define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX) +#define STATIC_CALL_TRAMP_PREFIX_LEN (sizeof(STATIC_CALL_TRAMP_PREFIX_STR) - 1) +#define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name) +#define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name)) + +/* + * The static call site table needs to be created by external tooling (objtool + * or a compiler plugin). + */ +struct static_call_site { + s32 addr; + s32 key; +}; + +#endif /* _STATIC_CALL_TYPES_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index e034a8f24f46..f8f7a40c6ef3 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -16,6 +16,7 @@ #include #include +#include #define FAKE_JUMP_OFFSET -1 @@ -433,6 +434,103 @@ reachable: return 0; } +static int create_static_call_sections(struct objtool_file *file) +{ + struct section *sec, *reloc_sec; + struct reloc *reloc; + struct static_call_site *site; + struct instruction *insn; + struct symbol *key_sym; + char *key_name, *tmp; + int idx; + + sec = find_section_by_name(file->elf, ".static_call_sites"); + if (sec) { + INIT_LIST_HEAD(&file->static_call_list); + WARN("file already has .static_call_sites section, skipping"); + return 0; + } + + if (list_empty(&file->static_call_list)) + return 0; + + idx = 0; + list_for_each_entry(insn, &file->static_call_list, static_call_node) + idx++; + + sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE, + sizeof(struct static_call_site), idx); + if (!sec) + return -1; + + reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA); + if (!reloc_sec) + return -1; + + idx = 0; + list_for_each_entry(insn, &file->static_call_list, static_call_node) { + + site = (struct static_call_site *)sec->data->d_buf + idx; + memset(site, 0, sizeof(struct static_call_site)); + + /* populate reloc for 'addr' */ + reloc = malloc(sizeof(*reloc)); + if (!reloc) { + perror("malloc"); + return -1; + } + memset(reloc, 0, sizeof(*reloc)); + reloc->sym = insn->sec->sym; + reloc->addend = insn->offset; + reloc->type = R_X86_64_PC32; + reloc->offset = idx * sizeof(struct static_call_site); + reloc->sec = reloc_sec; + elf_add_reloc(file->elf, reloc); + + /* find key symbol */ + key_name = strdup(insn->call_dest->name); + if (!key_name) { + perror("strdup"); + return -1; + } + if (strncmp(key_name, STATIC_CALL_TRAMP_PREFIX_STR, + STATIC_CALL_TRAMP_PREFIX_LEN)) { + WARN("static_call: trampoline name malformed: %s", key_name); + return -1; + } + tmp = key_name + STATIC_CALL_TRAMP_PREFIX_LEN - STATIC_CALL_KEY_PREFIX_LEN; + memcpy(tmp, STATIC_CALL_KEY_PREFIX_STR, STATIC_CALL_KEY_PREFIX_LEN); + + key_sym = find_symbol_by_name(file->elf, tmp); + if (!key_sym) { + WARN("static_call: can't find static_call_key symbol: %s", tmp); + return -1; + } + free(key_name); + + /* populate reloc for 'key' */ + reloc = malloc(sizeof(*reloc)); + if (!reloc) { + perror("malloc"); + return -1; + } + memset(reloc, 0, sizeof(*reloc)); + reloc->sym = key_sym; + reloc->addend = 0; + reloc->type = R_X86_64_PC32; + reloc->offset = idx * sizeof(struct static_call_site) + 4; + reloc->sec = reloc_sec; + elf_add_reloc(file->elf, reloc); + + idx++; + } + + if (elf_rebuild_reloc_section(file->elf, reloc_sec)) + return -1; + + return 0; +} + /* * Warnings shouldn't be reported for ignored functions. */ @@ -1522,6 +1620,23 @@ static int read_intra_function_calls(struct objtool_file *file) return 0; } +static int read_static_call_tramps(struct objtool_file *file) +{ + struct section *sec; + struct symbol *func; + + for_each_sec(file, sec) { + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->bind == STB_GLOBAL && + !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR, + strlen(STATIC_CALL_TRAMP_PREFIX_STR))) + func->static_call_tramp = true; + } + } + + return 0; +} + static void mark_rodata(struct objtool_file *file) { struct section *sec; @@ -1601,6 +1716,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + ret = read_static_call_tramps(file); + if (ret) + return ret; + return 0; } @@ -2432,6 +2551,11 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (dead_end_function(file, insn->call_dest)) return 0; + if (insn->type == INSN_CALL && insn->call_dest->static_call_tramp) { + list_add_tail(&insn->static_call_node, + &file->static_call_list); + } + break; case INSN_JUMP_CONDITIONAL: @@ -2791,6 +2915,7 @@ int check(const char *_objname, bool orc) INIT_LIST_HEAD(&file.insn_list); hash_init(file.insn_hash); + INIT_LIST_HEAD(&file.static_call_list); file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment"); file.ignore_unreachables = no_unreachable; file.hints = false; @@ -2838,6 +2963,11 @@ int check(const char *_objname, bool orc) warnings += ret; } + ret = create_static_call_sections(&file); + if (ret < 0) + goto out; + warnings += ret; + if (orc) { ret = create_orc(&file); if (ret < 0) diff --git a/tools/objtool/check.h b/tools/objtool/check.h index 061aa96e15d3..36d38b9153ac 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -22,6 +22,7 @@ struct insn_state { struct instruction { struct list_head list; struct hlist_node hash; + struct list_head static_call_node; struct section *sec; unsigned long offset; unsigned int len; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 3ddbd66f1a37..4e1d7460574b 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -652,7 +652,7 @@ err: } struct section *elf_create_section(struct elf *elf, const char *name, - size_t entsize, int nr) + unsigned int sh_flags, size_t entsize, int nr) { struct section *sec, *shstrtab; size_t size = entsize * nr; @@ -712,7 +712,7 @@ struct section *elf_create_section(struct elf *elf, const char *name, sec->sh.sh_entsize = entsize; sec->sh.sh_type = SHT_PROGBITS; sec->sh.sh_addralign = 1; - sec->sh.sh_flags = SHF_ALLOC; + sec->sh.sh_flags = SHF_ALLOC | sh_flags; /* Add section name to .shstrtab (or .strtab for Clang) */ @@ -767,7 +767,7 @@ static struct section *elf_create_rel_reloc_section(struct elf *elf, struct sect strcpy(relocname, ".rel"); strcat(relocname, base->name); - sec = elf_create_section(elf, relocname, sizeof(GElf_Rel), 0); + sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rel), 0); free(relocname); if (!sec) return NULL; @@ -797,7 +797,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec strcpy(relocname, ".rela"); strcat(relocname, base->name); - sec = elf_create_section(elf, relocname, sizeof(GElf_Rela), 0); + sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rela), 0); free(relocname); if (!sec) return NULL; diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index 6cc80a075166..807f8c670097 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -56,6 +56,7 @@ struct symbol { unsigned int len; struct symbol *pfunc, *cfunc, *alias; bool uaccess_safe; + bool static_call_tramp; }; struct reloc { @@ -120,7 +121,7 @@ static inline u32 reloc_hash(struct reloc *reloc) } struct elf *elf_open_read(const char *name, int flags); -struct section *elf_create_section(struct elf *elf, const char *name, size_t entsize, int nr); +struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype); void elf_add_reloc(struct elf *elf, struct reloc *reloc); int elf_write_insn(struct elf *elf, struct section *sec, diff --git a/tools/objtool/objtool.h b/tools/objtool/objtool.h index 528028a66816..9a7cd0b88bd8 100644 --- a/tools/objtool/objtool.h +++ b/tools/objtool/objtool.h @@ -16,6 +16,7 @@ struct objtool_file { struct elf *elf; struct list_head insn_list; DECLARE_HASHTABLE(insn_hash, 20); + struct list_head static_call_list; bool ignore_unreachables, c_file, hints, rodata; }; diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 968f55e6dd94..e6b2363c2e03 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -177,7 +177,7 @@ int create_orc_sections(struct objtool_file *file) /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */ - sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx); + sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx); if (!sec) return -1; @@ -186,7 +186,7 @@ int create_orc_sections(struct objtool_file *file) return -1; /* create .orc_unwind section */ - u_sec = elf_create_section(file->elf, ".orc_unwind", + u_sec = elf_create_section(file->elf, ".orc_unwind", 0, sizeof(struct orc_entry), idx); /* populate sections */ diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh index 2a1261bfbb62..aa099b21dffa 100755 --- a/tools/objtool/sync-check.sh +++ b/tools/objtool/sync-check.sh @@ -7,6 +7,7 @@ arch/x86/include/asm/orc_types.h arch/x86/include/asm/emulate_prefix.h arch/x86/lib/x86-opcode-map.txt arch/x86/tools/gen-insn-attr-x86.awk +include/linux/static_call_types.h ' check_2 () { -- cgit v1.3.1 From 5b06fd3bb9cdce4f3e731c48eb5b74c4acc47997 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:49 +0200 Subject: static_call: Handle tail-calls GCC can turn our static_call(name)(args...) into a tail call, in which case we get a JMP.d32 into the trampoline (which then does a further tail-call). Teach objtool to recognise and mark these in .static_call_sites and adjust the code patching to deal with this. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135805.101186767@infradead.org --- arch/x86/kernel/static_call.c | 21 ++++++++++++++++++--- include/linux/static_call.h | 4 ++-- include/linux/static_call_types.h | 7 +++++++ kernel/static_call.c | 21 +++++++++++++-------- tools/include/linux/static_call_types.h | 7 +++++++ tools/objtool/check.c | 18 +++++++++++++----- 6 files changed, 60 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index ead6726fb06d..60a325c731df 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -41,15 +41,30 @@ static void __static_call_transform(void *insn, enum insn_type type, void *func) text_poke_bp(insn, code, size, NULL); } -void arch_static_call_transform(void *site, void *tramp, void *func) +static inline enum insn_type __sc_insn(bool null, bool tail) +{ + /* + * Encode the following table without branches: + * + * tail null insn + * -----+-------+------ + * 0 | 0 | CALL + * 0 | 1 | NOP + * 1 | 0 | JMP + * 1 | 1 | RET + */ + return 2*tail + null; +} + +void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) { mutex_lock(&text_mutex); if (tramp) - __static_call_transform(tramp, func ? JMP : RET, func); + __static_call_transform(tramp, __sc_insn(!func, true), func); if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) - __static_call_transform(site, func ? CALL : NOP, func); + __static_call_transform(site, __sc_insn(!func, tail), func); mutex_unlock(&text_mutex); } diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 0f74581e0e2f..519bd666e096 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -103,7 +103,7 @@ /* * Either @site or @tramp can be NULL. */ -extern void arch_static_call_transform(void *site, void *tramp, void *func); +extern void arch_static_call_transform(void *site, void *tramp, void *func, bool tail); #define STATIC_CALL_TRAMP_ADDR(name) &STATIC_CALL_TRAMP(name) @@ -206,7 +206,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) { cpus_read_lock(); WRITE_ONCE(key->func, func); - arch_static_call_transform(NULL, tramp, func); + arch_static_call_transform(NULL, tramp, func, false); cpus_read_unlock(); } diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h index 408d345d83e1..89135bb35bf7 100644 --- a/include/linux/static_call_types.h +++ b/include/linux/static_call_types.h @@ -16,6 +16,13 @@ #define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name) #define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name)) +/* + * Flags in the low bits of static_call_site::key. + */ +#define STATIC_CALL_SITE_TAIL 1UL /* tail call */ +#define STATIC_CALL_SITE_INIT 2UL /* init section */ +#define STATIC_CALL_SITE_FLAGS 3UL + /* * The static call site table needs to be created by external tooling (objtool * or a compiler plugin). diff --git a/kernel/static_call.c b/kernel/static_call.c index 97142cb6bfa6..d98e0e4272c1 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -15,8 +15,6 @@ extern struct static_call_site __start_static_call_sites[], static bool static_call_initialized; -#define STATIC_CALL_INIT 1UL - /* mutex to protect key modules/sites */ static DEFINE_MUTEX(static_call_mutex); @@ -39,18 +37,23 @@ static inline void *static_call_addr(struct static_call_site *site) static inline struct static_call_key *static_call_key(const struct static_call_site *site) { return (struct static_call_key *) - (((long)site->key + (long)&site->key) & ~STATIC_CALL_INIT); + (((long)site->key + (long)&site->key) & ~STATIC_CALL_SITE_FLAGS); } /* These assume the key is word-aligned. */ static inline bool static_call_is_init(struct static_call_site *site) { - return ((long)site->key + (long)&site->key) & STATIC_CALL_INIT; + return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_INIT; +} + +static inline bool static_call_is_tail(struct static_call_site *site) +{ + return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_TAIL; } static inline void static_call_set_init(struct static_call_site *site) { - site->key = ((long)static_call_key(site) | STATIC_CALL_INIT) - + site->key = ((long)static_call_key(site) | STATIC_CALL_SITE_INIT) - (long)&site->key; } @@ -104,7 +107,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) key->func = func; - arch_static_call_transform(NULL, tramp, func); + arch_static_call_transform(NULL, tramp, func, false); /* * If uninitialized, we'll not update the callsites, but they still @@ -154,7 +157,8 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) continue; } - arch_static_call_transform(site_addr, NULL, func); + arch_static_call_transform(site_addr, NULL, func, + static_call_is_tail(site)); } } @@ -198,7 +202,8 @@ static int __static_call_init(struct module *mod, key->mods = site_mod; } - arch_static_call_transform(site_addr, NULL, key->func); + arch_static_call_transform(site_addr, NULL, key->func, + static_call_is_tail(site)); } return 0; diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h index 408d345d83e1..89135bb35bf7 100644 --- a/tools/include/linux/static_call_types.h +++ b/tools/include/linux/static_call_types.h @@ -16,6 +16,13 @@ #define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name) #define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name)) +/* + * Flags in the low bits of static_call_site::key. + */ +#define STATIC_CALL_SITE_TAIL 1UL /* tail call */ +#define STATIC_CALL_SITE_INIT 2UL /* init section */ +#define STATIC_CALL_SITE_FLAGS 3UL + /* * The static call site table needs to be created by external tooling (objtool * or a compiler plugin). diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f8f7a40c6ef3..75d0cd2f9044 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -516,7 +516,7 @@ static int create_static_call_sections(struct objtool_file *file) } memset(reloc, 0, sizeof(*reloc)); reloc->sym = key_sym; - reloc->addend = 0; + reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0; reloc->type = R_X86_64_PC32; reloc->offset = idx * sizeof(struct static_call_site) + 4; reloc->sec = reloc_sec; @@ -747,6 +747,10 @@ static int add_jump_destinations(struct objtool_file *file) } else { /* external sibling call */ insn->call_dest = reloc->sym; + if (insn->call_dest->static_call_tramp) { + list_add_tail(&insn->static_call_node, + &file->static_call_list); + } continue; } @@ -798,6 +802,10 @@ static int add_jump_destinations(struct objtool_file *file) /* internal sibling call */ insn->call_dest = insn->jump_dest->func; + if (insn->call_dest->static_call_tramp) { + list_add_tail(&insn->static_call_node, + &file->static_call_list); + } } } } @@ -1684,6 +1692,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + ret = read_static_call_tramps(file); + if (ret) + return ret; + ret = add_jump_destinations(file); if (ret) return ret; @@ -1716,10 +1728,6 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - ret = read_static_call_tramps(file); - if (ret) - return ret; - return 0; } -- cgit v1.3.1 From b784a88e523a7c1912fc75486ce8c029ee6fe523 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 Jun 2020 13:32:34 +0100 Subject: perf: Fix opt help text for --no-bpf-event The opt name was once inverted but the help text didn't reflect the change. Fixes: 71184c6ab7e6 ("perf record: Replace option --bpf-event with --no-bpf-event") Signed-off-by: Fam Zheng Signed-off-by: Jiri Kosina --- tools/perf/builtin-record.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f91352f847c0..772f1057647f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2452,7 +2452,7 @@ static struct option __record_options[] = { OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, "synthesize non-sample events at the end of output"), OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), - OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"), + OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, "Fail if the specified frequency can't be used"), OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", -- cgit v1.3.1 From cae1d5a2c5a491141faa747e9944ba40ab4ab786 Mon Sep 17 00:00:00 2001 From: Anatoly Pugachev Date: Tue, 16 Jun 2020 15:26:18 +0300 Subject: selftests: vm: add fragment CONFIG_GUP_BENCHMARK When running gup_benchmark test the following output states that the config options is missing. $ sudo ./gup_benchmark open: No such file or directory $ sudo strace -e trace=file ./gup_benchmark 2>&1 | tail -3 openat(AT_FDCWD, "/sys/kernel/debug/gup_benchmark", O_RDWR) = -1 ENOENT (No such file or directory) open: No such file or directory +++ exited with 1 +++ Fix it by adding config option fragment. Fixes: 64c349f4ae78 ("mm: add infrastructure for get_user_pages_fast() benchmarking") Signed-off-by: Anatoly Pugachev CC: Jiri Kosina CC: Shuah Khan Signed-off-by: Jiri Kosina --- tools/testing/selftests/vm/config | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config index 3ba674b64fa9..69dd0d1aa30b 100644 --- a/tools/testing/selftests/vm/config +++ b/tools/testing/selftests/vm/config @@ -3,3 +3,4 @@ CONFIG_USERFAULTFD=y CONFIG_TEST_VMALLOC=m CONFIG_DEVICE_PRIVATE=y CONFIG_TEST_HMM=m +CONFIG_GUP_BENCHMARK=y -- cgit v1.3.1 From 6545eb030e6f14cef8793a86312483c788eaee46 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Tue, 25 Aug 2020 13:47:39 +0100 Subject: objtool: Move object file loading out of check() Structure objtool_file can be used by different subcommands. In fact it already is, by check and orc. Provide a function that allows to initialize objtool_file, that builtin can call, without relying on check to do the correct setup for them and explicitly hand the objtool_file to them. Reviewed-by: Miroslav Benes Signed-off-by: Julien Thierry Signed-off-by: Josh Poimboeuf --- tools/objtool/builtin-check.c | 7 ++++++- tools/objtool/builtin-orc.c | 8 +++++++- tools/objtool/check.c | 42 +++++++++++++----------------------------- tools/objtool/objtool.c | 30 ++++++++++++++++++++++++++++++ tools/objtool/objtool.h | 4 +++- tools/objtool/weak.c | 4 +--- 6 files changed, 60 insertions(+), 35 deletions(-) (limited to 'tools') diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 7a44174967b5..0126ec3bb6c9 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -41,6 +41,7 @@ const struct option check_options[] = { int cmd_check(int argc, const char **argv) { const char *objname, *s; + struct objtool_file *file; argc = parse_options(argc, argv, check_options, check_usage, 0); @@ -53,5 +54,9 @@ int cmd_check(int argc, const char **argv) if (s && !s[9]) vmlinux = true; - return check(objname, false); + file = objtool_open_read(objname); + if (!file) + return 1; + + return check(file, false); } diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c index b1dfe2007962..3979f275a775 100644 --- a/tools/objtool/builtin-orc.c +++ b/tools/objtool/builtin-orc.c @@ -31,13 +31,19 @@ int cmd_orc(int argc, const char **argv) usage_with_options(orc_usage, check_options); if (!strncmp(argv[0], "gen", 3)) { + struct objtool_file *file; + argc = parse_options(argc, argv, check_options, orc_usage, 0); if (argc != 1) usage_with_options(orc_usage, check_options); objname = argv[0]; - return check(objname, true); + file = objtool_open_read(objname); + if (!file) + return 1; + + return check(file, true); } if (!strcmp(argv[0], "dump")) { diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 75d0cd2f9044..9d4efa3b12ba 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -28,7 +28,6 @@ struct alternative { bool skip_orig; }; -const char *objname; struct cfi_init_state initial_func_cfi; struct instruction *find_insn(struct objtool_file *file, @@ -2909,37 +2908,22 @@ static int validate_reachable_instructions(struct objtool_file *file) return 0; } -static struct objtool_file file; - -int check(const char *_objname, bool orc) +int check(struct objtool_file *file, bool orc) { int ret, warnings = 0; - objname = _objname; - - file.elf = elf_open_read(objname, O_RDWR); - if (!file.elf) - return 1; - - INIT_LIST_HEAD(&file.insn_list); - hash_init(file.insn_hash); - INIT_LIST_HEAD(&file.static_call_list); - file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment"); - file.ignore_unreachables = no_unreachable; - file.hints = false; - arch_initial_func_cfi_state(&initial_func_cfi); - ret = decode_sections(&file); + ret = decode_sections(file); if (ret < 0) goto out; warnings += ret; - if (list_empty(&file.insn_list)) + if (list_empty(&file->insn_list)) goto out; if (vmlinux && !validate_dup) { - ret = validate_vmlinux_functions(&file); + ret = validate_vmlinux_functions(file); if (ret < 0) goto out; @@ -2948,46 +2932,46 @@ int check(const char *_objname, bool orc) } if (retpoline) { - ret = validate_retpoline(&file); + ret = validate_retpoline(file); if (ret < 0) return ret; warnings += ret; } - ret = validate_functions(&file); + ret = validate_functions(file); if (ret < 0) goto out; warnings += ret; - ret = validate_unwind_hints(&file, NULL); + ret = validate_unwind_hints(file, NULL); if (ret < 0) goto out; warnings += ret; if (!warnings) { - ret = validate_reachable_instructions(&file); + ret = validate_reachable_instructions(file); if (ret < 0) goto out; warnings += ret; } - ret = create_static_call_sections(&file); + ret = create_static_call_sections(file); if (ret < 0) goto out; warnings += ret; if (orc) { - ret = create_orc(&file); + ret = create_orc(file); if (ret < 0) goto out; - ret = create_orc_sections(&file); + ret = create_orc_sections(file); if (ret < 0) goto out; } - if (file.elf->changed) { - ret = elf_write(file.elf); + if (file->elf->changed) { + ret = elf_write(file->elf); if (ret < 0) goto out; } diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index 58fdda510653..9df0cd86d310 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -22,6 +22,8 @@ #include #include "builtin.h" +#include "objtool.h" +#include "warn.h" struct cmd_struct { const char *name; @@ -39,6 +41,34 @@ static struct cmd_struct objtool_cmds[] = { bool help; +const char *objname; +static struct objtool_file file; + +struct objtool_file *objtool_open_read(const char *_objname) +{ + if (objname) { + if (strcmp(objname, _objname)) { + WARN("won't handle more than one file at a time"); + return NULL; + } + return &file; + } + objname = _objname; + + file.elf = elf_open_read(objname, O_RDWR); + if (!file.elf) + return NULL; + + INIT_LIST_HEAD(&file.insn_list); + hash_init(file.insn_hash); + INIT_LIST_HEAD(&file.static_call_list); + file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment"); + file.ignore_unreachables = no_unreachable; + file.hints = false; + + return &file; +} + static void cmd_usage(void) { unsigned int i, longest = 0; diff --git a/tools/objtool/objtool.h b/tools/objtool/objtool.h index 9a7cd0b88bd8..7efc43f22174 100644 --- a/tools/objtool/objtool.h +++ b/tools/objtool/objtool.h @@ -20,7 +20,9 @@ struct objtool_file { bool ignore_unreachables, c_file, hints, rodata; }; -int check(const char *objname, bool orc); +struct objtool_file *objtool_open_read(const char *_objname); + +int check(struct objtool_file *file, bool orc); int orc_dump(const char *objname); int create_orc(struct objtool_file *file); int create_orc_sections(struct objtool_file *file); diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c index 942ea5e8ac36..82698319f008 100644 --- a/tools/objtool/weak.c +++ b/tools/objtool/weak.c @@ -17,9 +17,7 @@ return ENOSYS; \ }) -const char __weak *objname; - -int __weak check(const char *_objname, bool orc) +int __weak check(struct objtool_file *file, bool orc) { UNSUPPORTED("check subcommand"); } -- cgit v1.3.1 From d44becb9decf4438d1e555b1428634964d2e5764 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Tue, 25 Aug 2020 13:47:40 +0100 Subject: objtool: Move ORC logic out of check() Now that the objtool_file can be obtained outside of the check function, orc generation builtin no longer requires check to explicitly call its orc related functions. Signed-off-by: Julien Thierry Reviewed-by: Miroslav Benes Signed-off-by: Josh Poimboeuf --- tools/objtool/builtin-check.c | 10 +++++++++- tools/objtool/builtin-orc.c | 21 ++++++++++++++++++++- tools/objtool/check.c | 18 +----------------- tools/objtool/objtool.h | 2 +- tools/objtool/weak.c | 2 +- 5 files changed, 32 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 0126ec3bb6c9..c6d199bfd0ae 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -42,6 +42,7 @@ int cmd_check(int argc, const char **argv) { const char *objname, *s; struct objtool_file *file; + int ret; argc = parse_options(argc, argv, check_options, check_usage, 0); @@ -58,5 +59,12 @@ int cmd_check(int argc, const char **argv) if (!file) return 1; - return check(file, false); + ret = check(file); + if (ret) + return ret; + + if (file->elf->changed) + return elf_write(file->elf); + + return 0; } diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c index 3979f275a775..7b31121fa60b 100644 --- a/tools/objtool/builtin-orc.c +++ b/tools/objtool/builtin-orc.c @@ -32,6 +32,7 @@ int cmd_orc(int argc, const char **argv) if (!strncmp(argv[0], "gen", 3)) { struct objtool_file *file; + int ret; argc = parse_options(argc, argv, check_options, orc_usage, 0); if (argc != 1) @@ -43,7 +44,25 @@ int cmd_orc(int argc, const char **argv) if (!file) return 1; - return check(file, true); + ret = check(file); + if (ret) + return ret; + + if (list_empty(&file->insn_list)) + return 0; + + ret = create_orc(file); + if (ret) + return ret; + + ret = create_orc_sections(file); + if (ret) + return ret; + + if (!file->elf->changed) + return 0; + + return elf_write(file->elf); } if (!strcmp(argv[0], "dump")) { diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 9d4efa3b12ba..4afc2d5465b9 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2908,7 +2908,7 @@ static int validate_reachable_instructions(struct objtool_file *file) return 0; } -int check(struct objtool_file *file, bool orc) +int check(struct objtool_file *file) { int ret, warnings = 0; @@ -2960,22 +2960,6 @@ int check(struct objtool_file *file, bool orc) goto out; warnings += ret; - if (orc) { - ret = create_orc(file); - if (ret < 0) - goto out; - - ret = create_orc_sections(file); - if (ret < 0) - goto out; - } - - if (file->elf->changed) { - ret = elf_write(file->elf); - if (ret < 0) - goto out; - } - out: if (ret < 0) { /* diff --git a/tools/objtool/objtool.h b/tools/objtool/objtool.h index 7efc43f22174..a635f68a9b09 100644 --- a/tools/objtool/objtool.h +++ b/tools/objtool/objtool.h @@ -22,7 +22,7 @@ struct objtool_file { struct objtool_file *objtool_open_read(const char *_objname); -int check(struct objtool_file *file, bool orc); +int check(struct objtool_file *file); int orc_dump(const char *objname); int create_orc(struct objtool_file *file); int create_orc_sections(struct objtool_file *file); diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c index 82698319f008..29180d599b08 100644 --- a/tools/objtool/weak.c +++ b/tools/objtool/weak.c @@ -17,7 +17,7 @@ return ENOSYS; \ }) -int __weak check(struct objtool_file *file, bool orc) +int __weak check(struct objtool_file *file) { UNSUPPORTED("check subcommand"); } -- cgit v1.3.1 From 3eaecac88a17f7fdf29561a197dc728f7f697c60 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Tue, 25 Aug 2020 13:47:41 +0100 Subject: objtool: Skip ORC entry creation for non-text sections Orc generation is only done for text sections, but some instructions can be found in non-text sections (e.g. .discard.text sections). Skip setting their orc sections since their whole sections will be skipped for orc generation. Reviewed-by: Miroslav Benes Signed-off-by: Julien Thierry Signed-off-by: Josh Poimboeuf --- tools/objtool/orc_gen.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index e6b2363c2e03..22fe4398197f 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -18,6 +18,9 @@ int create_orc(struct objtool_file *file) struct cfi_reg *cfa = &insn->cfi.cfa; struct cfi_reg *bp = &insn->cfi.regs[CFI_BP]; + if (!insn->sec->text) + continue; + orc->end = insn->cfi.end; if (cfa->base == CFI_UNDEFINED) { -- cgit v1.3.1 From 66734e32463bd1346466f92662feeaccef26e94f Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Tue, 25 Aug 2020 13:47:42 +0100 Subject: objtool: Define 'struct orc_entry' only when needed Implementation of ORC requires some definitions that are currently provided by the target architecture headers. Do not depend on these definitions when the orc subcommand is not implemented. This avoid requiring arches with no orc implementation to provide dummy orc definitions. Signed-off-by: Julien Thierry Reviewed-by: Miroslav Benes Signed-off-by: Josh Poimboeuf --- tools/objtool/Makefile | 4 ++++ tools/objtool/arch.h | 2 ++ tools/objtool/check.h | 2 ++ 3 files changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 7770edcda3a0..33d1e3ca8efd 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -55,6 +55,10 @@ ifeq ($(SRCARCH),x86) SUBCMD_ORC := y endif +ifeq ($(SUBCMD_ORC),y) + CFLAGS += -DINSN_USE_ORC +endif + export SUBCMD_CHECK SUBCMD_ORC export srctree OUTPUT CFLAGS SRCARCH AWK include $(srctree)/tools/build/Makefile.include diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h index 2e2ce089b0e9..b18c5f61d42d 100644 --- a/tools/objtool/arch.h +++ b/tools/objtool/arch.h @@ -11,7 +11,9 @@ #include "objtool.h" #include "cfi.h" +#ifdef INSN_USE_ORC #include +#endif enum insn_type { INSN_JUMP_CONDITIONAL, diff --git a/tools/objtool/check.h b/tools/objtool/check.h index 36d38b9153ac..6cac34542122 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -43,7 +43,9 @@ struct instruction { struct symbol *func; struct list_head stack_ops; struct cfi_state cfi; +#ifdef INSN_USE_ORC struct orc_entry orc; +#endif }; struct instruction *find_insn(struct objtool_file *file, -- cgit v1.3.1 From a7863b3423fd5d1ab82161654ba83973764b570b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 1 Sep 2020 14:52:57 -0400 Subject: blk-iocost: update iocost_monitor.py iocost went through significant internal changes. Update iocost_monitor.py accordingly. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- tools/cgroup/iocost_monitor.py | 54 +++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 35 deletions(-) (limited to 'tools') diff --git a/tools/cgroup/iocost_monitor.py b/tools/cgroup/iocost_monitor.py index f4699f9b46ba..c4ff907c078b 100644 --- a/tools/cgroup/iocost_monitor.py +++ b/tools/cgroup/iocost_monitor.py @@ -45,8 +45,7 @@ except: err('The kernel does not have iocost enabled') IOC_RUNNING = prog['IOC_RUNNING'].value_() -NR_USAGE_SLOTS = prog['NR_USAGE_SLOTS'].value_() -HWEIGHT_WHOLE = prog['HWEIGHT_WHOLE'].value_() +WEIGHT_ONE = prog['WEIGHT_ONE'].value_() VTIME_PER_SEC = prog['VTIME_PER_SEC'].value_() VTIME_PER_USEC = prog['VTIME_PER_USEC'].value_() AUTOP_SSD_FAST = prog['AUTOP_SSD_FAST'].value_() @@ -100,7 +99,7 @@ class IocStat: self.period_ms = ioc.period_us.value_() / 1_000 self.period_at = ioc.period_at.value_() / 1_000_000 self.vperiod_at = ioc.period_at_vtime.value_() / VTIME_PER_SEC - self.vrate_pct = ioc.vtime_rate.counter.value_() * 100 / VTIME_PER_USEC + self.vrate_pct = ioc.vtime_base_rate.value_() * 100 / VTIME_PER_USEC self.busy_level = ioc.busy_level.value_() self.autop_idx = ioc.autop_idx.value_() self.user_cost_model = ioc.user_cost_model.value_() @@ -136,7 +135,7 @@ class IocStat: def table_header_str(self): return f'{"":25} active {"weight":>9} {"hweight%":>13} {"inflt%":>6} ' \ - f'{"dbt":>3} {"delay":>6} {"usages%"}' + f'{"debt":>7} {"delay":>7} {"usage%"}' class IocgStat: def __init__(self, iocg): @@ -144,11 +143,11 @@ class IocgStat: blkg = iocg.pd.blkg self.is_active = not list_empty(iocg.active_list.address_of_()) - self.weight = iocg.weight.value_() - self.active = iocg.active.value_() - self.inuse = iocg.inuse.value_() - self.hwa_pct = iocg.hweight_active.value_() * 100 / HWEIGHT_WHOLE - self.hwi_pct = iocg.hweight_inuse.value_() * 100 / HWEIGHT_WHOLE + self.weight = iocg.weight.value_() / WEIGHT_ONE + self.active = iocg.active.value_() / WEIGHT_ONE + self.inuse = iocg.inuse.value_() / WEIGHT_ONE + self.hwa_pct = iocg.hweight_active.value_() * 100 / WEIGHT_ONE + self.hwi_pct = iocg.hweight_inuse.value_() * 100 / WEIGHT_ONE self.address = iocg.value_() vdone = iocg.done_vtime.counter.value_() @@ -160,23 +159,13 @@ class IocgStat: else: self.inflight_pct = 0 - # vdebt used to be an atomic64_t and is now u64, support both - try: - self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000 - except: - self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000 - - self.use_delay = blkg.use_delay.counter.value_() - self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000 - - usage_idx = iocg.usage_idx.value_() - self.usages = [] - self.usage = 0 - for i in range(NR_USAGE_SLOTS): - usage = iocg.usages[(usage_idx + 1 + i) % NR_USAGE_SLOTS].value_() - upct = usage * 100 / HWEIGHT_WHOLE - self.usages.append(upct) - self.usage = max(self.usage, upct) + self.usage = (100 * iocg.usage_delta_us.value_() / + ioc.period_us.value_()) if self.active else 0 + self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000 + if blkg.use_delay.counter.value_() != 0: + self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000 + else: + self.delay_ms = 0 def dict(self, now, path): out = { 'cgroup' : path, @@ -189,25 +178,20 @@ class IocgStat: 'hweight_inuse_pct' : self.hwi_pct, 'inflight_pct' : self.inflight_pct, 'debt_ms' : self.debt_ms, - 'use_delay' : self.use_delay, 'delay_ms' : self.delay_ms, 'usage_pct' : self.usage, 'address' : self.address } - for i in range(len(self.usages)): - out[f'usage_pct_{i}'] = str(self.usages[i]) return out def table_row_str(self, path): out = f'{path[-28:]:28} ' \ f'{"*" if self.is_active else " "} ' \ - f'{self.inuse:5}/{self.active:5} ' \ + f'{round(self.inuse):5}/{round(self.active):5} ' \ f'{self.hwi_pct:6.2f}/{self.hwa_pct:6.2f} ' \ f'{self.inflight_pct:6.2f} ' \ - f'{min(math.ceil(self.debt_ms), 999):3} ' \ - f'{min(self.use_delay, 99):2}*'\ - f'{min(math.ceil(self.delay_ms), 999):03} ' - for u in self.usages: - out += f'{min(round(u), 999):03d}:' + f'{self.debt_ms:7.2f} ' \ + f'{self.delay_ms:7.2f} '\ + f'{min(self.usage, 999):6.2f}' out = out.rstrip(':') return out -- cgit v1.3.1 From 858e8b2eb4dd6fe6c4640463ad2f2ed3b8249ad4 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 1 Sep 2020 19:31:13 -0700 Subject: selftests/bpf: Test task_file iterator without visiting pthreads Modified existing bpf_iter_test_file.c program to check whether all accessed files from the main thread or not. Modified existing bpf_iter_test_file program to check whether all accessed files from the main thread or not. $ ./test_progs -n 4 ... #4/7 task_file:OK ... #4 bpf_iter:OK Summary: 1/24 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200902023113.1672863-1-yhs@fb.com --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 21 +++++++++++++++++++++ .../selftests/bpf/progs/bpf_iter_task_file.c | 10 +++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 7375d9a6d242..fe1a83b9875c 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -132,17 +132,38 @@ static void test_task_stack(void) bpf_iter_task_stack__destroy(skel); } +static void *do_nothing(void *arg) +{ + pthread_exit(arg); +} + static void test_task_file(void) { struct bpf_iter_task_file *skel; + pthread_t thread_id; + void *ret; skel = bpf_iter_task_file__open_and_load(); if (CHECK(!skel, "bpf_iter_task_file__open_and_load", "skeleton open_and_load failed\n")) return; + skel->bss->tgid = getpid(); + + if (CHECK(pthread_create(&thread_id, NULL, &do_nothing, NULL), + "pthread_create", "pthread_create failed\n")) + goto done; + do_dummy_read(skel->progs.dump_task_file); + if (CHECK(pthread_join(thread_id, &ret) || ret != NULL, + "pthread_join", "pthread_join failed\n")) + goto done; + + CHECK(skel->bss->count != 0, "check_count", + "invalid non pthread file visit count %d\n", skel->bss->count); + +done: bpf_iter_task_file__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c index 8b787baa2654..b2f7c7c5f952 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c @@ -6,6 +6,9 @@ char _license[] SEC("license") = "GPL"; +int count = 0; +int tgid = 0; + SEC("iter/task_file") int dump_task_file(struct bpf_iter__task_file *ctx) { @@ -17,8 +20,13 @@ int dump_task_file(struct bpf_iter__task_file *ctx) if (task == (void *)0 || file == (void *)0) return 0; - if (ctx->meta->seq_num == 0) + if (ctx->meta->seq_num == 0) { + count = 0; BPF_SEQ_PRINTF(seq, " tgid gid fd file\n"); + } + + if (tgid == task->tgid && task->tgid != task->pid) + count++; BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd, (long)file->f_op); -- cgit v1.3.1 From c6f4c2b027c471838f44de03ef8c4100e4986852 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 2 Sep 2020 13:44:24 +0200 Subject: selftests: mptcp: fix typo in mptcp_connect usage in mptcp_connect, 's' selects IPPROTO_MPTCP / IPPROTO_TCP as the value of 'protocol' in socket(), and 'm' switches between different send / receive modes. Fix die_usage(): swap 'm' and 's' and add missing 'sendfile' mode. Signed-off-by: Davide Caratti Reviewed-by: Matthieu Baerts Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 090620c3e10c..a54966531a64 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -65,8 +65,8 @@ static void die_usage(void) fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n"); fprintf(stderr, "\t-p num -- use port num\n"); - fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n"); - fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n"); + fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n"); + fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n"); fprintf(stderr, "\t-u -- check mptcp ulp\n"); fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n"); exit(1); -- cgit v1.3.1 From 1e44e6e82e7b4d2bae70a8a0b68f7d4f213b0e5f Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 6 Jul 2020 21:03:24 +0200 Subject: Replace HTTP links with HTTPS ones: LKMM Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Paul E. McKenney --- tools/memory-model/Documentation/references.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/memory-model/Documentation/references.txt b/tools/memory-model/Documentation/references.txt index ecbbaa5396d4..c5fdfd19df24 100644 --- a/tools/memory-model/Documentation/references.txt +++ b/tools/memory-model/Documentation/references.txt @@ -120,7 +120,7 @@ o Jade Alglave, Luc Maranget, and Michael Tautschnig. 2014. "Herding o Jade Alglave, Patrick Cousot, and Luc Maranget. 2016. "Syntax and semantics of the weak consistency model specification language - cat". CoRR abs/1608.07531 (2016). http://arxiv.org/abs/1608.07531 + cat". CoRR abs/1608.07531 (2016). https://arxiv.org/abs/1608.07531 Memory-model comparisons -- cgit v1.3.1 From cc9628b45c9fa9b165a50dbb262928bc529bf35d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 22 Jul 2020 16:08:23 -0700 Subject: tools/memory-model: Update recipes.txt prime_numbers.c path The expand_to_next_prime() and next_prime_number() functions have moved from lib/prime_numbers.c to lib/math/prime_numbers.c, so this commit updates recipes.txt to reflect this change. Signed-off-by: Paul E. McKenney --- tools/memory-model/Documentation/recipes.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/memory-model/Documentation/recipes.txt b/tools/memory-model/Documentation/recipes.txt index 63c4adfed884..03f58b11c252 100644 --- a/tools/memory-model/Documentation/recipes.txt +++ b/tools/memory-model/Documentation/recipes.txt @@ -1,7 +1,7 @@ This document provides "recipes", that is, litmus tests for commonly occurring situations, as well as a few that illustrate subtly broken but attractive nuisances. Many of these recipes include example code from -v4.13 of the Linux kernel. +v5.7 of the Linux kernel. The first section covers simple special cases, the second section takes off the training wheels to cover more involved examples, @@ -278,7 +278,7 @@ is present if the value loaded determines the address of a later access first place (control dependency). Note that the term "data dependency" is sometimes casually used to cover both address and data dependencies. -In lib/prime_numbers.c, the expand_to_next_prime() function invokes +In lib/math/prime_numbers.c, the expand_to_next_prime() function invokes rcu_assign_pointer(), and the next_prime_number() function invokes rcu_dereference(). This combination mediates access to a bit vector that is expanded as additional primes are needed. -- cgit v1.3.1 From 984f272be9d7b2dd8b17e35d437e5da500b502ae Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 3 Aug 2020 17:19:54 -0700 Subject: tools/memory-model: Improve litmus-test documentation The current LKMM documentation says very little about litmus tests, and worse yet directs people to the herd7 documentation for more information. Now, the herd7 documentation is quite voluminous and educational, but it is intended for people creating and modifying memory models, not those attempting to use them. This commit therefore updates README and creates a litmus-tests.txt file that gives an overview of litmus-test format and describes ways of modeling various special cases, illustrated with numerous examples. [ paulmck: Add Alan Stern feedback. ] [ paulmck: Apply Dave Chinner feedback. ] [ paulmck: Apply Andrii Nakryiko feedback. ] [ paulmck: Apply Johannes Weiner feedback. ] Link: https://lwn.net/Articles/827180/ Reported-by: Dave Chinner Acked-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney --- tools/memory-model/Documentation/litmus-tests.txt | 1070 +++++++++++++++++++++ tools/memory-model/README | 155 +-- 2 files changed, 1108 insertions(+), 117 deletions(-) create mode 100644 tools/memory-model/Documentation/litmus-tests.txt (limited to 'tools') diff --git a/tools/memory-model/Documentation/litmus-tests.txt b/tools/memory-model/Documentation/litmus-tests.txt new file mode 100644 index 000000000000..289a38d626dd --- /dev/null +++ b/tools/memory-model/Documentation/litmus-tests.txt @@ -0,0 +1,1070 @@ +Linux-Kernel Memory Model Litmus Tests +====================================== + +This file describes the LKMM litmus-test format by example, describes +some tricks and traps, and finally outlines LKMM's limitations. Earlier +versions of this material appeared in a number of LWN articles, including: + +https://lwn.net/Articles/720550/ + A formal kernel memory-ordering model (part 2) +https://lwn.net/Articles/608550/ + Axiomatic validation of memory barriers and atomic instructions +https://lwn.net/Articles/470681/ + Validating Memory Barriers and Atomic Instructions + +This document presents information in decreasing order of applicability, +so that, where possible, the information that has proven more commonly +useful is shown near the beginning. + +For information on installing LKMM, including the underlying "herd7" +tool, please see tools/memory-model/README. + + +Copy-Pasta +========== + +As with other software, it is often better (if less macho) to adapt an +existing litmus test than it is to create one from scratch. A number +of litmus tests may be found in the kernel source tree: + + tools/memory-model/litmus-tests/ + Documentation/litmus-tests/ + +Several thousand more example litmus tests are available on github +and kernel.org: + + https://github.com/paulmckrcu/litmus + https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd + https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/litmus + +The -l and -L arguments to "git grep" can be quite helpful in identifying +existing litmus tests that are similar to the one you need. But even if +you start with an existing litmus test, it is still helpful to have a +good understanding of the litmus-test format. + + +Examples and Format +=================== + +This section describes the overall format of litmus tests, starting +with a small example of the message-passing pattern and moving on to +more complex examples that illustrate explicit initialization and LKMM's +minimalistic set of flow-control statements. + + +Message-Passing Example +----------------------- + +This section gives an overview of the format of a litmus test using an +example based on the common message-passing use case. This use case +appears often in the Linux kernel. For example, a flag (modeled by "y" +below) indicates that a buffer (modeled by "x" below) is now completely +filled in and ready for use. It would be very bad if the consumer saw the +flag set, but, due to memory misordering, saw old values in the buffer. + +This example asks whether smp_store_release() and smp_load_acquire() +suffices to avoid this bad outcome: + + 1 C MP+pooncerelease+poacquireonce + 2 + 3 {} + 4 + 5 P0(int *x, int *y) + 6 { + 7 WRITE_ONCE(*x, 1); + 8 smp_store_release(y, 1); + 9 } +10 +11 P1(int *x, int *y) +12 { +13 int r0; +14 int r1; +15 +16 r0 = smp_load_acquire(y); +17 r1 = READ_ONCE(*x); +18 } +19 +20 exists (1:r0=1 /\ 1:r1=0) + +Line 1 starts with "C", which identifies this file as being in the +LKMM C-language format (which, as we will see, is a small fragment +of the full C language). The remainder of line 1 is the name of +the test, which by convention is the filename with the ".litmus" +suffix stripped. In this case, the actual test may be found in +tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus +in the Linux-kernel source tree. + +Mechanically generated litmus tests will often have an optional +double-quoted comment string on the second line. Such strings are ignored +when running the test. Yes, you can add your own comments to litmus +tests, but this is a bit involved due to the use of multiple parsers. +For now, you can use C-language comments in the C code, and these comments +may be in either the "/* */" or the "//" style. A later section will +cover the full litmus-test commenting story. + +Line 3 is the initialization section. Because the default initialization +to zero suffices for this test, the "{}" syntax is used, which mean the +initialization section is empty. Litmus tests requiring non-default +initialization must have non-empty initialization sections, as in the +example that will be presented later in this document. + +Lines 5-9 show the first process and lines 11-18 the second process. Each +process corresponds to a Linux-kernel task (or kthread, workqueue, thread, +and so on; LKMM discussions often use these terms interchangeably). +The name of the first process is "P0" and that of the second "P1". +You can name your processes anything you like as long as the names consist +of a single "P" followed by a number, and as long as the numbers are +consecutive starting with zero. This can actually be quite helpful, +for example, a .litmus file matching "^P1(" but not matching "^P2(" +must contain a two-process litmus test. + +The argument list for each function are pointers to the global variables +used by that function. Unlike normal C-language function parameters, the +names are significant. The fact that both P0() and P1() have a formal +parameter named "x" means that these two processes are working with the +same global variable, also named "x". So the "int *x, int *y" on P0() +and P1() mean that both processes are working with two shared global +variables, "x" and "y". Global variables are always passed to processes +by reference, hence "P0(int *x, int *y)", but *never* "P0(int x, int y)". + +P0() has no local variables, but P1() has two of them named "r0" and "r1". +These names may be freely chosen, but for historical reasons stemming from +other litmus-test formats, it is conventional to use names consisting of +"r" followed by a number as shown here. A common bug in litmus tests +is forgetting to add a global variable to a process's parameter list. +This will sometimes result in an error message, but can also cause the +intended global to instead be silently treated as an undeclared local +variable. + +Each process's code is similar to Linux-kernel C, as can be seen on lines +7-8 and 13-17. This code may use many of the Linux kernel's atomic +operations, some of its exclusive-lock functions, and some of its RCU +and SRCU functions. An approximate list of the currently supported +functions may be found in the linux-kernel.def file. + +The P0() process does "WRITE_ONCE(*x, 1)" on line 7. Because "x" is a +pointer in P0()'s parameter list, this does an unordered store to global +variable "x". Line 8 does "smp_store_release(y, 1)", and because "y" +is also in P0()'s parameter list, this does a release store to global +variable "y". + +The P1() process declares two local variables on lines 13 and 14. +Line 16 does "r0 = smp_load_acquire(y)" which does an acquire load +from global variable "y" into local variable "r0". Line 17 does a +"r1 = READ_ONCE(*x)", which does an unordered load from "*x" into local +variable "r1". Both "x" and "y" are in P1()'s parameter list, so both +reference the same global variables that are used by P0(). + +Line 20 is the "exists" assertion expression to evaluate the final state. +This final state is evaluated after the dust has settled: both processes +have completed and all of their memory references and memory barriers +have propagated to all parts of the system. The references to the local +variables "r0" and "r1" in line 24 must be prefixed with "1:" to specify +which process they are local to. + +Note that the assertion expression is written in the litmus-test +language rather than in C. For example, single "=" is an equality +operator rather than an assignment. The "/\" character combination means +"and". Similarly, "\/" stands for "or". Both of these are ASCII-art +representations of the corresponding mathematical symbols. Finally, +"~" stands for "logical not", which is "!" in C, and not to be confused +with the C-language "~" operator which instead stands for "bitwise not". +Parentheses may be used to override precedence. + +The "exists" assertion on line 20 is satisfied if the consumer sees the +flag ("y") set but the buffer ("x") as not yet filled in, that is, if P1() +loaded a value from "x" that was equal to 1 but loaded a value from "y" +that was still equal to zero. + +This example can be checked by running the following command, which +absolutely must be run from the tools/memory-model directory and from +this directory only: + +herd7 -conf linux-kernel.cfg litmus-tests/MP+pooncerelease+poacquireonce.litmus + +The output is the result of something similar to a full state-space +search, and is as follows: + + 1 Test MP+pooncerelease+poacquireonce Allowed + 2 States 3 + 3 1:r0=0; 1:r1=0; + 4 1:r0=0; 1:r1=1; + 5 1:r0=1; 1:r1=1; + 6 No + 7 Witnesses + 8 Positive: 0 Negative: 3 + 9 Condition exists (1:r0=1 /\ 1:r1=0) +10 Observation MP+pooncerelease+poacquireonce Never 0 3 +11 Time MP+pooncerelease+poacquireonce 0.00 +12 Hash=579aaa14d8c35a39429b02e698241d09 + +The most pertinent line is line 10, which contains "Never 0 3", which +indicates that the bad result flagged by the "exists" clause never +happens. This line might instead say "Sometimes" to indicate that the +bad result happened in some but not all executions, or it might say +"Always" to indicate that the bad result happened in all executions. +(The herd7 tool doesn't judge, so it is only an LKMM convention that the +"exists" clause indicates a bad result. To see this, invert the "exists" +clause's condition and run the test.) The numbers ("0 3") at the end +of this line indicate the number of end states satisfying the "exists" +clause (0) and the number not not satisfying that clause (3). + +Another important part of this output is shown in lines 2-5, repeated here: + + 2 States 3 + 3 1:r0=0; 1:r1=0; + 4 1:r0=0; 1:r1=1; + 5 1:r0=1; 1:r1=1; + +Line 2 gives the total number of end states, and each of lines 3-5 list +one of these states, with the first ("1:r0=0; 1:r1=0;") indicating that +both of P1()'s loads returned the value "0". As expected, given the +"Never" on line 10, the state flagged by the "exists" clause is not +listed. This full list of states can be helpful when debugging a new +litmus test. + +The rest of the output is not normally needed, either due to irrelevance +or due to being redundant with the lines discussed above. However, the +following paragraph lists them for the benefit of readers possessed of +an insatiable curiosity. Other readers should feel free to skip ahead. + +Line 1 echos the test name, along with the "Test" and "Allowed". Line 6's +"No" says that the "exists" clause was not satisfied by any execution, +and as such it has the same meaning as line 10's "Never". Line 7 is a +lead-in to line 8's "Positive: 0 Negative: 3", which lists the number +of end states satisfying and not satisfying the "exists" clause, just +like the two numbers at the end of line 10. Line 9 repeats the "exists" +clause so that you don't have to look it up in the litmus-test file. +The number at the end of line 11 (which begins with "Time") gives the +time in seconds required to analyze the litmus test. Small tests such +as this one complete in a few milliseconds, so "0.00" is quite common. +Line 12 gives a hash of the contents for the litmus-test file, and is used +by tooling that manages litmus tests and their output. This tooling is +used by people modifying LKMM itself, and among other things lets such +people know which of the several thousand relevant litmus tests were +affected by a given change to LKMM. + + +Initialization +-------------- + +The previous example relied on the default zero initialization for +"x" and "y", but a similar litmus test could instead initialize them +to some other value: + + 1 C MP+pooncerelease+poacquireonce + 2 + 3 { + 4 x=42; + 5 y=42; + 6 } + 7 + 8 P0(int *x, int *y) + 9 { +10 WRITE_ONCE(*x, 1); +11 smp_store_release(y, 1); +12 } +13 +14 P1(int *x, int *y) +15 { +16 int r0; +17 int r1; +18 +19 r0 = smp_load_acquire(y); +20 r1 = READ_ONCE(*x); +21 } +22 +23 exists (1:r0=1 /\ 1:r1=42) + +Lines 3-6 now initialize both "x" and "y" to the value 42. This also +means that the "exists" clause on line 23 must change "1:r1=0" to +"1:r1=42". + +Running the test gives the same overall result as before, but with the +value 42 appearing in place of the value zero: + + 1 Test MP+pooncerelease+poacquireonce Allowed + 2 States 3 + 3 1:r0=1; 1:r1=1; + 4 1:r0=42; 1:r1=1; + 5 1:r0=42; 1:r1=42; + 6 No + 7 Witnesses + 8 Positive: 0 Negative: 3 + 9 Condition exists (1:r0=1 /\ 1:r1=42) +10 Observation MP+pooncerelease+poacquireonce Never 0 3 +11 Time MP+pooncerelease+poacquireonce 0.02 +12 Hash=ab9a9b7940a75a792266be279a980156 + +It is tempting to avoid the open-coded repetitions of the value "42" +by defining another global variable "initval=42" and replacing all +occurrences of "42" with "initval". This will not, repeat *not*, +initialize "x" and "y" to 42, but instead to the address of "initval" +(try it!). See the section below on linked lists to learn more about +why this approach to initialization can be useful. + + +Control Structures +------------------ + +LKMM supports the C-language "if" statement, which allows modeling of +conditional branches. In LKMM, conditional branches can affect ordering, +but only if you are *very* careful (compilers are surprisingly able +to optimize away conditional branches). The following example shows +the "load buffering" (LB) use case that is used in the Linux kernel to +synchronize between ring-buffer producers and consumers. In the example +below, P0() is one side checking to see if an operation may proceed and +P1() is the other side completing its update. + + 1 C LB+fencembonceonce+ctrlonceonce + 2 + 3 {} + 4 + 5 P0(int *x, int *y) + 6 { + 7 int r0; + 8 + 9 r0 = READ_ONCE(*x); +10 if (r0) +11 WRITE_ONCE(*y, 1); +12 } +13 +14 P1(int *x, int *y) +15 { +16 int r0; +17 +18 r0 = READ_ONCE(*y); +19 smp_mb(); +20 WRITE_ONCE(*x, 1); +21 } +22 +23 exists (0:r0=1 /\ 1:r0=1) + +P1()'s "if" statement on line 10 works as expected, so that line 11 is +executed only if line 9 loads a non-zero value from "x". Because P1()'s +write of "1" to "x" happens only after P1()'s read from "y", one would +hope that the "exists" clause cannot be satisfied. LKMM agrees: + + 1 Test LB+fencembonceonce+ctrlonceonce Allowed + 2 States 2 + 3 0:r0=0; 1:r0=0; + 4 0:r0=1; 1:r0=0; + 5 No + 6 Witnesses + 7 Positive: 0 Negative: 2 + 8 Condition exists (0:r0=1 /\ 1:r0=1) + 9 Observation LB+fencembonceonce+ctrlonceonce Never 0 2 +10 Time LB+fencembonceonce+ctrlonceonce 0.00 +11 Hash=e5260556f6de495fd39b556d1b831c3b + +However, there is no "while" statement due to the fact that full +state-space search has some difficulty with iteration. However, there +are tricks that may be used to handle some special cases, which are +discussed below. In addition, loop-unrolling tricks may be applied, +albeit sparingly. + + +Tricks and Traps +================ + +This section covers extracting debug output from herd7, emulating +spin loops, handling trivial linked lists, adding comments to litmus tests, +emulating call_rcu(), and finally tricks to improve herd7 performance +in order to better handle large litmus tests. + + +Debug Output +------------ + +By default, the herd7 state output includes all variables mentioned +in the "exists" clause. But sometimes debugging efforts are greatly +aided by the values of other variables. Consider this litmus test +(tools/memory-order/litmus-tests/SB+rfionceonce-poonceonces.litmus but +slightly modified), which probes an obscure corner of hardware memory +ordering: + + 1 C SB+rfionceonce-poonceonces + 2 + 3 {} + 4 + 5 P0(int *x, int *y) + 6 { + 7 int r1; + 8 int r2; + 9 +10 WRITE_ONCE(*x, 1); +11 r1 = READ_ONCE(*x); +12 r2 = READ_ONCE(*y); +13 } +14 +15 P1(int *x, int *y) +16 { +17 int r3; +18 int r4; +19 +20 WRITE_ONCE(*y, 1); +21 r3 = READ_ONCE(*y); +22 r4 = READ_ONCE(*x); +23 } +24 +25 exists (0:r2=0 /\ 1:r4=0) + +The herd7 output is as follows: + + 1 Test SB+rfionceonce-poonceonces Allowed + 2 States 4 + 3 0:r2=0; 1:r4=0; + 4 0:r2=0; 1:r4=1; + 5 0:r2=1; 1:r4=0; + 6 0:r2=1; 1:r4=1; + 7 Ok + 8 Witnesses + 9 Positive: 1 Negative: 3 +10 Condition exists (0:r2=0 /\ 1:r4=0) +11 Observation SB+rfionceonce-poonceonces Sometimes 1 3 +12 Time SB+rfionceonce-poonceonces 0.01 +13 Hash=c7f30fe0faebb7d565405d55b7318ada + +(This output indicates that CPUs are permitted to "snoop their own +store buffers", which all of Linux's CPU families other than s390 will +happily do. Such snooping results in disagreement among CPUs on the +order of stores from different CPUs, which is rarely an issue.) + +But the herd7 output shows only the two variables mentioned in the +"exists" clause. Someone modifying this test might wish to know the +values of "x", "y", "0:r1", and "0:r3" as well. The "locations" +statement on line 25 shows how to cause herd7 to display additional +variables: + + 1 C SB+rfionceonce-poonceonces + 2 + 3 {} + 4 + 5 P0(int *x, int *y) + 6 { + 7 int r1; + 8 int r2; + 9 +10 WRITE_ONCE(*x, 1); +11 r1 = READ_ONCE(*x); +12 r2 = READ_ONCE(*y); +13 } +14 +15 P1(int *x, int *y) +16 { +17 int r3; +18 int r4; +19 +20 WRITE_ONCE(*y, 1); +21 r3 = READ_ONCE(*y); +22 r4 = READ_ONCE(*x); +23 } +24 +25 locations [0:r1; 1:r3; x; y] +26 exists (0:r2=0 /\ 1:r4=0) + +The herd7 output then displays the values of all the variables: + + 1 Test SB+rfionceonce-poonceonces Allowed + 2 States 4 + 3 0:r1=1; 0:r2=0; 1:r3=1; 1:r4=0; x=1; y=1; + 4 0:r1=1; 0:r2=0; 1:r3=1; 1:r4=1; x=1; y=1; + 5 0:r1=1; 0:r2=1; 1:r3=1; 1:r4=0; x=1; y=1; + 6 0:r1=1; 0:r2=1; 1:r3=1; 1:r4=1; x=1; y=1; + 7 Ok + 8 Witnesses + 9 Positive: 1 Negative: 3 +10 Condition exists (0:r2=0 /\ 1:r4=0) +11 Observation SB+rfionceonce-poonceonces Sometimes 1 3 +12 Time SB+rfionceonce-poonceonces 0.01 +13 Hash=40de8418c4b395388f6501cafd1ed38d + +What if you would like to know the value of a particular global variable +at some particular point in a given process's execution? One approach +is to use a READ_ONCE() to load that global variable into a new local +variable, then add that local variable to the "locations" clause. +But be careful: In some litmus tests, adding a READ_ONCE() will change +the outcome! For one example, please see the C-READ_ONCE.litmus and +C-READ_ONCE-omitted.litmus tests located here: + + https://github.com/paulmckrcu/litmus/blob/master/manual/kernel/ + + +Spin Loops +---------- + +The analysis carried out by herd7 explores full state space, which is +at best of exponential time complexity. Adding processes and increasing +the amount of code in a give process can greatly increase execution time. +Potentially infinite loops, such as those used to wait for locks to +become available, are clearly problematic. + +Fortunately, it is possible to avoid state-space explosion by specially +modeling such loops. For example, the following litmus tests emulates +locking using xchg_acquire(), but instead of enclosing xchg_acquire() +in a spin loop, it instead excludes executions that fail to acquire the +lock using a herd7 "filter" clause. Note that for exclusive locking, you +are better off using the spin_lock() and spin_unlock() that LKMM directly +models, if for no other reason that these are much faster. However, the +techniques illustrated in this section can be used for other purposes, +such as emulating reader-writer locking, which LKMM does not yet model. + + 1 C C-SB+l-o-o-u+l-o-o-u-X + 2 + 3 { + 4 } + 5 + 6 P0(int *sl, int *x0, int *x1) + 7 { + 8 int r2; + 9 int r1; +10 +11 r2 = xchg_acquire(sl, 1); +12 WRITE_ONCE(*x0, 1); +13 r1 = READ_ONCE(*x1); +14 smp_store_release(sl, 0); +15 } +16 +17 P1(int *sl, int *x0, int *x1) +18 { +19 int r2; +20 int r1; +21 +22 r2 = xchg_acquire(sl, 1); +23 WRITE_ONCE(*x1, 1); +24 r1 = READ_ONCE(*x0); +25 smp_store_release(sl, 0); +26 } +27 +28 filter (0:r2=0 /\ 1:r2=0) +29 exists (0:r1=0 /\ 1:r1=0) + +This litmus test may be found here: + +https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd/C-SB+l-o-o-u+l-o-o-u-X.litmus + +This test uses two global variables, "x1" and "x2", and also emulates a +single global spinlock named "sl". This spinlock is held by whichever +process changes the value of "sl" from "0" to "1", and is released when +that process sets "sl" back to "0". P0()'s lock acquisition is emulated +on line 11 using xchg_acquire(), which unconditionally stores the value +"1" to "sl" and stores either "0" or "1" to "r2", depending on whether +the lock acquisition was successful or unsuccessful (due to "sl" already +having the value "1"), respectively. P1() operates in a similar manner. + +Rather unconventionally, execution appears to proceed to the critical +section on lines 12 and 13 in either case. Line 14 then uses an +smp_store_release() to store zero to "sl", thus emulating lock release. + +The case where xchg_acquire() fails to acquire the lock is handled by +the "filter" clause on line 28, which tells herd7 to keep only those +executions in which both "0:r2" and "1:r2" are zero, that is to pay +attention only to those executions in which both locks are actually +acquired. Thus, the bogus executions that would execute the critical +sections are discarded and any effects that they might have had are +ignored. Note well that the "filter" clause keeps those executions +for which its expression is satisfied, that is, for which the expression +evaluates to true. In other words, the "filter" clause says what to +keep, not what to discard. + +The result of running this test is as follows: + + 1 Test C-SB+l-o-o-u+l-o-o-u-X Allowed + 2 States 2 + 3 0:r1=0; 1:r1=1; + 4 0:r1=1; 1:r1=0; + 5 No + 6 Witnesses + 7 Positive: 0 Negative: 2 + 8 Condition exists (0:r1=0 /\ 1:r1=0) + 9 Observation C-SB+l-o-o-u+l-o-o-u-X Never 0 2 +10 Time C-SB+l-o-o-u+l-o-o-u-X 0.03 + +The "Never" on line 9 indicates that this use of xchg_acquire() and +smp_store_release() really does correctly emulate locking. + +Why doesn't the litmus test take the simpler approach of using a spin loop +to handle failed spinlock acquisitions, like the kernel does? The key +insight behind this litmus test is that spin loops have no effect on the +possible "exists"-clause outcomes of program execution in the absence +of deadlock. In other words, given a high-quality lock-acquisition +primitive in a deadlock-free program running on high-quality hardware, +each lock acquisition will eventually succeed. Because herd7 already +explores the full state space, the length of time required to actually +acquire the lock does not matter. After all, herd7 already models all +possible durations of the xchg_acquire() statements. + +Why not just add the "filter" clause to the "exists" clause, thus +avoiding the "filter" clause entirely? This does work, but is slower. +The reason that the "filter" clause is faster is that (in the common case) +herd7 knows to abandon an execution as soon as the "filter" expression +fails to be satisfied. In contrast, the "exists" clause is evaluated +only at the end of time, thus requiring herd7 to waste time on bogus +executions in which both critical sections proceed concurrently. In +addition, some LKMM users like the separation of concerns provided by +using the both the "filter" and "exists" clauses. + +Readers lacking a pathological interest in odd corner cases should feel +free to skip the remainder of this section. + +But what if the litmus test were to temporarily set "0:r2" to a non-zero +value? Wouldn't that cause herd7 to abandon the execution prematurely +due to an early mismatch of the "filter" clause? + +Why not just try it? Line 4 of the following modified litmus test +introduces a new global variable "x2" that is initialized to "1". Line 23 +of P1() reads that variable into "1:r2" to force an early mismatch with +the "filter" clause. Line 24 does a known-true "if" condition to avoid +and static analysis that herd7 might do. Finally the "exists" clause +on line 32 is updated to a condition that is alway satisfied at the end +of the test. + + 1 C C-SB+l-o-o-u+l-o-o-u-X + 2 + 3 { + 4 x2=1; + 5 } + 6 + 7 P0(int *sl, int *x0, int *x1) + 8 { + 9 int r2; +10 int r1; +11 +12 r2 = xchg_acquire(sl, 1); +13 WRITE_ONCE(*x0, 1); +14 r1 = READ_ONCE(*x1); +15 smp_store_release(sl, 0); +16 } +17 +18 P1(int *sl, int *x0, int *x1, int *x2) +19 { +20 int r2; +21 int r1; +22 +23 r2 = READ_ONCE(*x2); +24 if (r2) +25 r2 = xchg_acquire(sl, 1); +26 WRITE_ONCE(*x1, 1); +27 r1 = READ_ONCE(*x0); +28 smp_store_release(sl, 0); +29 } +30 +31 filter (0:r2=0 /\ 1:r2=0) +32 exists (x1=1) + +If the "filter" clause were to check each variable at each point in the +execution, running this litmus test would display no executions because +all executions would be filtered out at line 23. However, the output +is instead as follows: + + 1 Test C-SB+l-o-o-u+l-o-o-u-X Allowed + 2 States 1 + 3 x1=1; + 4 Ok + 5 Witnesses + 6 Positive: 2 Negative: 0 + 7 Condition exists (x1=1) + 8 Observation C-SB+l-o-o-u+l-o-o-u-X Always 2 0 + 9 Time C-SB+l-o-o-u+l-o-o-u-X 0.04 +10 Hash=080bc508da7f291e122c6de76c0088e3 + +Line 3 shows that there is one execution that did not get filtered out, +so the "filter" clause is evaluated only on the last assignment to +the variables that it checks. In this case, the "filter" clause is a +disjunction, so it might be evaluated twice, once at the final (and only) +assignment to "0:r2" and once at the final assignment to "1:r2". + + +Linked Lists +------------ + +LKMM can handle linked lists, but only linked lists in which each node +contains nothing except a pointer to the next node in the list. This is +of course quite restrictive, but there is nevertheless quite a bit that +can be done within these confines, as can be seen in the litmus test +at tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus: + + 1 C MP+onceassign+derefonce + 2 + 3 { + 4 y=z; + 5 z=0; + 6 } + 7 + 8 P0(int *x, int **y) + 9 { +10 WRITE_ONCE(*x, 1); +11 rcu_assign_pointer(*y, x); +12 } +13 +14 P1(int *x, int **y) +15 { +16 int *r0; +17 int r1; +18 +19 rcu_read_lock(); +20 r0 = rcu_dereference(*y); +21 r1 = READ_ONCE(*r0); +22 rcu_read_unlock(); +23 } +24 +25 exists (1:r0=x /\ 1:r1=0) + +Line 4's "y=z" may seem odd, given that "z" has not yet been initialized. +But "y=z" does not set the value of "y" to that of "z", but instead +sets the value of "y" to the *address* of "z". Lines 4 and 5 therefore +create a simple linked list, with "y" pointing to "z" and "z" having a +NULL pointer. A much longer linked list could be created if desired, +and circular singly linked lists can also be created and manipulated. + +The "exists" clause works the same way, with the "1:r0=x" comparing P1()'s +"r0" not to the value of "x", but again to its address. This term of the +"exists" clause therefore tests whether line 20's load from "y" saw the +value stored by line 11, which is in fact what is required in this case. + +P0()'s line 10 initializes "x" to the value 1 then line 11 links to "x" +from "y", replacing "z". + +P1()'s line 20 loads a pointer from "y", and line 21 dereferences that +pointer. The RCU read-side critical section spanning lines 19-22 is +just for show in this example. + +Running this test results in the following: + + 1 Test MP+onceassign+derefonce Allowed + 2 States 2 + 3 1:r0=x; 1:r1=1; + 4 1:r0=z; 1:r1=0; + 5 No + 6 Witnesses + 7 Positive: 0 Negative: 2 + 8 Condition exists (1:r0=x /\ 1:r1=0) + 9 Observation MP+onceassign+derefonce Never 0 2 +10 Time MP+onceassign+derefonce 0.00 +11 Hash=49ef7a741563570102448a256a0c8568 + +The only possible outcomes feature P1() loading a pointer to "z" +(which contains zero) on the one hand and P1() loading a pointer to "x" +(which contains the value one) on the other. This should be reassuring +because it says that RCU readers cannot see the old preinitialization +values when accessing a newly inserted list node. This undesirable +scenario is flagged by the "exists" clause, and would occur if P1() +loaded a pointer to "x", but obtained the pre-initialization value of +zero after dereferencing that pointer. + + +Comments +-------- + +Different portions of a litmus test are processed by different parsers, +which has the charming effect of requiring different comment syntax in +different portions of the litmus test. The C-syntax portions use +C-language comments (either "/* */" or "//"), while the other portions +use Ocaml comments "(* *)". + +The following litmus test illustrates the comment style corresponding +to each syntactic unit of the test: + + 1 C MP+onceassign+derefonce (* A *) + 2 + 3 (* B *) + 4 + 5 { + 6 y=z; (* C *) + 7 z=0; + 8 } // D + 9 +10 // E +11 +12 P0(int *x, int **y) // F +13 { +14 WRITE_ONCE(*x, 1); // G +15 rcu_assign_pointer(*y, x); +16 } +17 +18 // H +19 +20 P1(int *x, int **y) +21 { +22 int *r0; +23 int r1; +24 +25 rcu_read_lock(); +26 r0 = rcu_dereference(*y); +27 r1 = READ_ONCE(*r0); +28 rcu_read_unlock(); +29 } +30 +31 // I +32 +33 exists (* J *) (1:r0=x /\ (* K *) 1:r1=0) (* L *) + +In short, use C-language comments in the C code and Ocaml comments in +the rest of the litmus test. + +On the other hand, if you prefer C-style comments everywhere, the +C preprocessor is your friend. + + +Asynchronous RCU Grace Periods +------------------------------ + +The following litmus test is derived from the example show in +Documentation/litmus-tests/rcu/RCU+sync+free.litmus, but converted to +emulate call_rcu(): + + 1 C RCU+sync+free + 2 + 3 { + 4 int x = 1; + 5 int *y = &x; + 6 int z = 1; + 7 } + 8 + 9 P0(int *x, int *z, int **y) +10 { +11 int *r0; +12 int r1; +13 +14 rcu_read_lock(); +15 r0 = rcu_dereference(*y); +16 r1 = READ_ONCE(*r0); +17 rcu_read_unlock(); +18 } +19 +20 P1(int *z, int **y, int *c) +21 { +22 rcu_assign_pointer(*y, z); +23 smp_store_release(*c, 1); // Emulate call_rcu(). +24 } +25 +26 P2(int *x, int *z, int **y, int *c) +27 { +28 int r0; +29 +30 r0 = smp_load_acquire(*c); // Note call_rcu() request. +31 synchronize_rcu(); // Wait one grace period. +32 WRITE_ONCE(*x, 0); // Emulate the RCU callback. +33 } +34 +35 filter (2:r0=1) (* Reject too-early starts. *) +36 exists (0:r0=x /\ 0:r1=0) + +Lines 4-6 initialize a linked list headed by "y" that initially contains +"x". In addition, "z" is pre-initialized to prepare for P1(), which +will replace "x" with "z" in this list. + +P0() on lines 9-18 enters an RCU read-side critical section, loads the +list header "y" and dereferences it, leaving the node in "0:r0" and +the node's value in "0:r1". + +P1() on lines 20-24 updates the list header to instead reference "z", +then emulates call_rcu() by doing a release store into "c". + +P2() on lines 27-33 emulates the behind-the-scenes effect of doing a +call_rcu(). Line 30 first does an acquire load from "c", then line 31 +waits for an RCU grace period to elapse, and finally line 32 emulates +the RCU callback, which in turn emulates a call to kfree(). + +Of course, it is possible for P2() to start too soon, so that the +value of "2:r0" is zero rather than the required value of "1". +The "filter" clause on line 35 handles this possibility, rejecting +all executions in which "2:r0" is not equal to the value "1". + + +Performance +----------- + +LKMM's exploration of the full state-space can be extremely helpful, +but it does not come for free. The price is exponential computational +complexity in terms of the number of processes, the average number +of statements in each process, and the total number of stores in the +litmus test. + +So it is best to start small and then work up. Where possible, break +your code down into small pieces each representing a core concurrency +requirement. + +That said, herd7 is quite fast. On an unprepossessing x86 laptop, it +was able to analyze the following 10-process RCU litmus test in about +six seconds. + +https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R.litmus + +One way to make herd7 run faster is to use the "-speedcheck true" option. +This option prevents herd7 from generating all possible end states, +instead causing it to focus solely on whether or not the "exists" +clause can be satisfied. With this option, herd7 evaluates the above +litmus test in about 300 milliseconds, for more than an order of magnitude +improvement in performance. + +Larger 16-process litmus tests that would normally consume 15 minutes +of time complete in about 40 seconds with this option. To be fair, +you do get an extra 65,535 states when you leave off the "-speedcheck +true" option. + +https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R.litmus + +Nevertheless, litmus-test analysis really is of exponential complexity, +whether with or without "-speedcheck true". Increasing by just three +processes to a 19-process litmus test requires 2 hours and 40 minutes +without, and about 8 minutes with "-speedcheck true". Each of these +results represent roughly an order of magnitude slowdown compared to the +16-process litmus test. Again, to be fair, the multi-hour run explores +no fewer than 524,287 additional states compared to the shorter one. + +https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R+RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R.litmus + +If you don't like command-line arguments, you can obtain a similar speedup +by adding a "filter" clause with exactly the same expression as your +"exists" clause. + +However, please note that seeing the full set of states can be extremely +helpful when developing and debugging litmus tests. + + +LIMITATIONS +=========== + +Limitations of the Linux-kernel memory model (LKMM) include: + +1. Compiler optimizations are not accurately modeled. Of course, + the use of READ_ONCE() and WRITE_ONCE() limits the compiler's + ability to optimize, but under some circumstances it is possible + for the compiler to undermine the memory model. For more + information, see Documentation/explanation.txt (in particular, + the "THE PROGRAM ORDER RELATION: po AND po-loc" and "A WARNING" + sections). + + Note that this limitation in turn limits LKMM's ability to + accurately model address, control, and data dependencies. + For example, if the compiler can deduce the value of some variable + carrying a dependency, then the compiler can break that dependency + by substituting a constant of that value. + +2. Multiple access sizes for a single variable are not supported, + and neither are misaligned or partially overlapping accesses. + +3. Exceptions and interrupts are not modeled. In some cases, + this limitation can be overcome by modeling the interrupt or + exception with an additional process. + +4. I/O such as MMIO or DMA is not supported. + +5. Self-modifying code (such as that found in the kernel's + alternatives mechanism, function tracer, Berkeley Packet Filter + JIT compiler, and module loader) is not supported. + +6. Complete modeling of all variants of atomic read-modify-write + operations, locking primitives, and RCU is not provided. + For example, call_rcu() and rcu_barrier() are not supported. + However, a substantial amount of support is provided for these + operations, as shown in the linux-kernel.def file. + + Here are specific limitations: + + a. When rcu_assign_pointer() is passed NULL, the Linux + kernel provides no ordering, but LKMM models this + case as a store release. + + b. The "unless" RMW operations are not currently modeled: + atomic_long_add_unless(), atomic_inc_unless_negative(), + and atomic_dec_unless_positive(). These can be emulated + in litmus tests, for example, by using atomic_cmpxchg(). + + One exception of this limitation is atomic_add_unless(), + which is provided directly by herd7 (so no corresponding + definition in linux-kernel.def). atomic_add_unless() is + modeled by herd7 therefore it can be used in litmus tests. + + c. The call_rcu() function is not modeled. As was shown above, + it can be emulated in litmus tests by adding another + process that invokes synchronize_rcu() and the body of the + callback function, with (for example) a release-acquire + from the site of the emulated call_rcu() to the beginning + of the additional process. + + d. The rcu_barrier() function is not modeled. It can be + emulated in litmus tests emulating call_rcu() via + (for example) a release-acquire from the end of each + additional call_rcu() process to the site of the + emulated rcu-barrier(). + + e. Although sleepable RCU (SRCU) is now modeled, there + are some subtle differences between its semantics and + those in the Linux kernel. For example, the kernel + might interpret the following sequence as two partially + overlapping SRCU read-side critical sections: + + 1 r1 = srcu_read_lock(&my_srcu); + 2 do_something_1(); + 3 r2 = srcu_read_lock(&my_srcu); + 4 do_something_2(); + 5 srcu_read_unlock(&my_srcu, r1); + 6 do_something_3(); + 7 srcu_read_unlock(&my_srcu, r2); + + In contrast, LKMM will interpret this as a nested pair of + SRCU read-side critical sections, with the outer critical + section spanning lines 1-7 and the inner critical section + spanning lines 3-5. + + This difference would be more of a concern had anyone + identified a reasonable use case for partially overlapping + SRCU read-side critical sections. For more information + on the trickiness of such overlapping, please see: + https://paulmck.livejournal.com/40593.html + + f. Reader-writer locking is not modeled. It can be + emulated in litmus tests using atomic read-modify-write + operations. + +The fragment of the C language supported by these litmus tests is quite +limited and in some ways non-standard: + +1. There is no automatic C-preprocessor pass. You can of course + run it manually, if you choose. + +2. There is no way to create functions other than the Pn() functions + that model the concurrent processes. + +3. The Pn() functions' formal parameters must be pointers to the + global shared variables. Nothing can be passed by value into + these functions. + +4. The only functions that can be invoked are those built directly + into herd7 or that are defined in the linux-kernel.def file. + +5. The "switch", "do", "for", "while", and "goto" C statements are + not supported. The "switch" statement can be emulated by the + "if" statement. The "do", "for", and "while" statements can + often be emulated by manually unrolling the loop, or perhaps by + enlisting the aid of the C preprocessor to minimize the resulting + code duplication. Some uses of "goto" can be emulated by "if", + and some others by unrolling. + +6. Although you can use a wide variety of types in litmus-test + variable declarations, and especially in global-variable + declarations, the "herd7" tool understands only int and + pointer types. There is no support for floating-point types, + enumerations, characters, strings, arrays, or structures. + +7. Parsing of variable declarations is very loose, with almost no + type checking. + +8. Initializers differ from their C-language counterparts. + For example, when an initializer contains the name of a shared + variable, that name denotes a pointer to that variable, not + the current value of that variable. For example, "int x = y" + is interpreted the way "int x = &y" would be in C. + +9. Dynamic memory allocation is not supported, although this can + be worked around in some cases by supplying multiple statically + allocated variables. + +Some of these limitations may be overcome in the future, but others are +more likely to be addressed by incorporating the Linux-kernel memory model +into other tools. + +Finally, please note that LKMM is subject to change as hardware, use cases, +and compilers evolve. diff --git a/tools/memory-model/README b/tools/memory-model/README index ecb7385376bf..d2e03c4f52a0 100644 --- a/tools/memory-model/README +++ b/tools/memory-model/README @@ -63,10 +63,32 @@ BASIC USAGE: HERD7 ================== The memory model is used, in conjunction with "herd7", to exhaustively -explore the state space of small litmus tests. +explore the state space of small litmus tests. Documentation describing +the format, features, capabilities and limitations of these litmus +tests is available in tools/memory-model/Documentation/litmus-tests.txt. -For example, to run SB+fencembonceonces.litmus against the memory model: +Example litmus tests may be found in the Linux-kernel source tree: + tools/memory-model/litmus-tests/ + Documentation/litmus-tests/ + +Several thousand more example litmus tests are available here: + + https://github.com/paulmckrcu/litmus + https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd + https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/litmus + +Documentation describing litmus tests and now to use them may be found +here: + + tools/memory-model/Documentation/litmus-tests.txt + +The remainder of this section uses the SB+fencembonceonces.litmus test +located in the tools/memory-model directory. + +To run SB+fencembonceonces.litmus against the memory model: + + $ cd $LINUX_SOURCE_TREE/tools/memory-model $ herd7 -conf linux-kernel.cfg litmus-tests/SB+fencembonceonces.litmus Here is the corresponding output: @@ -87,7 +109,11 @@ Here is the corresponding output: The "Positive: 0 Negative: 3" and the "Never 0 3" each indicate that this litmus test's "exists" clause can not be satisfied. -See "herd7 -help" or "herdtools7/doc/" for more information. +See "herd7 -help" or "herdtools7/doc/" for more information on running the +tool itself, but please be aware that this documentation is intended for +people who work on the memory model itself, that is, people making changes +to the tools/memory-model/linux-kernel.* files. It is not intended for +people focusing on writing, understanding, and running LKMM litmus tests. ===================== @@ -124,7 +150,11 @@ that during two million trials, the state specified in this litmus test's "exists" clause was not reached. And, as with "herd7", please see "klitmus7 -help" or "herdtools7/doc/" -for more information. +for more information. And again, please be aware that this documentation +is intended for people who work on the memory model itself, that is, +people making changes to the tools/memory-model/linux-kernel.* files. +It is not intended for people focusing on writing, understanding, and +running LKMM litmus tests. ==================== @@ -137,6 +167,10 @@ Documentation/cheatsheet.txt Documentation/explanation.txt Describes the memory model in detail. +Documentation/litmus-tests.txt + Describes the format, features, capabilities, and limitations + of the litmus tests that LKMM can evaluate. + Documentation/recipes.txt Lists common memory-ordering patterns. @@ -187,116 +221,3 @@ README This file. scripts Various scripts, see scripts/README. - - -=========== -LIMITATIONS -=========== - -The Linux-kernel memory model (LKMM) has the following limitations: - -1. Compiler optimizations are not accurately modeled. Of course, - the use of READ_ONCE() and WRITE_ONCE() limits the compiler's - ability to optimize, but under some circumstances it is possible - for the compiler to undermine the memory model. For more - information, see Documentation/explanation.txt (in particular, - the "THE PROGRAM ORDER RELATION: po AND po-loc" and "A WARNING" - sections). - - Note that this limitation in turn limits LKMM's ability to - accurately model address, control, and data dependencies. - For example, if the compiler can deduce the value of some variable - carrying a dependency, then the compiler can break that dependency - by substituting a constant of that value. - -2. Multiple access sizes for a single variable are not supported, - and neither are misaligned or partially overlapping accesses. - -3. Exceptions and interrupts are not modeled. In some cases, - this limitation can be overcome by modeling the interrupt or - exception with an additional process. - -4. I/O such as MMIO or DMA is not supported. - -5. Self-modifying code (such as that found in the kernel's - alternatives mechanism, function tracer, Berkeley Packet Filter - JIT compiler, and module loader) is not supported. - -6. Complete modeling of all variants of atomic read-modify-write - operations, locking primitives, and RCU is not provided. - For example, call_rcu() and rcu_barrier() are not supported. - However, a substantial amount of support is provided for these - operations, as shown in the linux-kernel.def file. - - a. When rcu_assign_pointer() is passed NULL, the Linux - kernel provides no ordering, but LKMM models this - case as a store release. - - b. The "unless" RMW operations are not currently modeled: - atomic_long_add_unless(), atomic_inc_unless_negative(), - and atomic_dec_unless_positive(). These can be emulated - in litmus tests, for example, by using atomic_cmpxchg(). - - One exception of this limitation is atomic_add_unless(), - which is provided directly by herd7 (so no corresponding - definition in linux-kernel.def). atomic_add_unless() is - modeled by herd7 therefore it can be used in litmus tests. - - c. The call_rcu() function is not modeled. It can be - emulated in litmus tests by adding another process that - invokes synchronize_rcu() and the body of the callback - function, with (for example) a release-acquire from - the site of the emulated call_rcu() to the beginning - of the additional process. - - d. The rcu_barrier() function is not modeled. It can be - emulated in litmus tests emulating call_rcu() via - (for example) a release-acquire from the end of each - additional call_rcu() process to the site of the - emulated rcu-barrier(). - - e. Although sleepable RCU (SRCU) is now modeled, there - are some subtle differences between its semantics and - those in the Linux kernel. For example, the kernel - might interpret the following sequence as two partially - overlapping SRCU read-side critical sections: - - 1 r1 = srcu_read_lock(&my_srcu); - 2 do_something_1(); - 3 r2 = srcu_read_lock(&my_srcu); - 4 do_something_2(); - 5 srcu_read_unlock(&my_srcu, r1); - 6 do_something_3(); - 7 srcu_read_unlock(&my_srcu, r2); - - In contrast, LKMM will interpret this as a nested pair of - SRCU read-side critical sections, with the outer critical - section spanning lines 1-7 and the inner critical section - spanning lines 3-5. - - This difference would be more of a concern had anyone - identified a reasonable use case for partially overlapping - SRCU read-side critical sections. For more information, - please see: https://paulmck.livejournal.com/40593.html - - f. Reader-writer locking is not modeled. It can be - emulated in litmus tests using atomic read-modify-write - operations. - -The "herd7" tool has some additional limitations of its own, apart from -the memory model: - -1. Non-trivial data structures such as arrays or structures are - not supported. However, pointers are supported, allowing trivial - linked lists to be constructed. - -2. Dynamic memory allocation is not supported, although this can - be worked around in some cases by supplying multiple statically - allocated variables. - -Some of these limitations may be overcome in the future, but others are -more likely to be addressed by incorporating the Linux-kernel memory model -into other tools. - -Finally, please note that LKMM is subject to change as hardware, use cases, -and compilers evolve. -- cgit v1.3.1 From 0b8c06b75ea143f3c68aa419c36e82d9ab7454f8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 4 Aug 2020 10:58:55 -0700 Subject: tools/memory-model: Add a simple entry point document Current LKMM documentation assumes that the reader already understands concurrency in the Linux kernel, which won't necessarily always be the case. This commit supplies a simple.txt file that provides a starting point for someone who is new to concurrency in the Linux kernel. That said, this file might also useful as a reminder to experienced developers of simpler approaches to dealing with concurrency. Link: Link: https://lwn.net/Articles/827180/ [ paulmck: Apply feedback from Joel Fernandes. ] Co-developed-by: Dave Chinner Signed-off-by: Dave Chinner Co-developed-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- tools/memory-model/Documentation/litmus-tests.txt | 8 +- tools/memory-model/Documentation/simple.txt | 271 ++++++++++++++++++++++ tools/memory-model/README | 5 + 3 files changed, 282 insertions(+), 2 deletions(-) create mode 100644 tools/memory-model/Documentation/simple.txt (limited to 'tools') diff --git a/tools/memory-model/Documentation/litmus-tests.txt b/tools/memory-model/Documentation/litmus-tests.txt index 289a38d626dd..2f840dcd15cf 100644 --- a/tools/memory-model/Documentation/litmus-tests.txt +++ b/tools/memory-model/Documentation/litmus-tests.txt @@ -726,8 +726,12 @@ P0()'s line 10 initializes "x" to the value 1 then line 11 links to "x" from "y", replacing "z". P1()'s line 20 loads a pointer from "y", and line 21 dereferences that -pointer. The RCU read-side critical section spanning lines 19-22 is -just for show in this example. +pointer. The RCU read-side critical section spanning lines 19-22 is just +for show in this example. Note that the address used for line 21's load +depends on (in this case, "is exactly the same as") the value loaded by +line 20. This is an example of what is called an "address dependency". +This particular address dependency extends from the load on line 20 to the +load on line 21. Address dependencies provide a weak form of ordering. Running this test results in the following: diff --git a/tools/memory-model/Documentation/simple.txt b/tools/memory-model/Documentation/simple.txt new file mode 100644 index 000000000000..81e1a0ec5342 --- /dev/null +++ b/tools/memory-model/Documentation/simple.txt @@ -0,0 +1,271 @@ +This document provides options for those wishing to keep their +memory-ordering lives simple, as is necessary for those whose domain +is complex. After all, there are bugs other than memory-ordering bugs, +and the time spent gaining memory-ordering knowledge is not available +for gaining domain knowledge. Furthermore Linux-kernel memory model +(LKMM) is quite complex, with subtle differences in code often having +dramatic effects on correctness. + +The options near the beginning of this list are quite simple. The idea +is not that kernel hackers don't already know about them, but rather +that they might need the occasional reminder. + +Please note that this is a generic guide, and that specific subsystems +will often have special requirements or idioms. For example, developers +of MMIO-based device drivers will often need to use mb(), rmb(), and +wmb(), and therefore might find smp_mb(), smp_rmb(), and smp_wmb() +to be more natural than smp_load_acquire() and smp_store_release(). +On the other hand, those coming in from other environments will likely +be more familiar with these last two. + + +Single-threaded code +==================== + +In single-threaded code, there is no reordering, at least assuming +that your toolchain and hardware are working correctly. In addition, +it is generally a mistake to assume your code will only run in a single +threaded context as the kernel can enter the same code path on multiple +CPUs at the same time. One important exception is a function that makes +no external data references. + +In the general case, you will need to take explicit steps to ensure that +your code really is executed within a single thread that does not access +shared variables. A simple way to achieve this is to define a global lock +that you acquire at the beginning of your code and release at the end, +taking care to ensure that all references to your code's shared data are +also carried out under that same lock. Because only one thread can hold +this lock at a given time, your code will be executed single-threaded. +This approach is called "code locking". + +Code locking can severely limit both performance and scalability, so it +should be used with caution, and only on code paths that execute rarely. +After all, a huge amount of effort was required to remove the Linux +kernel's old "Big Kernel Lock", so let's please be very careful about +adding new "little kernel locks". + +One of the advantages of locking is that, in happy contrast with the +year 1981, almost all kernel developers are very familiar with locking. +The Linux kernel's lockdep (CONFIG_PROVE_LOCKING=y) is very helpful with +the formerly feared deadlock scenarios. + +Please use the standard locking primitives provided by the kernel rather +than rolling your own. For one thing, the standard primitives interact +properly with lockdep. For another thing, these primitives have been +tuned to deal better with high contention. And for one final thing, it is +surprisingly hard to correctly code production-quality lock acquisition +and release functions. After all, even simple non-production-quality +locking functions must carefully prevent both the CPU and the compiler +from moving code in either direction across the locking function. + +Despite the scalability limitations of single-threaded code, RCU +takes this approach for much of its grace-period processing and also +for early-boot operation. The reason RCU is able to scale despite +single-threaded grace-period processing is use of batching, where all +updates that accumulated during one grace period are handled by the +next one. In other words, slowing down grace-period processing makes +it more efficient. Nor is RCU unique: Similar batching optimizations +are used in many I/O operations. + + +Packaged code +============= + +Even if performance and scalability concerns prevent your code from +being completely single-threaded, it is often possible to use library +functions that handle the concurrency nearly or entirely on their own. +This approach delegates any LKMM worries to the library maintainer. + +In the kernel, what is the "library"? Quite a bit. It includes the +contents of the lib/ directory, much of the include/linux/ directory along +with a lot of other heavily used APIs. But heavily used examples include +the list macros (for example, include/linux/{,rcu}list.h), workqueues, +smp_call_function(), and the various hash tables and search trees. + + +Data locking +============ + +With code locking, we use single-threaded code execution to guarantee +serialized access to the data that the code is accessing. However, +we can also achieve this by instead associating the lock with specific +instances of the data structures. This creates a "critical section" +in the code execution that will execute as though it is single threaded. +By placing all the accesses and modifications to a shared data structure +inside a critical section, we ensure that the execution context that +holds the lock has exclusive access to the shared data. + +The poster boy for this approach is the hash table, where placing a lock +in each hash bucket allows operations on different buckets to proceed +concurrently. This works because the buckets do not overlap with each +other, so that an operation on one bucket does not interfere with any +other bucket. + +As the number of buckets increases, data locking scales naturally. +In particular, if the amount of data increases with the number of CPUs, +increasing the number of buckets as the number of CPUs increase results +in a naturally scalable data structure. + + +Per-CPU processing +================== + +Partitioning processing and data over CPUs allows each CPU to take +a single-threaded approach while providing excellent performance and +scalability. Of course, there is no free lunch: The dark side of this +excellence is substantially increased memory footprint. + +In addition, it is sometimes necessary to occasionally update some global +view of this processing and data, in which case something like locking +must be used to protect this global view. This is the approach taken +by the percpu_counter infrastructure. In many cases, there are already +generic/library variants of commonly used per-cpu constructs available. +Please use them rather than rolling your own. + +RCU uses DEFINE_PER_CPU*() declaration to create a number of per-CPU +data sets. For example, each CPU does private quiescent-state processing +within its instance of the per-CPU rcu_data structure, and then uses data +locking to report quiescent states up the grace-period combining tree. + + +Packaged primitives: Sequence locking +===================================== + +Lockless programming is considered by many to be more difficult than +lock-based programming, but there are a few lockless design patterns that +have been built out into an API. One of these APIs is sequence locking. +Although this APIs can be used in extremely complex ways, there are simple +and effective ways of using it that avoid the need to pay attention to +memory ordering. + +The basic keep-things-simple rule for sequence locking is "do not write +in read-side code". Yes, you can do writes from within sequence-locking +readers, but it won't be so simple. For example, such writes will be +lockless and should be idempotent. + +For more sophisticated use cases, LKMM can guide you, including use +cases involving combining sequence locking with other synchronization +primitives. (LKMM does not yet know about sequence locking, so it is +currently necessary to open-code it in your litmus tests.) + +Additional information may be found in include/linux/seqlock.h. + +Packaged primitives: RCU +======================== + +Another lockless design pattern that has been baked into an API +is RCU. The Linux kernel makes sophisticated use of RCU, but the +keep-things-simple rules for RCU are "do not write in read-side code" +and "do not update anything that is visible to and accessed by readers", +and "protect updates with locking". + +These rules are illustrated by the functions foo_update_a() and +foo_get_a() shown in Documentation/RCU/whatisRCU.rst. Additional +RCU usage patterns maybe found in Documentation/RCU and in the +source code. + + +Packaged primitives: Atomic operations +====================================== + +Back in the day, the Linux kernel had three types of atomic operations: + +1. Initialization and read-out, such as atomic_set() and atomic_read(). + +2. Operations that did not return a value and provided no ordering, + such as atomic_inc() and atomic_dec(). + +3. Operations that returned a value and provided full ordering, such as + atomic_add_return() and atomic_dec_and_test(). Note that some + value-returning operations provide full ordering only conditionally. + For example, cmpxchg() provides ordering only upon success. + +More recent kernels have operations that return a value but do not +provide full ordering. These are flagged with either a _relaxed() +suffix (providing no ordering), or an _acquire() or _release() suffix +(providing limited ordering). + +Additional information may be found in these files: + +Documentation/atomic_t.txt +Documentation/atomic_bitops.txt +Documentation/core-api/atomic_ops.rst +Documentation/core-api/refcount-vs-atomic.rst + +Reading code using these primitives is often also quite helpful. + + +Lockless, fully ordered +======================= + +When using locking, there often comes a time when it is necessary +to access some variable or another without holding the data lock +that serializes access to that variable. + +If you want to keep things simple, use the initialization and read-out +operations from the previous section only when there are no racing +accesses. Otherwise, use only fully ordered operations when accessing +or modifying the variable. This approach guarantees that code prior +to a given access to that variable will be seen by all CPUs has having +happened before any code following any later access to that same variable. + +Please note that per-CPU functions are not atomic operations and +hence they do not provide any ordering guarantees at all. + +If the lockless accesses are frequently executed reads that are used +only for heuristics, or if they are frequently executed writes that +are used only for statistics, please see the next section. + + +Lockless statistics and heuristics +================================== + +Unordered primitives such as atomic_read(), atomic_set(), READ_ONCE(), and +WRITE_ONCE() can safely be used in some cases. These primitives provide +no ordering, but they do prevent the compiler from carrying out a number +of destructive optimizations (for which please see the next section). +One example use for these primitives is statistics, such as per-CPU +counters exemplified by the rt_cache_stat structure's routing-cache +statistics counters. Another example use case is heuristics, such as +the jiffies_till_first_fqs and jiffies_till_next_fqs kernel parameters +controlling how often RCU scans for idle CPUs. + +But be careful. "Unordered" really does mean "unordered". It is all +too easy to assume ordering, and this assumption must be avoided when +using these primitives. + + +Don't let the compiler trip you up +================================== + +It can be quite tempting to use plain C-language accesses for lockless +loads from and stores to shared variables. Although this is both +possible and quite common in the Linux kernel, it does require a +surprising amount of analysis, care, and knowledge about the compiler. +Yes, some decades ago it was not unfair to consider a C compiler to be +an assembler with added syntax and better portability, but the advent of +sophisticated optimizing compilers mean that those days are long gone. +Today's optimizing compilers can profoundly rewrite your code during the +translation process, and have long been ready, willing, and able to do so. + +Therefore, if you really need to use C-language assignments instead of +READ_ONCE(), WRITE_ONCE(), and so on, you will need to have a very good +understanding of both the C standard and your compiler. Here are some +introductory references and some tooling to start you on this noble quest: + +Who's afraid of a big bad optimizing compiler? + https://lwn.net/Articles/793253/ +Calibrating your fear of big bad optimizing compilers + https://lwn.net/Articles/799218/ +Concurrency bugs should fear the big bad data-race detector (part 1) + https://lwn.net/Articles/816850/ +Concurrency bugs should fear the big bad data-race detector (part 2) + https://lwn.net/Articles/816854/ + + +More complex use cases +====================== + +If the alternatives above do not do what you need, please look at the +recipes-pairs.txt file to peel off the next layer of the memory-ordering +onion. diff --git a/tools/memory-model/README b/tools/memory-model/README index d2e03c4f52a0..c8144d4aafa0 100644 --- a/tools/memory-model/README +++ b/tools/memory-model/README @@ -177,6 +177,11 @@ Documentation/recipes.txt Documentation/references.txt Provides background reading. +Documentation/simple.txt + Starting point for someone new to Linux-kernel concurrency. + And also for those needing a reminder of the simpler approaches + to concurrency! + linux-kernel.bell Categorizes the relevant instructions, including memory references, memory barriers, atomic read-modify-write operations, -- cgit v1.3.1 From d76bb7a09bb3b8711077912f3e80cfcf39cd9d0b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 27 May 2020 00:38:38 -0400 Subject: tools/power turbostat: Print /dev/cpu_dma_latency Users are puzzled when they use tuned performance and all their C-states vanish. Dump /dev/cpu_dma_latency and state whether the value is default, or constraining, to explain this situation. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 33b370865d16..4c679568fda4 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4698,6 +4698,32 @@ unsigned int intel_model_duplicates(unsigned int model) } return model; } + +void print_dev_latency(void) +{ + char *path = "/dev/cpu_dma_latency"; + int fd; + int value; + int retval; + + fd = open(path, O_RDONLY); + if (fd < 0) { + warn("fopen %s\n", path); + return; + } + + retval = read(fd, (void *)&value, sizeof(int)); + if (retval != sizeof(int)) { + warn("read %s\n", path); + close(fd); + return; + } + fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", + value, value == 2000000000 ? "default" : "constrained"); + + close(fd); +} + void process_cpuid() { unsigned int eax, ebx, ecx, edx; @@ -4966,6 +4992,8 @@ void process_cpuid() if (!quiet) dump_cstate_pstate_config_info(family, model); + if (!quiet) + print_dev_latency(); if (!quiet) dump_sysfs_cstate_config(); if (!quiet) -- cgit v1.3.1 From 9aefc2cda6353f48708415d9adc5dff4deb73412 Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Thu, 26 Mar 2020 13:36:37 -0700 Subject: tools/power turbostat: Always print idle in the system configuration header If the --quiet option is not used, turbostat prints a useful system configuration header during startup. But inclusion of idle system configuration information in this header is currently a function of inclusion in the columns chosen to be displayed. Always list this idle system configuration. Signed-off-by: Doug Smythies Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4c679568fda4..4edad3fc760a 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3530,9 +3530,6 @@ dump_sysfs_cstate_config(void) int state; char *sp; - if (!DO_BIC(BIC_sysfs)) - return; - if (access("/sys/devices/system/cpu/cpuidle", R_OK)) { fprintf(outf, "cpuidle not loaded\n"); return; -- cgit v1.3.1 From 7c2ccc507bd44d17227930181f937b2066565349 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Sat, 18 Apr 2020 16:31:47 +0800 Subject: tools/power turbostat: Make the energy variable to be 64 bit Change the energy variable from 32bit to 64bit, so that it can record long time duration. After this conversion, adjust the DELTA_WRAP32() accordingly. Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4edad3fc760a..29ec1018852d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -211,12 +211,12 @@ struct pkg_data { long long gfx_rc6_ms; unsigned int gfx_mhz; unsigned int package_id; - unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ - unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ - unsigned int energy_cores; /* MSR_PP0_ENERGY_STATUS */ - unsigned int energy_gfx; /* MSR_PP1_ENERGY_STATUS */ - unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ - unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ + unsigned long long energy_pkg; /* MSR_PKG_ENERGY_STATUS */ + unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */ + unsigned long long energy_cores; /* MSR_PP0_ENERGY_STATUS */ + unsigned long long energy_gfx; /* MSR_PP1_ENERGY_STATUS */ + unsigned long long rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ + unsigned long long rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ unsigned int pkg_temp_c; unsigned long long counter[MAX_ADDED_COUNTERS]; } *package_even, *package_odd; @@ -858,13 +858,13 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "pc10: %016llX\n", p->pc10); outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi); outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi); - outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg); - outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores); - outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx); - outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram); - outp += sprintf(outp, "Throttle PKG: %0X\n", + outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg); + outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores); + outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx); + outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram); + outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status); - outp += sprintf(outp, "Throttle RAM: %0X\n", + outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status); outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); @@ -1210,11 +1210,7 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ } #define DELTA_WRAP32(new, old) \ - if (new > old) { \ - old = new - old; \ - } else { \ - old = 0x100000000 + new - old; \ - } + old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32); int delta_package(struct pkg_data *new, struct pkg_data *old) -- cgit v1.3.1 From 87e15da95775a2ffb8c444e84f08ca982b758364 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Sat, 18 Apr 2020 16:31:57 +0800 Subject: tools/power turbostat: Introduce functions to accumulate RAPL consumption Since the RAPL Joule Counter is 32 bit, turbostat would only print a *star* instead of printing the actual energy consumed to indicate the overflow due to long duration. This does not meet the requirement from servers as the sampling time of turbostat is usually very long on servers. So maintain a set of MSR buffer, and update them periodically before the 32bit MSR register is wrapped round, so as to avoid the overflow. The idea is similar to the implementation of ktime_get(): Periodical MSR timer: total_rapl_sum += (current_rapl_msr - last_rapl_msr); Using get_msr_sum() to get the accumulated RAPL: return (current_rapl_msr - last_rapl_msr) + total_rapl_sum; The accumulated RAPL mechanism will be turned on in next patch. Originally-by: Aaron Lu Reviewed-by: Doug Smythies Tested-by: Doug Smythies Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/Makefile | 2 +- tools/power/x86/turbostat/turbostat.c | 224 +++++++++++++++++++++++++++++++++- 2 files changed, 219 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index 2b6551269e43..d08765531bcb 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -16,7 +16,7 @@ override CFLAGS += -D_FORTIFY_SOURCE=2 %: %.c @mkdir -p $(BUILD_OUTPUT) - $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap + $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap -lrt .PHONY : clean clean : diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 29ec1018852d..c1759f6c84a8 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -259,6 +259,113 @@ struct msr_counter { #define SYSFS_PERCPU (1 << 1) }; +/* + * The accumulated sum of MSR is defined as a monotonic + * increasing MSR, it will be accumulated periodically, + * despite its register's bit width. + */ +enum { + IDX_PKG_ENERGY, + IDX_DRAM_ENERGY, + IDX_PP0_ENERGY, + IDX_PP1_ENERGY, + IDX_PKG_PERF, + IDX_DRAM_PERF, + IDX_COUNT, +}; + +int get_msr_sum(int cpu, off_t offset, unsigned long long *msr); + +struct msr_sum_array { + /* get_msr_sum() = sum + (get_msr() - last) */ + struct { + /*The accumulated MSR value is updated by the timer*/ + unsigned long long sum; + /*The MSR footprint recorded in last timer*/ + unsigned long long last; + } entries[IDX_COUNT]; +}; + +/* The percpu MSR sum array.*/ +struct msr_sum_array *per_cpu_msr_sum; + +int idx_to_offset(int idx) +{ + int offset; + + switch (idx) { + case IDX_PKG_ENERGY: + offset = MSR_PKG_ENERGY_STATUS; + break; + case IDX_DRAM_ENERGY: + offset = MSR_DRAM_ENERGY_STATUS; + break; + case IDX_PP0_ENERGY: + offset = MSR_PP0_ENERGY_STATUS; + break; + case IDX_PP1_ENERGY: + offset = MSR_PP1_ENERGY_STATUS; + break; + case IDX_PKG_PERF: + offset = MSR_PKG_PERF_STATUS; + break; + case IDX_DRAM_PERF: + offset = MSR_DRAM_PERF_STATUS; + break; + default: + offset = -1; + } + return offset; +} + +int offset_to_idx(int offset) +{ + int idx; + + switch (offset) { + case MSR_PKG_ENERGY_STATUS: + idx = IDX_PKG_ENERGY; + break; + case MSR_DRAM_ENERGY_STATUS: + idx = IDX_DRAM_ENERGY; + break; + case MSR_PP0_ENERGY_STATUS: + idx = IDX_PP0_ENERGY; + break; + case MSR_PP1_ENERGY_STATUS: + idx = IDX_PP1_ENERGY; + break; + case MSR_PKG_PERF_STATUS: + idx = IDX_PKG_PERF; + break; + case MSR_DRAM_PERF_STATUS: + idx = IDX_DRAM_PERF; + break; + default: + idx = -1; + } + return idx; +} + +int idx_valid(int idx) +{ + switch (idx) { + case IDX_PKG_ENERGY: + return do_rapl & RAPL_PKG; + case IDX_DRAM_ENERGY: + return do_rapl & RAPL_DRAM; + case IDX_PP0_ENERGY: + return do_rapl & RAPL_CORES_ENERGY_STATUS; + case IDX_PP1_ENERGY: + return do_rapl & RAPL_GFX; + case IDX_PKG_PERF: + return do_rapl & RAPL_PKG_PERF_STATUS; + case IDX_DRAM_PERF: + return do_rapl & RAPL_DRAM_PERF_STATUS; + default: + return 0; + } +} struct sys_counters { unsigned int added_thread_counters; unsigned int added_core_counters; @@ -1250,12 +1357,12 @@ delta_package(struct pkg_data *new, struct pkg_data *old) old->gfx_mhz = new->gfx_mhz; - DELTA_WRAP32(new->energy_pkg, old->energy_pkg); - DELTA_WRAP32(new->energy_cores, old->energy_cores); - DELTA_WRAP32(new->energy_gfx, old->energy_gfx); - DELTA_WRAP32(new->energy_dram, old->energy_dram); - DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status); - DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status); + old->energy_pkg = new->energy_pkg - old->energy_pkg; + old->energy_cores = new->energy_cores - old->energy_cores; + old->energy_gfx = new->energy_gfx - old->energy_gfx; + old->energy_dram = new->energy_dram - old->energy_dram; + old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status; + old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status; for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -3053,6 +3160,111 @@ void do_sleep(void) } } +int get_msr_sum(int cpu, off_t offset, unsigned long long *msr) +{ + int ret, idx; + unsigned long long msr_cur, msr_last; + + if (!per_cpu_msr_sum) + return 1; + + idx = offset_to_idx(offset); + if (idx < 0) + return idx; + /* get_msr_sum() = sum + (get_msr() - last) */ + ret = get_msr(cpu, offset, &msr_cur); + if (ret) + return ret; + msr_last = per_cpu_msr_sum[cpu].entries[idx].last; + DELTA_WRAP32(msr_cur, msr_last); + *msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum; + + return 0; +} + +timer_t timerid; + +/* Timer callback, update the sum of MSRs periodically. */ +static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + int i, ret; + int cpu = t->cpu_id; + + for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) { + unsigned long long msr_cur, msr_last; + int offset; + + if (!idx_valid(i)) + continue; + offset = idx_to_offset(i); + if (offset < 0) + continue; + ret = get_msr(cpu, offset, &msr_cur); + if (ret) { + fprintf(outf, "Can not update msr(0x%x)\n", offset); + continue; + } + + msr_last = per_cpu_msr_sum[cpu].entries[i].last; + per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff; + + DELTA_WRAP32(msr_cur, msr_last); + per_cpu_msr_sum[cpu].entries[i].sum += msr_last; + } + return 0; +} + +static void +msr_record_handler(union sigval v) +{ + for_all_cpus(update_msr_sum, EVEN_COUNTERS); +} + +void msr_sum_record(void) +{ + struct itimerspec its; + struct sigevent sev; + + per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array)); + if (!per_cpu_msr_sum) { + fprintf(outf, "Can not allocate memory for long time MSR.\n"); + return; + } + /* + * Signal handler might be restricted, so use thread notifier instead. + */ + memset(&sev, 0, sizeof(struct sigevent)); + sev.sigev_notify = SIGEV_THREAD; + sev.sigev_notify_function = msr_record_handler; + + sev.sigev_value.sival_ptr = &timerid; + if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) { + fprintf(outf, "Can not create timer.\n"); + goto release_msr; + } + + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 1; + /* + * A wraparound time has been calculated early. + * Some sources state that the peak power for a + * microprocessor is usually 1.5 times the TDP rating, + * use 2 * TDP for safety. + */ + its.it_interval.tv_sec = rapl_joule_counter_range / 2; + its.it_interval.tv_nsec = 0; + + if (timer_settime(timerid, 0, &its, NULL) == -1) { + fprintf(outf, "Can not set timer.\n"); + goto release_timer; + } + return; + + release_timer: + timer_delete(timerid); + release_msr: + free(per_cpu_msr_sum); +} void turbostat_loop() { -- cgit v1.3.1 From 9972d5d84d76982606806b2ce887f70c2f8ba60a Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Sat, 18 Apr 2020 16:32:05 +0800 Subject: tools/power turbostat: Enable accumulate RAPL display Enable the accumulated RAPL display by default. Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 38 +++++++++++++++-------------------- 1 file changed, 16 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index c1759f6c84a8..66c468262020 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1169,14 +1169,7 @@ int format_counters(struct thread_data *t, struct core_data *c, } } - /* - * If measurement interval exceeds minimum RAPL Joule Counter range, - * indicate that results are suspect by printing "**" in fraction place. - */ - if (interval_float < rapl_joule_counter_range) - fmt8 = "%s%.2f"; - else - fmt8 = "%6.0f**"; + fmt8 = "%s%.2f"; if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float); @@ -2069,39 +2062,39 @@ retry: p->sys_lpi = cpuidle_cur_sys_lpi_us; if (do_rapl & RAPL_PKG) { - if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr)) return -13; - p->energy_pkg = msr & 0xFFFFFFFF; + p->energy_pkg = msr; } if (do_rapl & RAPL_CORES_ENERGY_STATUS) { - if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr)) return -14; - p->energy_cores = msr & 0xFFFFFFFF; + p->energy_cores = msr; } if (do_rapl & RAPL_DRAM) { - if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr)) return -15; - p->energy_dram = msr & 0xFFFFFFFF; + p->energy_dram = msr; } if (do_rapl & RAPL_GFX) { - if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr)) return -16; - p->energy_gfx = msr & 0xFFFFFFFF; + p->energy_gfx = msr; } if (do_rapl & RAPL_PKG_PERF_STATUS) { - if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr)) return -16; - p->rapl_pkg_perf_status = msr & 0xFFFFFFFF; + p->rapl_pkg_perf_status = msr; } if (do_rapl & RAPL_DRAM_PERF_STATUS) { - if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr)) return -16; - p->rapl_dram_perf_status = msr & 0xFFFFFFFF; + p->rapl_dram_perf_status = msr; } if (do_rapl & RAPL_AMD_F17H) { - if (get_msr(cpu, MSR_PKG_ENERGY_STAT, &msr)) + if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr)) return -13; - p->energy_pkg = msr & 0xFFFFFFFF; + p->energy_pkg = msr; } if (DO_BIC(BIC_PkgTmp)) { if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) @@ -6101,6 +6094,7 @@ int main(int argc, char **argv) return 0; } + msr_sum_record(); /* * if any params left, it must be a command to fork */ -- cgit v1.3.1 From 8201a0285789fade1c5b031914577e2b27a64f05 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 29 Jun 2020 15:26:57 -0400 Subject: tools/power turbostat: Use sched_getcpu() instead of hardcoded cpu 0 Disabling cpu 0 results in an error turbostat: /sys/devices/system/cpu/cpu0/topology/thread_siblings: open failed: No such file or directory Use sched_getcpu() instead of a hardcoded cpu 0 to get the max cpu number. Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 66c468262020..151a70f2311b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2865,12 +2865,19 @@ void re_initialize(void) void set_max_cpu_num(void) { FILE *filep; + int base_cpu; unsigned long dummy; + char pathname[64]; + base_cpu = sched_getcpu(); + if (base_cpu < 0) + err(1, "cannot find calling cpu ID"); + sprintf(pathname, + "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", + base_cpu); + + filep = fopen_or_die(pathname, "r"); topo.max_cpu_num = 0; - filep = fopen_or_die( - "/sys/devices/system/cpu/cpu0/topology/thread_siblings", - "r"); while (fscanf(filep, "%lx,", &dummy) == 1) topo.max_cpu_num += BITMASK_SIZE; fclose(filep); -- cgit v1.3.1 From b88cad57d4d32bb5c53cd8e0ce3a1971062142af Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Wed, 8 Jul 2020 12:55:30 +0200 Subject: tools/power turbostat: Replace HTTP links with HTTPS ones: TURBOSTAT UTILITY Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index a6db83a88e85..f6b7e85b121c 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -335,7 +335,7 @@ that they count at TSC rate, which is true on all processors tested to date. .SH REFERENCES Volume 3B: System Programming Guide" -http://www.intel.com/products/processor/manuals/ +https://www.intel.com/products/processor/manuals/ .SH FILES .ta -- cgit v1.3.1 From fecb3bc839df64761cc63c9ee9b45c1cad36aee8 Mon Sep 17 00:00:00 2001 From: David Arcari Date: Mon, 10 Aug 2020 10:43:30 -0400 Subject: tools/power turbostat: Fix output formatting for ACPI CST enumeration turbostat formatting is broken with ACPI CST for enumeration. The problem is that the CX_ACPI% is eight characters long which does not work with tab formatting. One simple solution is to remove the underbar from the state name such that C1_ACPI will be displayed as C1ACPI. Signed-off-by: David Arcari Cc: Len Brown Cc: linux-kernel@vger.kernel.org Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 151a70f2311b..56b93e0d9415 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3682,6 +3682,20 @@ int has_config_tdp(unsigned int family, unsigned int model) } } +static void +remove_underbar(char *s) +{ + char *to = s; + + while (*s) { + if (*s != '_') + *to++ = *s; + s++; + } + + *to = 0; +} + static void dump_cstate_pstate_config_info(unsigned int family, unsigned int model) { @@ -3764,6 +3778,8 @@ dump_sysfs_cstate_config(void) *sp = '\0'; fclose(input); + remove_underbar(name_buf); + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state); input = fopen(path, "r"); @@ -5830,6 +5846,8 @@ void probe_sysfs(void) *sp = '%'; *(sp + 1) = '\0'; + remove_underbar(name_buf); + fclose(input); sprintf(path, "cpuidle/state%d/time", state); @@ -5857,6 +5875,8 @@ void probe_sysfs(void) *sp = '\0'; fclose(input); + remove_underbar(name_buf); + sprintf(path, "cpuidle/state%d/usage", state); if (is_deferred_skip(name_buf)) -- cgit v1.3.1 From e7af1ed3fa4756e8df8270a8635d852a94266061 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 13 Aug 2020 19:06:03 -0400 Subject: tools/power turbostat: Support additional CPU model numbers Initial support for models recently added to intel-family.h. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 56b93e0d9415..4ee4e3067681 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4906,6 +4906,9 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_ICELAKE_NNPI: case INTEL_FAM6_TIGERLAKE_L: case INTEL_FAM6_TIGERLAKE: + case INTEL_FAM6_ROCKETLAKE: + case INTEL_FAM6_LAKEFIELD: + case INTEL_FAM6_ALDERLAKE: return INTEL_FAM6_CANNONLAKE_L; case INTEL_FAM6_ATOM_TREMONT_D: @@ -4915,6 +4918,7 @@ unsigned int intel_model_duplicates(unsigned int model) return INTEL_FAM6_ATOM_TREMONT; case INTEL_FAM6_ICELAKE_X: + case INTEL_FAM6_SAPPHIRERAPIDS_X: return INTEL_FAM6_SKYLAKE_X; } return model; -- cgit v1.3.1 From c315a09b1b0f491c27d46e9d05f397023a44fb81 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 13 Aug 2020 19:18:22 -0400 Subject: tools/power turbostat: Skip pc8, pc9, pc10 columns, if they are disabled Like we skip PC3 and PC6 columns when the package C-state limit disables them, skip PC8/PC9/CP10 under analogous conditions. Reported-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4ee4e3067681..72c0b19db36e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5186,9 +5186,12 @@ void process_cpuid() BIC_NOT_PRESENT(BIC_Pkgpc7); } if (has_c8910_msrs(family, model)) { - BIC_PRESENT(BIC_Pkgpc8); - BIC_PRESENT(BIC_Pkgpc9); - BIC_PRESENT(BIC_Pkgpc10); + if (pkg_cstate_limit >= PCL__8) + BIC_PRESENT(BIC_Pkgpc8); + if (pkg_cstate_limit >= PCL__9) + BIC_PRESENT(BIC_Pkgpc9); + if (pkg_cstate_limit >= PCL_10) + BIC_PRESENT(BIC_Pkgpc10); } do_irtl_hsw = has_c8910_msrs(family, model); if (has_skl_msrs(family, model)) { -- cgit v1.3.1 From 0936cdfbb527a4fa2559292069ebff2e8cf2c843 Mon Sep 17 00:00:00 2001 From: Ondřej Lysoněk Date: Fri, 27 Mar 2020 08:27:12 +0100 Subject: tools/power x86_energy_perf_policy: Input/output error in a VM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I've encountered an issue with x86_energy_perf_policy. If I run it on a machine that I'm told is a qemu-kvm virtual machine running inside a privileged container, I get the following error: x86_energy_perf_policy: /dev/cpu/0/msr offset 0x1ad read failed: Input/output error I get the same error in a Digital Ocean droplet, so that might be a similar environment. I created the following patch which is intended to give a more user-friendly message. It's based on a patch for turbostat from Prarit Bhargava that was posted some time ago. The patch is "[v2] turbostat: Running on virtual machine is not supported" [1]. Given my limited knowledge of the topic, I can't say with confidence that this is the right solution, though (that's why this is not an official patch submission). Also, I'm not sure what the convention with exit codes is in this tool. Also, instead of the error message, perhaps the tool should just not print anything in this case, which is how it behaves in a "regular" VM? [1] https://patchwork.kernel.org/patch/9868587/ Signed-off-by: Ondřej Lysoněk Signed-off-by: Len Brown --- .../x86_energy_perf_policy.c | 67 +++++++++++++++++----- 1 file changed, 54 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 3fe1eed900d4..ff6c6661f075 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -622,6 +622,57 @@ void cmdline(int argc, char **argv) } } +/* + * Open a file, and exit on failure + */ +FILE *fopen_or_die(const char *path, const char *mode) +{ + FILE *filep = fopen(path, "r"); + + if (!filep) + err(1, "%s: open failed", path); + return filep; +} + +void err_on_hypervisor(void) +{ + FILE *cpuinfo; + char *flags, *hypervisor; + char *buffer; + + /* On VMs /proc/cpuinfo contains a "flags" entry for hypervisor */ + cpuinfo = fopen_or_die("/proc/cpuinfo", "ro"); + + buffer = malloc(4096); + if (!buffer) { + fclose(cpuinfo); + err(-ENOMEM, "buffer malloc fail"); + } + + if (!fread(buffer, 1024, 1, cpuinfo)) { + fclose(cpuinfo); + free(buffer); + err(1, "Reading /proc/cpuinfo failed"); + } + + flags = strstr(buffer, "flags"); + rewind(cpuinfo); + fseek(cpuinfo, flags - buffer, SEEK_SET); + if (!fgets(buffer, 4096, cpuinfo)) { + fclose(cpuinfo); + free(buffer); + err(1, "Reading /proc/cpuinfo failed"); + } + fclose(cpuinfo); + + hypervisor = strstr(buffer, "hypervisor"); + + free(buffer); + + if (hypervisor) + err(-1, + "not supported on this virtual machine"); +} int get_msr(int cpu, int offset, unsigned long long *msr) { @@ -635,8 +686,10 @@ int get_msr(int cpu, int offset, unsigned long long *msr) err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); retval = pread(fd, msr, sizeof(*msr), offset); - if (retval != sizeof(*msr)) + if (retval != sizeof(*msr)) { + err_on_hypervisor(); err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset); + } if (debug > 1) fprintf(stderr, "get_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, *msr); @@ -1086,18 +1139,6 @@ int update_cpu_msrs(int cpu) return 0; } -/* - * Open a file, and exit on failure - */ -FILE *fopen_or_die(const char *path, const char *mode) -{ - FILE *filep = fopen(path, "r"); - - if (!filep) - err(1, "%s: open failed", path); - return filep; -} - unsigned int get_pkg_num(int cpu) { FILE *fp; -- cgit v1.3.1 From b4b9156953fea108a9540c262e48eafeeff99ab0 Mon Sep 17 00:00:00 2001 From: Rafael Antognolli Date: Wed, 22 Apr 2020 15:02:07 -0700 Subject: tools/power turbostat: Add a new GFXAMHz column that exposes gt_act_freq_mhz. The column already present called GFXMHz reads from gt_cur_freq_mhz, which represents the GT frequency that was requested, but power management might not be able to do that. So the new column will display what the actual frequency GT is running at. Signed-off-by: Rafael Antognolli Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 50 +++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 72c0b19db36e..7a6e91aedf0f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -79,6 +79,7 @@ unsigned long long gfx_cur_rc6_ms; unsigned long long cpuidle_cur_cpu_lpi_us; unsigned long long cpuidle_cur_sys_lpi_us; unsigned int gfx_cur_mhz; +unsigned int gfx_act_mhz; unsigned int tcc_activation_temp; unsigned int tcc_activation_temp_override; double rapl_power_units, rapl_time_units; @@ -210,6 +211,7 @@ struct pkg_data { unsigned long long pkg_both_core_gfxe_c0; long long gfx_rc6_ms; unsigned int gfx_mhz; + unsigned int gfx_act_mhz; unsigned int package_id; unsigned long long energy_pkg; /* MSR_PKG_ENERGY_STATUS */ unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */ @@ -558,6 +560,7 @@ struct msr_counter bic[] = { { 0x0, "APIC" }, { 0x0, "X2APIC" }, { 0x0, "Die" }, + { 0x0, "GFXAMHz" }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -612,6 +615,7 @@ struct msr_counter bic[] = { #define BIC_APIC (1ULL << 48) #define BIC_X2APIC (1ULL << 49) #define BIC_Die (1ULL << 50) +#define BIC_GFXACTMHz (1ULL << 51) #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) @@ -831,6 +835,9 @@ void print_header(char *delim) if (DO_BIC(BIC_GFXMHz)) outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : "")); + if (DO_BIC(BIC_GFXACTMHz)) + outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : "")); + if (DO_BIC(BIC_Totl_c0)) outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : "")); if (DO_BIC(BIC_Any_c0)) @@ -1198,6 +1205,10 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_GFXMHz)) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz); + /* GFXACTMHz */ + if (DO_BIC(BIC_GFXACTMHz)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz); + /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ if (DO_BIC(BIC_Totl_c0)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc); @@ -1349,6 +1360,7 @@ delta_package(struct pkg_data *new, struct pkg_data *old) old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms; old->gfx_mhz = new->gfx_mhz; + old->gfx_act_mhz = new->gfx_act_mhz; old->energy_pkg = new->energy_pkg - old->energy_pkg; old->energy_cores = new->energy_cores - old->energy_cores; @@ -1565,6 +1577,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->gfx_rc6_ms = 0; p->gfx_mhz = 0; + p->gfx_act_mhz = 0; for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) t->counter[i] = 0; @@ -1660,6 +1673,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.packages.gfx_rc6_ms = p->gfx_rc6_ms; average.packages.gfx_mhz = p->gfx_mhz; + average.packages.gfx_act_mhz = p->gfx_act_mhz; average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); @@ -2108,6 +2122,9 @@ retry: if (DO_BIC(BIC_GFXMHz)) p->gfx_mhz = gfx_cur_mhz; + if (DO_BIC(BIC_GFXACTMHz)) + p->gfx_act_mhz = gfx_act_mhz; + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (get_mp(cpu, mp, &p->counter[i])) return -10; @@ -3018,6 +3035,33 @@ int snapshot_gfx_mhz(void) return 0; } +/* + * snapshot_gfx_cur_mhz() + * + * record snapshot of + * /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz + * + * return 1 if config change requires a restart, else return 0 + */ +int snapshot_gfx_act_mhz(void) +{ + static FILE *fp; + int retval; + + if (fp == NULL) + fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r"); + else { + rewind(fp); + fflush(fp); + } + + retval = fscanf(fp, "%d", &gfx_act_mhz); + if (retval != 1) + err(1, "GFX ACT MHz"); + + return 0; +} + /* * snapshot_cpu_lpi() * @@ -3083,6 +3127,9 @@ int snapshot_proc_sysfs_files(void) if (DO_BIC(BIC_GFXMHz)) snapshot_gfx_mhz(); + if (DO_BIC(BIC_GFXACTMHz)) + snapshot_gfx_act_mhz(); + if (DO_BIC(BIC_CPU_LPI)) snapshot_cpu_lpi_us(); @@ -5236,6 +5283,9 @@ void process_cpuid() if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) BIC_PRESENT(BIC_GFXMHz); + if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK)) + BIC_PRESENT(BIC_GFXACTMHz); + if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK)) BIC_PRESENT(BIC_CPU_LPI); else -- cgit v1.3.1 From 20de0dab238849414d33c81bc96e2db68cc61467 Mon Sep 17 00:00:00 2001 From: Antti Laakso Date: Mon, 17 Aug 2020 18:03:48 +0300 Subject: tools/power turbostat: Remove empty columns for Jacobsville Jacobsville doesn't have Package C2 and C6. Also Core and DRAM RAPL are not available. Adjust output accordingly. Signed-off-by: Antti Laakso Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 7a6e91aedf0f..629b809075c1 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2286,6 +2286,7 @@ int has_turbo_ratio_group_limits(int family, int model) case INTEL_FAM6_ATOM_GOLDMONT: case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_ATOM_GOLDMONT_D: + case INTEL_FAM6_ATOM_TREMONT_D: return 1; } return 0; @@ -3534,6 +3535,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_ATOM_GOLDMONT_PLUS: case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ case INTEL_FAM6_ATOM_TREMONT: /* EHL */ + case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ pkg_cstate_limits = glm_pkg_cstate_limits; break; default: @@ -3616,6 +3618,17 @@ int is_ehl(unsigned int family, unsigned int model) } return 0; } +int is_jvl(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_TREMONT_D: + return 1; + } + return 0; +} int has_turbo_ratio_limit(unsigned int family, unsigned int model) { @@ -4227,6 +4240,14 @@ void rapl_probe_intel(unsigned int family, unsigned int model) BIC_PRESENT(BIC_GFXWatt); } break; + case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ + do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; + BIC_PRESENT(BIC_PKG__); + if (rapl_joules) + BIC_PRESENT(BIC_Pkg_J); + else + BIC_PRESENT(BIC_PkgWatt); + break; case INTEL_FAM6_SKYLAKE_L: /* SKL */ case INTEL_FAM6_CANNONLAKE_L: /* CNL */ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO; @@ -4629,6 +4650,7 @@ int has_snb_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_ATOM_GOLDMONT_PLUS: case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ case INTEL_FAM6_ATOM_TREMONT: /* EHL */ + case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ return 1; } return 0; @@ -4958,9 +4980,6 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_ALDERLAKE: return INTEL_FAM6_CANNONLAKE_L; - case INTEL_FAM6_ATOM_TREMONT_D: - return INTEL_FAM6_ATOM_GOLDMONT_D; - case INTEL_FAM6_ATOM_TREMONT_L: return INTEL_FAM6_ATOM_TREMONT; @@ -5214,6 +5233,14 @@ void process_cpuid() BIC_PRESENT(BIC_Mod_c6); use_c1_residency_msr = 1; } + if (is_jvl(family, model)) { + BIC_NOT_PRESENT(BIC_CPU_c3); + BIC_NOT_PRESENT(BIC_CPU_c7); + BIC_NOT_PRESENT(BIC_Pkgpc2); + BIC_NOT_PRESENT(BIC_Pkgpc3); + BIC_NOT_PRESENT(BIC_Pkgpc6); + BIC_NOT_PRESENT(BIC_Pkgpc7); + } if (is_dnv(family, model)) { BIC_PRESENT(BIC_CPU_c1); BIC_NOT_PRESENT(BIC_CPU_c3); -- cgit v1.3.1 From 33eb82251af9be47a625ca1578f44e596a3a0ca9 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Mon, 17 Aug 2020 17:42:15 -0500 Subject: tools/power turbostat: Support AMD Family 19h Family 19h processors have the same RAPL (Running average power limit) hardware register interface as Family 17h processors. Change the family checks to succeed for Family 17h and above to enable core and package energy measurement on Family 19h machines. Also update the TDP to the largest found at the bottom of the page at amd.com->processors->servers->epyc->2nd-gen-epyc, i.e., the EPYC 7H12. Signed-off-by: Kim Phillips Cc: Len Brown Cc: Len Brown Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 629b809075c1..0869f791ed14 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4162,13 +4162,8 @@ double get_tdp_intel(unsigned int model) double get_tdp_amd(unsigned int family) { - switch (family) { - case 0x17: - case 0x18: - default: - /* This is the max stock TDP of HEDT/Server Fam17h chips */ - return 250.0; - } + /* This is the max stock TDP of HEDT/Server Fam17h+ chips */ + return 280.0; } /* @@ -4358,27 +4353,20 @@ void rapl_probe_amd(unsigned int family, unsigned int model) if (max_extended_level >= 0x80000007) { __cpuid(0x80000007, eax, ebx, ecx, edx); - /* RAPL (Fam 17h) */ + /* RAPL (Fam 17h+) */ has_rapl = edx & (1 << 14); } - if (!has_rapl) + if (!has_rapl || family < 0x17) return; - switch (family) { - case 0x17: /* Zen, Zen+ */ - case 0x18: /* Hygon Dhyana */ - do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY; - if (rapl_joules) { - BIC_PRESENT(BIC_Pkg_J); - BIC_PRESENT(BIC_Cor_J); - } else { - BIC_PRESENT(BIC_PkgWatt); - BIC_PRESENT(BIC_CorWatt); - } - break; - default: - return; + do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY; + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); } if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr)) -- cgit v1.3.1 From 4be61e6b769fc3f97b58870aa4258e27968f07e1 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Sun, 23 Aug 2020 23:27:02 +0300 Subject: tools/power turbostat: Build with _FILE_OFFSET_BITS=64 For compatibility reasons, Glibc off_t is a 32-bit type on 32-bit x86 unless _FILE_OFFSET_BITS=64 is defined. Add this define, as otherwise reading MSRs with index 0x80000000 and above attempts a pread with a negative offset, which fails. Signed-off-by: Alexander Monakov Tested-by: Liwei Song Signed-off-by: Len Brown --- tools/power/x86/turbostat/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index d08765531bcb..f3e3c94ab9bd 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -12,6 +12,7 @@ turbostat : turbostat.c override CFLAGS += -O2 -Wall -I../../../include override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"' +override CFLAGS += -D_FILE_OFFSET_BITS=64 override CFLAGS += -D_FORTIFY_SOURCE=2 %: %.c -- cgit v1.3.1 From 30ae801746ea0e099246318d373691a9e3238d40 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Tue, 1 Sep 2020 23:09:23 +0800 Subject: selftests/net: improve descriptions for XFAIL cases in psock_snd.sh Before changing this it's a bit confusing to read test output: raw csum_off with bad offset (fails) ./psock_snd: write: Invalid argument Change "fails" in the test case description to "expected to fail", so that the test output can be more understandable. Signed-off-by: Po-Hsu Lin Signed-off-by: David S. Miller --- tools/testing/selftests/net/psock_snd.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh index 6331d91b86a6..170be65e0816 100755 --- a/tools/testing/selftests/net/psock_snd.sh +++ b/tools/testing/selftests/net/psock_snd.sh @@ -45,7 +45,7 @@ echo "raw vnet hdr" echo "raw csum_off" ./in_netns.sh ./psock_snd -v -c -echo "raw csum_off with bad offset (fails)" +echo "raw csum_off with bad offset (expected to fail)" (! ./in_netns.sh ./psock_snd -v -c -C) @@ -57,7 +57,7 @@ echo "raw min size" echo "raw mtu size" ./in_netns.sh ./psock_snd -l "${mss}" -echo "raw mtu size + 1 (fails)" +echo "raw mtu size + 1 (expected to fail)" (! ./in_netns.sh ./psock_snd -l "${mss_exceeds}") # fails due to ARPHRD_ETHER check in packet_extra_vlan_len_allowed @@ -65,19 +65,19 @@ echo "raw mtu size + 1 (fails)" # echo "raw vlan mtu size" # ./in_netns.sh ./psock_snd -V -l "${mss}" -echo "raw vlan mtu size + 1 (fails)" +echo "raw vlan mtu size + 1 (expected to fail)" (! ./in_netns.sh ./psock_snd -V -l "${mss_exceeds}") echo "dgram mtu size" ./in_netns.sh ./psock_snd -d -l "${mss}" -echo "dgram mtu size + 1 (fails)" +echo "dgram mtu size + 1 (expected to fail)" (! ./in_netns.sh ./psock_snd -d -l "${mss_exceeds}") -echo "raw truncate hlen (fails: does not arrive)" +echo "raw truncate hlen (expected to fail: does not arrive)" (! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen}))") -echo "raw truncate hlen - 1 (fails: EINVAL)" +echo "raw truncate hlen - 1 (expected to fail: EINVAL)" (! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen} - 1))") @@ -86,13 +86,13 @@ echo "raw truncate hlen - 1 (fails: EINVAL)" echo "raw gso min size" ./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}" -echo "raw gso min size - 1 (fails)" +echo "raw gso min size - 1 (expected to fail)" (! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}") echo "raw gso max size" ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}" -echo "raw gso max size + 1 (fails)" +echo "raw gso max size + 1 (expected to fail)" (! ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss_exceeds}") echo "OK. All tests passed" -- cgit v1.3.1 From 0201c575831171292489f14a8b6f79f98936b4d1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Sep 2020 13:35:29 -0700 Subject: libbpf: Ensure ELF symbols table is found before further ELF processing libbpf ELF parsing logic might need symbols available before ELF parsing is completed, so we need to make sure that symbols table section is found in a separate pass before all the subsequent sections are processed. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200903203542.15944-2-andriin@fb.com --- tools/lib/bpf/libbpf.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index b688aadf09c5..ac56d4db6d3e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2720,14 +2720,38 @@ static int bpf_object__elf_collect(struct bpf_object *obj) Elf *elf = obj->efile.elf; Elf_Data *btf_ext_data = NULL; Elf_Data *btf_data = NULL; - Elf_Scn *scn = NULL; int idx = 0, err = 0; + const char *name; + Elf_Data *data; + Elf_Scn *scn; + GElf_Shdr sh; + /* a bunch of ELF parsing functionality depends on processing symbols, + * so do the first pass and find the symbol table + */ + scn = NULL; while ((scn = elf_nextscn(elf, scn)) != NULL) { - const char *name; - GElf_Shdr sh; - Elf_Data *data; + if (elf_sec_hdr(obj, scn, &sh)) + return -LIBBPF_ERRNO__FORMAT; + + if (sh.sh_type == SHT_SYMTAB) { + if (obj->efile.symbols) { + pr_warn("elf: multiple symbol tables in %s\n", obj->path); + return -LIBBPF_ERRNO__FORMAT; + } + data = elf_sec_data(obj, scn); + if (!data) + return -LIBBPF_ERRNO__FORMAT; + + obj->efile.symbols = data; + obj->efile.symbols_shndx = elf_ndxscn(scn); + obj->efile.strtabidx = sh.sh_link; + } + } + + scn = NULL; + while ((scn = elf_nextscn(elf, scn)) != NULL) { idx++; if (elf_sec_hdr(obj, scn, &sh)) @@ -2766,13 +2790,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { btf_ext_data = data; } else if (sh.sh_type == SHT_SYMTAB) { - if (obj->efile.symbols) { - pr_warn("elf: multiple symbol tables in %s\n", obj->path); - return -LIBBPF_ERRNO__FORMAT; - } - obj->efile.symbols = data; - obj->efile.symbols_shndx = idx; - obj->efile.strtabidx = sh.sh_link; + /* already processed during the first pass above */ } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) { if (sh.sh_flags & SHF_EXECINSTR) { if (strcmp(name, ".text") == 0) -- cgit v1.3.1 From c112239272c6bacf7fcd0a0dc71999ad8eb1c55c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Sep 2020 13:35:30 -0700 Subject: libbpf: Parse multi-function sections into multiple BPF programs Teach libbpf how to parse code sections into potentially multiple bpf_program instances, based on ELF FUNC symbols. Each BPF program will keep track of its position within containing ELF section for translating section instruction offsets into program instruction offsets: regardless of BPF program's location in ELF section, it's first instruction is always at local instruction offset 0, so when libbpf is working with relocations (which use section-based instruction offsets) this is critical to make proper translations. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200903203542.15944-3-andriin@fb.com --- tools/lib/bpf/libbpf.c | 249 ++++++++++++++++++++++++++++--------------------- 1 file changed, 142 insertions(+), 107 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ac56d4db6d3e..57f87eee5be5 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -217,20 +217,45 @@ struct bpf_sec_def { * linux/filter.h. */ struct bpf_program { - /* Index in elf obj file, for relocation use. */ - int idx; - char *name; - int prog_ifindex; - char *section_name; const struct bpf_sec_def *sec_def; + char *section_name; + size_t sec_idx; + /* this program's instruction offset (in number of instructions) + * within its containing ELF section + */ + size_t sec_insn_off; + /* number of original instructions in ELF section belonging to this + * program, not taking into account subprogram instructions possible + * appended later during relocation + */ + size_t sec_insn_cnt; + /* Offset (in number of instructions) of the start of instruction + * belonging to this BPF program within its containing main BPF + * program. For the entry-point (main) BPF program, this is always + * zero. For a sub-program, this gets reset before each of main BPF + * programs are processed and relocated and is used to determined + * whether sub-program was already appended to the main program, and + * if yes, at which instruction offset. + */ + size_t sub_insn_off; + + char *name; /* section_name with / replaced by _; makes recursive pinning * in bpf_object__pin_programs easier */ char *pin_name; + + /* instructions that belong to BPF program; insns[0] is located at + * sec_insn_off instruction within its ELF section in ELF file, so + * when mapping ELF file instruction index to the local instruction, + * one needs to subtract sec_insn_off; and vice versa. + */ struct bpf_insn *insns; + /* actual number of instruction in this BPF program's image; for + * entry-point BPF programs this includes the size of main program + * itself plus all the used sub-programs, appended at the end + */ size_t insns_cnt, main_prog_cnt; - enum bpf_prog_type type; - bool load; struct reloc_desc *reloc_desc; int nr_reloc; @@ -246,7 +271,10 @@ struct bpf_program { void *priv; bpf_program_clear_priv_t clear_priv; + bool load; + enum bpf_prog_type type; enum bpf_attach_type expected_attach_type; + int prog_ifindex; __u32 attach_btf_id; __u32 attach_prog_fd; void *func_info; @@ -446,6 +474,8 @@ static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name); static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr); static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn); static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn); +static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx, + size_t off, __u32 sym_type, GElf_Sym *sym); void bpf_program__unload(struct bpf_program *prog) { @@ -493,7 +523,7 @@ static void bpf_program__exit(struct bpf_program *prog) prog->nr_reloc = 0; prog->insns_cnt = 0; - prog->idx = -1; + prog->sec_idx = -1; } static char *__bpf_program__pin_name(struct bpf_program *prog) @@ -508,130 +538,118 @@ static char *__bpf_program__pin_name(struct bpf_program *prog) } static int -bpf_program__init(void *data, size_t size, const char *section_name, int idx, - struct bpf_program *prog) +bpf_program__init(struct bpf_program *prog, const char *name, + size_t sec_idx, const char *sec_name, size_t sec_off, + void *insn_data, size_t insn_data_sz) { - const size_t bpf_insn_sz = sizeof(struct bpf_insn); - - if (size == 0 || size % bpf_insn_sz) { - pr_warn("corrupted section '%s', size: %zu\n", - section_name, size); + if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) { + pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n", + sec_name, name, sec_off, insn_data_sz); return -EINVAL; } - memset(prog, 0, sizeof(*prog)); + prog->sec_idx = sec_idx; + prog->sec_insn_off = sec_off / BPF_INSN_SZ; + prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ; + /* insns_cnt can later be increased by appending used subprograms */ + prog->insns_cnt = prog->sec_insn_cnt; - prog->section_name = strdup(section_name); - if (!prog->section_name) { - pr_warn("failed to alloc name for prog under section(%d) %s\n", - idx, section_name); + prog->type = BPF_PROG_TYPE_UNSPEC; + prog->load = true; + + prog->instances.fds = NULL; + prog->instances.nr = -1; + + prog->section_name = strdup(sec_name); + if (!prog->section_name) + goto errout; + + prog->name = strdup(name); + if (!prog->name) goto errout; - } prog->pin_name = __bpf_program__pin_name(prog); - if (!prog->pin_name) { - pr_warn("failed to alloc pin name for prog under section(%d) %s\n", - idx, section_name); + if (!prog->pin_name) goto errout; - } - prog->insns = malloc(size); - if (!prog->insns) { - pr_warn("failed to alloc insns for prog under section %s\n", - section_name); + prog->insns = malloc(insn_data_sz); + if (!prog->insns) goto errout; - } - prog->insns_cnt = size / bpf_insn_sz; - memcpy(prog->insns, data, size); - prog->idx = idx; - prog->instances.fds = NULL; - prog->instances.nr = -1; - prog->type = BPF_PROG_TYPE_UNSPEC; - prog->load = true; + memcpy(prog->insns, insn_data, insn_data_sz); return 0; errout: + pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name); bpf_program__exit(prog); return -ENOMEM; } static int -bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, - const char *section_name, int idx) +bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, + const char *sec_name, int sec_idx) { - struct bpf_program prog, *progs; + struct bpf_program *prog, *progs; + void *data = sec_data->d_buf; + size_t sec_sz = sec_data->d_size, sec_off, prog_sz; int nr_progs, err; - - err = bpf_program__init(data, size, section_name, idx, &prog); - if (err) - return err; + const char *name; + GElf_Sym sym; progs = obj->programs; nr_progs = obj->nr_programs; + sec_off = 0; - progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(progs[0])); - if (!progs) { - /* - * In this case the original obj->programs - * is still valid, so don't need special treat for - * bpf_close_object(). - */ - pr_warn("failed to alloc a new program under section '%s'\n", - section_name); - bpf_program__exit(&prog); - return -ENOMEM; - } - - pr_debug("elf: found program '%s'\n", prog.section_name); - obj->programs = progs; - obj->nr_programs = nr_progs + 1; - prog.obj = obj; - progs[nr_progs] = prog; - return 0; -} - -static int -bpf_object__init_prog_names(struct bpf_object *obj) -{ - Elf_Data *symbols = obj->efile.symbols; - struct bpf_program *prog; - size_t pi, si; + while (sec_off < sec_sz) { + if (elf_sym_by_sec_off(obj, sec_idx, sec_off, STT_FUNC, &sym)) { + pr_warn("sec '%s': failed to find program symbol at offset %zu\n", + sec_name, sec_off); + return -LIBBPF_ERRNO__FORMAT; + } - for (pi = 0; pi < obj->nr_programs; pi++) { - const char *name = NULL; + prog_sz = sym.st_size; - prog = &obj->programs[pi]; + name = elf_sym_str(obj, sym.st_name); + if (!name) { + pr_warn("sec '%s': failed to get symbol name for offset %zu\n", + sec_name, sec_off); + return -LIBBPF_ERRNO__FORMAT; + } - for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; si++) { - GElf_Sym sym; + if (sec_off + prog_sz > sec_sz) { + pr_warn("sec '%s': program at offset %zu crosses section boundary\n", + sec_name, sec_off); + return -LIBBPF_ERRNO__FORMAT; + } - if (!gelf_getsym(symbols, si, &sym)) - continue; - if (sym.st_shndx != prog->idx) - continue; - if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL) - continue; + pr_debug("sec '%s': found program '%s' at offset %zu, code size %zu bytes\n", + sec_name, name, sec_off, prog_sz); - name = elf_sym_str(obj, sym.st_name); - if (!name) { - pr_warn("prog '%s': failed to get symbol name\n", - prog->section_name); - return -LIBBPF_ERRNO__LIBELF; - } + progs = reallocarray(progs, nr_progs + 1, sizeof(*progs)); + if (!progs) { + /* + * In this case the original obj->programs + * is still valid, so don't need special treat for + * bpf_close_object(). + */ + pr_warn("sec '%s': failed to alloc memory for new program '%s'\n", + sec_name, name); + return -ENOMEM; } + obj->programs = progs; - if (!name && prog->idx == obj->efile.text_shndx) - name = ".text"; + prog = &progs[nr_progs]; + memset(prog, 0, sizeof(*prog)); + prog->obj = obj; - if (!name) { - pr_warn("prog '%s': failed to find program symbol\n", - prog->section_name); - return -EINVAL; - } + err = bpf_program__init(prog, name, sec_idx, sec_name, sec_off, + data + sec_off, prog_sz); + if (err) + return err; - prog->name = strdup(name); - if (!prog->name) - return -ENOMEM; + nr_progs++; + obj->nr_programs = nr_progs; + + sec_off += prog_sz; } return 0; @@ -2675,6 +2693,26 @@ static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn) return data; } +static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx, + size_t off, __u32 sym_type, GElf_Sym *sym) +{ + Elf_Data *symbols = obj->efile.symbols; + size_t n = symbols->d_size / sizeof(GElf_Sym); + int i; + + for (i = 0; i < n; i++) { + if (!gelf_getsym(symbols, i, sym)) + continue; + if (sym->st_shndx != sec_idx || sym->st_value != off) + continue; + if (GELF_ST_TYPE(sym->st_info) != sym_type) + continue; + return 0; + } + + return -ENOENT; +} + static bool is_sec_name_dwarf(const char *name) { /* approximation, but the actual list is too long */ @@ -2795,9 +2833,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj) if (sh.sh_flags & SHF_EXECINSTR) { if (strcmp(name, ".text") == 0) obj->efile.text_shndx = idx; - err = bpf_object__add_program(obj, data->d_buf, - data->d_size, - name, idx); + err = bpf_object__add_programs(obj, data, name, idx); if (err) return err; } else if (strcmp(name, DATA_SEC) == 0) { @@ -3183,7 +3219,7 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx) for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; - if (prog->idx == idx) + if (prog->sec_idx == idx) return prog; } return NULL; @@ -5660,7 +5696,7 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, size_t new_cnt; int err; - if (prog->idx != obj->efile.text_shndx && prog->main_prog_cnt == 0) { + if (prog->sec_idx != obj->efile.text_shndx && prog->main_prog_cnt == 0) { text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx); if (!text) { pr_warn("no .text section found yet relo into text exist\n"); @@ -5783,7 +5819,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) */ for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; - if (prog->idx != obj->efile.text_shndx) + if (prog->sec_idx != obj->efile.text_shndx) continue; err = bpf_program__relocate(prog, obj); @@ -5799,7 +5835,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) */ for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; - if (prog->idx == obj->efile.text_shndx) + if (prog->sec_idx == obj->efile.text_shndx) continue; err = bpf_program__relocate(prog, obj); @@ -6215,7 +6251,7 @@ out: static bool bpf_program__is_function_storage(const struct bpf_program *prog, const struct bpf_object *obj) { - return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls; + return prog->sec_idx == obj->efile.text_shndx && obj->has_pseudo_calls; } static int @@ -6298,7 +6334,6 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, err = err ? : bpf_object__collect_externs(obj); err = err ? : bpf_object__finalize_btf(obj); err = err ? : bpf_object__init_maps(obj, opts); - err = err ? : bpf_object__init_prog_names(obj); err = err ? : bpf_object__collect_reloc(obj); if (err) goto out; -- cgit v1.3.1 From db2b8b06423c7eb4abcb4310b7234f00b30d7730 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Sep 2020 13:35:31 -0700 Subject: libbpf: Support CO-RE relocations for multi-prog sections Fix up CO-RE relocation code to handle relocations against ELF sections containing multiple BPF programs. This requires lookup of a BPF program by its section name and instruction index it contains. While it could have been done as a simple loop, it could run into performance issues pretty quickly, as number of CO-RE relocations can be quite large in real-world applications, and each CO-RE relocation incurs BPF program look up now. So instead of simple loop, implement a binary search by section name + insn offset. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200903203542.15944-4-andriin@fb.com --- tools/lib/bpf/libbpf.c | 82 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 74 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 57f87eee5be5..11ad230ec20c 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2753,6 +2753,18 @@ static bool ignore_elf_section(GElf_Shdr *hdr, const char *name) return false; } +static int cmp_progs(const void *_a, const void *_b) +{ + const struct bpf_program *a = _a; + const struct bpf_program *b = _b; + + if (a->sec_idx != b->sec_idx) + return a->sec_idx < b->sec_idx ? -1 : 1; + + /* sec_insn_off can't be the same within the section */ + return a->sec_insn_off < b->sec_insn_off ? -1 : 1; +} + static int bpf_object__elf_collect(struct bpf_object *obj) { Elf *elf = obj->efile.elf; @@ -2887,6 +2899,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj) pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path); return -LIBBPF_ERRNO__FORMAT; } + + /* sort BPF programs by section name and in-section instruction offset + * for faster search */ + qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); + return bpf_object__init_btf(obj, btf_data, btf_ext_data); } @@ -3415,6 +3432,37 @@ static int bpf_program__record_reloc(struct bpf_program *prog, return 0; } +static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx) +{ + return insn_idx >= prog->sec_insn_off && + insn_idx < prog->sec_insn_off + prog->sec_insn_cnt; +} + +static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj, + size_t sec_idx, size_t insn_idx) +{ + int l = 0, r = obj->nr_programs - 1, m; + struct bpf_program *prog; + + while (l < r) { + m = l + (r - l + 1) / 2; + prog = &obj->programs[m]; + + if (prog->sec_idx < sec_idx || + (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx)) + l = m; + else + r = m - 1; + } + /* matching program could be at index l, but it still might be the + * wrong one, so we need to double check conditions for the last time + */ + prog = &obj->programs[l]; + if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx)) + return prog; + return NULL; +} + static int bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, Elf_Data *data, struct bpf_object *obj) @@ -5229,6 +5277,11 @@ static int bpf_core_patch_insn(struct bpf_program *prog, if (relo->insn_off % BPF_INSN_SZ) return -EINVAL; insn_idx = relo->insn_off / BPF_INSN_SZ; + /* adjust insn_idx from section frame of reference to the local + * program's frame of reference; (sub-)program code is not yet + * relocated, so it's enough to just subtract in-section offset + */ + insn_idx = insn_idx - prog->sec_insn_off; insn = &prog->insns[insn_idx]; class = BPF_CLASS(insn->code); @@ -5619,7 +5672,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) struct bpf_program *prog; struct btf *targ_btf; const char *sec_name; - int i, err = 0; + int i, err = 0, insn_idx, sec_idx; if (obj->btf_ext->core_relo_info.len == 0) return 0; @@ -5646,24 +5699,37 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) err = -EINVAL; goto out; } + /* bpf_object's ELF is gone by now so it's not easy to find + * section index by section name, but we can find *any* + * bpf_program within desired section name and use it's + * prog->sec_idx to do a proper search by section index and + * instruction offset + */ prog = NULL; for (i = 0; i < obj->nr_programs; i++) { - if (!strcmp(obj->programs[i].section_name, sec_name)) { - prog = &obj->programs[i]; + prog = &obj->programs[i]; + if (strcmp(prog->section_name, sec_name) == 0) break; - } } if (!prog) { - pr_warn("failed to find program '%s' for CO-RE offset relocation\n", - sec_name); - err = -EINVAL; - goto out; + pr_warn("sec '%s': failed to find a BPF program\n", sec_name); + return -ENOENT; } + sec_idx = prog->sec_idx; pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info); for_each_btf_ext_rec(seg, sec, i, rec) { + insn_idx = rec->insn_off / BPF_INSN_SZ; + prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); + if (!prog) { + pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n", + sec_name, insn_idx, i); + err = -EINVAL; + goto out; + } + err = bpf_core_apply_relo(prog, rec, i, obj->btf, targ_btf, cand_cache); if (err) { -- cgit v1.3.1 From c3c556966de7a0d5f8c908f407c673070fcdbf2b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Sep 2020 13:35:32 -0700 Subject: libbpf: Make RELO_CALL work for multi-prog sections and sub-program calls This patch implements general and correct logic for bpf-to-bpf sub-program calls. Only sub-programs used (called into) from entry-point (main) BPF program are going to be appended at the end of main BPF program. This ensures that BPF verifier won't encounter any dead code due to copying unreferenced sub-program. This change means that each entry-point (main) BPF program might have a different set of sub-programs appended to it and potentially in different order. This has implications on how sub-program call relocations need to be handled, described below. All relocations are now split into two categores: data references (maps and global variables) and code references (sub-program calls). This distinction is important because data references need to be relocated just once per each BPF program and sub-program. These relocation are agnostic to instruction locations, because they are not code-relative and they are relocating against static targets (maps, variables with fixes offsets, etc). Sub-program RELO_CALL relocations, on the other hand, are highly-dependent on code position, because they are recorded as instruction-relative offset. So BPF sub-programs (those that do calls into other sub-programs) can't be relocated once, they need to be relocated each time such a sub-program is appended at the end of the main entry-point BPF program. As mentioned above, each main BPF program might have different subset and differen order of sub-programs, so call relocations can't be done just once. Splitting data reference and calls relocations as described above allows to do this efficiently and cleanly. bpf_object__find_program_by_name() will now ignore non-entry BPF programs. Previously one could have looked up '.text' fake BPF program, but the existence of such BPF program was always an implementation detail and you can't do much useful with it. Now, though, all non-entry sub-programs get their own BPF program with name corresponding to a function name, so there is no more '.text' name for BPF program. This means there is no regression, effectively, w.r.t. API behavior. But this is important aspect to highlight, because it's going to be critical once libbpf implements static linking of BPF programs. Non-entry static BPF programs will be allowed to have conflicting names, but global and main-entry BPF program names should be unique. Just like with normal user-space linking process. So it's important to restrict this aspect right now, keep static and non-entry functions as internal implementation details, and not have to deal with regressions in behavior later. This patch leaves .BTF.ext adjustment as is until next patch. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200903203542.15944-5-andriin@fb.com --- tools/lib/bpf/libbpf.c | 518 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 380 insertions(+), 138 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 11ad230ec20c..4e32a1028379 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -193,6 +193,7 @@ struct reloc_desc { int insn_idx; int map_idx; int sym_off; + bool processed; }; struct bpf_sec_def; @@ -255,7 +256,7 @@ struct bpf_program { * entry-point BPF programs this includes the size of main program * itself plus all the used sub-programs, appended at the end */ - size_t insns_cnt, main_prog_cnt; + size_t insns_cnt; struct reloc_desc *reloc_desc; int nr_reloc; @@ -412,7 +413,7 @@ struct bpf_object { int kconfig_map_idx; bool loaded; - bool has_pseudo_calls; + bool has_subcalls; /* * Information when doing elf related work. Only valid if fd @@ -537,17 +538,32 @@ static char *__bpf_program__pin_name(struct bpf_program *prog) return name; } +static bool insn_is_subprog_call(const struct bpf_insn *insn) +{ + return BPF_CLASS(insn->code) == BPF_JMP && + BPF_OP(insn->code) == BPF_CALL && + BPF_SRC(insn->code) == BPF_K && + insn->src_reg == BPF_PSEUDO_CALL && + insn->dst_reg == 0 && + insn->off == 0; +} + static int -bpf_program__init(struct bpf_program *prog, const char *name, - size_t sec_idx, const char *sec_name, size_t sec_off, - void *insn_data, size_t insn_data_sz) +bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, + const char *name, size_t sec_idx, const char *sec_name, + size_t sec_off, void *insn_data, size_t insn_data_sz) { + int i; + if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) { pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n", sec_name, name, sec_off, insn_data_sz); return -EINVAL; } + memset(prog, 0, sizeof(*prog)); + prog->obj = obj; + prog->sec_idx = sec_idx; prog->sec_insn_off = sec_off / BPF_INSN_SZ; prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ; @@ -577,6 +593,13 @@ bpf_program__init(struct bpf_program *prog, const char *name, goto errout; memcpy(prog->insns, insn_data, insn_data_sz); + for (i = 0; i < prog->insns_cnt; i++) { + if (insn_is_subprog_call(&prog->insns[i])) { + obj->has_subcalls = true; + break; + } + } + return 0; errout: pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name); @@ -621,10 +644,10 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return -LIBBPF_ERRNO__FORMAT; } - pr_debug("sec '%s': found program '%s' at offset %zu, code size %zu bytes\n", - sec_name, name, sec_off, prog_sz); + pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n", + sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz); - progs = reallocarray(progs, nr_progs + 1, sizeof(*progs)); + progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs)); if (!progs) { /* * In this case the original obj->programs @@ -638,11 +661,9 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, obj->programs = progs; prog = &progs[nr_progs]; - memset(prog, 0, sizeof(*prog)); - prog->obj = obj; - err = bpf_program__init(prog, name, sec_idx, sec_name, sec_off, - data + sec_off, prog_sz); + err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name, + sec_off, data + sec_off, prog_sz); if (err) return err; @@ -3255,6 +3276,12 @@ bpf_object__find_program_by_title(const struct bpf_object *obj, return NULL; } +static bool prog_is_subprog(const struct bpf_object *obj, + const struct bpf_program *prog) +{ + return prog->sec_idx == obj->efile.text_shndx && obj->has_subcalls; +} + struct bpf_program * bpf_object__find_program_by_name(const struct bpf_object *obj, const char *name) @@ -3262,6 +3289,8 @@ bpf_object__find_program_by_name(const struct bpf_object *obj, struct bpf_program *prog; bpf_object__for_each_program(prog, obj) { + if (prog_is_subprog(obj, prog)) + continue; if (!strcmp(prog->name, name)) return prog; } @@ -3311,6 +3340,8 @@ static int bpf_program__record_reloc(struct bpf_program *prog, const char *sym_sec_name; struct bpf_map *map; + reloc_desc->processed = false; + /* sub-program call relocation */ if (insn->code == (BPF_JMP | BPF_CALL)) { if (insn->src_reg != BPF_PSEUDO_CALL) { @@ -3332,7 +3363,6 @@ static int bpf_program__record_reloc(struct bpf_program *prog, reloc_desc->type = RELO_CALL; reloc_desc->insn_idx = insn_idx; reloc_desc->sym_off = sym->st_value; - obj->has_pseudo_calls = true; return 0; } @@ -3464,13 +3494,18 @@ static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj, } static int -bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, - Elf_Data *data, struct bpf_object *obj) +bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data *data) { Elf_Data *symbols = obj->efile.symbols; const char *relo_sec_name, *sec_name; size_t sec_idx = shdr->sh_info; + struct bpf_program *prog; + struct reloc_desc *relos; int err, i, nrels; + const char *sym_name; + __u32 insn_idx; + GElf_Sym sym; + GElf_Rel rel; relo_sec_name = elf_sec_str(obj, shdr->sh_name); sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); @@ -3481,19 +3516,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, relo_sec_name, sec_idx, sec_name); nrels = shdr->sh_size / shdr->sh_entsize; - prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels); - if (!prog->reloc_desc) { - pr_warn("failed to alloc memory in relocation\n"); - return -ENOMEM; - } - prog->nr_reloc = nrels; - for (i = 0; i < nrels; i++) { - const char *sym_name; - __u32 insn_idx; - GElf_Sym sym; - GElf_Rel rel; - if (!gelf_getrel(data, i, &rel)) { pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i); return -LIBBPF_ERRNO__FORMAT; @@ -3510,15 +3533,42 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, } insn_idx = rel.r_offset / BPF_INSN_SZ; - sym_name = elf_sym_str(obj, sym.st_name) ?: ""; + /* relocations against static functions are recorded as + * relocations against the section that contains a function; + * in such case, symbol will be STT_SECTION and sym.st_name + * will point to empty string (0), so fetch section name + * instead + */ + if (GELF_ST_TYPE(sym.st_info) == STT_SECTION && sym.st_name == 0) + sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym.st_shndx)); + else + sym_name = elf_sym_str(obj, sym.st_name); + sym_name = sym_name ?: "reloc_desc[i], + prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); + if (!prog) { + pr_warn("sec '%s': relo #%d: program not found in section '%s' for insn #%u\n", + relo_sec_name, i, sec_name, insn_idx); + return -LIBBPF_ERRNO__RELOC; + } + + relos = libbpf_reallocarray(prog->reloc_desc, + prog->nr_reloc + 1, sizeof(*relos)); + if (!relos) + return -ENOMEM; + prog->reloc_desc = relos; + + /* adjust insn_idx to local BPF program frame of reference */ + insn_idx -= prog->sec_insn_off; + err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc], insn_idx, sym_name, &sym, &rel); if (err) return err; + + prog->nr_reloc++; } return 0; } @@ -5753,89 +5803,32 @@ out: return err; } +/* Relocate data references within program code: + * - map references; + * - global variable references; + * - extern references. + */ static int -bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, - struct reloc_desc *relo) -{ - struct bpf_insn *insn, *new_insn; - struct bpf_program *text; - size_t new_cnt; - int err; - - if (prog->sec_idx != obj->efile.text_shndx && prog->main_prog_cnt == 0) { - text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx); - if (!text) { - pr_warn("no .text section found yet relo into text exist\n"); - return -LIBBPF_ERRNO__RELOC; - } - new_cnt = prog->insns_cnt + text->insns_cnt; - new_insn = libbpf_reallocarray(prog->insns, new_cnt, sizeof(*insn)); - if (!new_insn) { - pr_warn("oom in prog realloc\n"); - return -ENOMEM; - } - prog->insns = new_insn; - - if (obj->btf_ext) { - err = bpf_program_reloc_btf_ext(prog, obj, - text->section_name, - prog->insns_cnt); - if (err) - return err; - } - - memcpy(new_insn + prog->insns_cnt, text->insns, - text->insns_cnt * sizeof(*insn)); - prog->main_prog_cnt = prog->insns_cnt; - prog->insns_cnt = new_cnt; - pr_debug("added %zd insn from %s to prog %s\n", - text->insns_cnt, text->section_name, - prog->section_name); - } - - insn = &prog->insns[relo->insn_idx]; - insn->imm += relo->sym_off / 8 + prog->main_prog_cnt - relo->insn_idx; - return 0; -} - -static int -bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) +bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) { - int i, err; - - if (!prog) - return 0; - - if (obj->btf_ext) { - err = bpf_program_reloc_btf_ext(prog, obj, - prog->section_name, 0); - if (err) - return err; - } - - if (!prog->reloc_desc) - return 0; + int i; for (i = 0; i < prog->nr_reloc; i++) { struct reloc_desc *relo = &prog->reloc_desc[i]; struct bpf_insn *insn = &prog->insns[relo->insn_idx]; struct extern_desc *ext; - if (relo->insn_idx + 1 >= (int)prog->insns_cnt) { - pr_warn("relocation out of range: '%s'\n", - prog->section_name); - return -LIBBPF_ERRNO__RELOC; - } - switch (relo->type) { case RELO_LD64: insn[0].src_reg = BPF_PSEUDO_MAP_FD; insn[0].imm = obj->maps[relo->map_idx].fd; + relo->processed = true; break; case RELO_DATA: insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; insn[1].imm = insn[0].imm + relo->sym_off; insn[0].imm = obj->maps[relo->map_idx].fd; + relo->processed = true; break; case RELO_EXTERN: ext = &obj->externs[relo->sym_off]; @@ -5847,11 +5840,10 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) insn[0].imm = (__u32)ext->ksym.addr; insn[1].imm = ext->ksym.addr >> 32; } + relo->processed = true; break; case RELO_CALL: - err = bpf_program__reloc_text(prog, obj, relo); - if (err) - return err; + /* will be handled as a follow up pass */ break; default: pr_warn("prog '%s': relo #%d: bad relo type %d\n", @@ -5860,8 +5852,244 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) } } - zfree(&prog->reloc_desc); - prog->nr_reloc = 0; + return 0; +} + +static int cmp_relo_by_insn_idx(const void *key, const void *elem) +{ + size_t insn_idx = *(const size_t *)key; + const struct reloc_desc *relo = elem; + + if (insn_idx == relo->insn_idx) + return 0; + return insn_idx < relo->insn_idx ? -1 : 1; +} + +static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx) +{ + return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc, + sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx); +} + +static int +bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, + struct bpf_program *prog) +{ + size_t sub_insn_idx, insn_idx, new_cnt; + struct bpf_program *subprog; + struct bpf_insn *insns, *insn; + struct reloc_desc *relo; + int err; + + err = reloc_prog_func_and_line_info(obj, main_prog, prog); + if (err) + return err; + + for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) { + insn = &main_prog->insns[prog->sub_insn_off + insn_idx]; + if (!insn_is_subprog_call(insn)) + continue; + + relo = find_prog_insn_relo(prog, insn_idx); + if (relo && relo->type != RELO_CALL) { + pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n", + prog->name, insn_idx, relo->type); + return -LIBBPF_ERRNO__RELOC; + } + if (relo) { + /* sub-program instruction index is a combination of + * an offset of a symbol pointed to by relocation and + * call instruction's imm field; for global functions, + * call always has imm = -1, but for static functions + * relocation is against STT_SECTION and insn->imm + * points to a start of a static function + */ + sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1; + } else { + /* if subprogram call is to a static function within + * the same ELF section, there won't be any relocation + * emitted, but it also means there is no additional + * offset necessary, insns->imm is relative to + * instruction's original position within the section + */ + sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1; + } + + /* we enforce that sub-programs should be in .text section */ + subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx); + if (!subprog) { + pr_warn("prog '%s': no .text section found yet sub-program call exists\n", + prog->name); + return -LIBBPF_ERRNO__RELOC; + } + + /* if it's the first call instruction calling into this + * subprogram (meaning this subprog hasn't been processed + * yet) within the context of current main program: + * - append it at the end of main program's instructions blog; + * - process is recursively, while current program is put on hold; + * - if that subprogram calls some other not yet processes + * subprogram, same thing will happen recursively until + * there are no more unprocesses subprograms left to append + * and relocate. + */ + if (subprog->sub_insn_off == 0) { + subprog->sub_insn_off = main_prog->insns_cnt; + + new_cnt = main_prog->insns_cnt + subprog->insns_cnt; + insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); + if (!insns) { + pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); + return -ENOMEM; + } + main_prog->insns = insns; + main_prog->insns_cnt = new_cnt; + + memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, + subprog->insns_cnt * sizeof(*insns)); + + pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", + main_prog->name, subprog->insns_cnt, subprog->name); + + err = bpf_object__reloc_code(obj, main_prog, subprog); + if (err) + return err; + } + + /* main_prog->insns memory could have been re-allocated, so + * calculate pointer again + */ + insn = &main_prog->insns[prog->sub_insn_off + insn_idx]; + /* calculate correct instruction position within current main + * prog; each main prog can have a different set of + * subprograms appended (potentially in different order as + * well), so position of any subprog can be different for + * different main programs */ + insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1; + + if (relo) + relo->processed = true; + + pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n", + prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off); + } + + return 0; +} + +/* + * Relocate sub-program calls. + * + * Algorithm operates as follows. Each entry-point BPF program (referred to as + * main prog) is processed separately. For each subprog (non-entry functions, + * that can be called from either entry progs or other subprogs) gets their + * sub_insn_off reset to zero. This serves as indicator that this subprogram + * hasn't been yet appended and relocated within current main prog. Once its + * relocated, sub_insn_off will point at the position within current main prog + * where given subprog was appended. This will further be used to relocate all + * the call instructions jumping into this subprog. + * + * We start with main program and process all call instructions. If the call + * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off + * is zero), subprog instructions are appended at the end of main program's + * instruction array. Then main program is "put on hold" while we recursively + * process newly appended subprogram. If that subprogram calls into another + * subprogram that hasn't been appended, new subprogram is appended again to + * the *main* prog's instructions (subprog's instructions are always left + * untouched, as they need to be in unmodified state for subsequent main progs + * and subprog instructions are always sent only as part of a main prog) and + * the process continues recursively. Once all the subprogs called from a main + * prog or any of its subprogs are appended (and relocated), all their + * positions within finalized instructions array are known, so it's easy to + * rewrite call instructions with correct relative offsets, corresponding to + * desired target subprog. + * + * Its important to realize that some subprogs might not be called from some + * main prog and any of its called/used subprogs. Those will keep their + * subprog->sub_insn_off as zero at all times and won't be appended to current + * main prog and won't be relocated within the context of current main prog. + * They might still be used from other main progs later. + * + * Visually this process can be shown as below. Suppose we have two main + * programs mainA and mainB and BPF object contains three subprogs: subA, + * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and + * subC both call subB: + * + * +--------+ +-------+ + * | v v | + * +--+---+ +--+-+-+ +---+--+ + * | subA | | subB | | subC | + * +--+---+ +------+ +---+--+ + * ^ ^ + * | | + * +---+-------+ +------+----+ + * | mainA | | mainB | + * +-----------+ +-----------+ + * + * We'll start relocating mainA, will find subA, append it and start + * processing sub A recursively: + * + * +-----------+------+ + * | mainA | subA | + * +-----------+------+ + * + * At this point we notice that subB is used from subA, so we append it and + * relocate (there are no further subcalls from subB): + * + * +-----------+------+------+ + * | mainA | subA | subB | + * +-----------+------+------+ + * + * At this point, we relocate subA calls, then go one level up and finish with + * relocatin mainA calls. mainA is done. + * + * For mainB process is similar but results in different order. We start with + * mainB and skip subA and subB, as mainB never calls them (at least + * directly), but we see subC is needed, so we append and start processing it: + * + * +-----------+------+ + * | mainB | subC | + * +-----------+------+ + * Now we see subC needs subB, so we go back to it, append and relocate it: + * + * +-----------+------+------+ + * | mainB | subC | subB | + * +-----------+------+------+ + * + * At this point we unwind recursion, relocate calls in subC, then in mainB. + */ +static int +bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog) +{ + struct bpf_program *subprog; + int i, j, err; + + if (obj->btf_ext) { + err = bpf_program_reloc_btf_ext(prog, obj, + prog->section_name, 0); + if (err) + return err; + } + + /* mark all subprogs as not relocated (yet) within the context of + * current main program + */ + for (i = 0; i < obj->nr_programs; i++) { + subprog = &obj->programs[i]; + if (!prog_is_subprog(obj, subprog)) + continue; + + subprog->sub_insn_off = 0; + for (j = 0; j < subprog->nr_reloc; j++) + if (subprog->reloc_desc[j].type == RELO_CALL) + subprog->reloc_desc[j].processed = false; + } + + err = bpf_object__reloc_code(obj, prog, prog); + if (err) + return err; + + return 0; } @@ -5880,37 +6108,45 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) return err; } } - /* ensure .text is relocated first, as it's going to be copied as-is - * later for sub-program calls + /* relocate data references first for all programs and sub-programs, + * as they don't change relative to code locations, so subsequent + * subprogram processing won't need to re-calculate any of them */ for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; - if (prog->sec_idx != obj->efile.text_shndx) - continue; - - err = bpf_program__relocate(prog, obj); + err = bpf_object__relocate_data(obj, prog); if (err) { pr_warn("prog '%s': failed to relocate data references: %d\n", prog->name, err); return err; } - break; } - /* now relocate everything but .text, which by now is relocated - * properly, so we can copy raw sub-program instructions as is safely + /* now relocate subprogram calls and append used subprograms to main + * programs; each copy of subprogram code needs to be relocated + * differently for each main program, because its code location might + * have changed */ for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; - if (prog->sec_idx == obj->efile.text_shndx) + /* sub-program's sub-calls are relocated within the context of + * its main program only + */ + if (prog_is_subprog(obj, prog)) continue; - err = bpf_program__relocate(prog, obj); + err = bpf_object__relocate_calls(obj, prog); if (err) { pr_warn("prog '%s': failed to relocate calls: %d\n", prog->name, err); return err; } } + /* free up relocation descriptors */ + for (i = 0; i < obj->nr_programs; i++) { + prog = &obj->programs[i]; + zfree(&prog->reloc_desc); + prog->nr_reloc = 0; + } return 0; } @@ -6030,41 +6266,53 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, return 0; } -static int bpf_object__collect_reloc(struct bpf_object *obj) +static int cmp_relocs(const void *_a, const void *_b) { - int i, err; + const struct reloc_desc *a = _a; + const struct reloc_desc *b = _b; - if (!obj_elf_valid(obj)) { - pr_warn("Internal error: elf object is closed\n"); - return -LIBBPF_ERRNO__INTERNAL; - } + if (a->insn_idx != b->insn_idx) + return a->insn_idx < b->insn_idx ? -1 : 1; + + /* no two relocations should have the same insn_idx, but ... */ + if (a->type != b->type) + return a->type < b->type ? -1 : 1; + + return 0; +} + +static int bpf_object__collect_relos(struct bpf_object *obj) +{ + int i, err; for (i = 0; i < obj->efile.nr_reloc_sects; i++) { GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr; Elf_Data *data = obj->efile.reloc_sects[i].data; int idx = shdr->sh_info; - struct bpf_program *prog; if (shdr->sh_type != SHT_REL) { pr_warn("internal error at %d\n", __LINE__); return -LIBBPF_ERRNO__INTERNAL; } - if (idx == obj->efile.st_ops_shndx) { + if (idx == obj->efile.st_ops_shndx) err = bpf_object__collect_st_ops_relos(obj, shdr, data); - } else if (idx == obj->efile.btf_maps_shndx) { + else if (idx == obj->efile.btf_maps_shndx) err = bpf_object__collect_map_relos(obj, shdr, data); - } else { - prog = bpf_object__find_prog_by_idx(obj, idx); - if (!prog) { - pr_warn("relocation failed: no prog in section(%d)\n", idx); - return -LIBBPF_ERRNO__RELOC; - } - err = bpf_program__collect_reloc(prog, shdr, data, obj); - } + else + err = bpf_object__collect_prog_relos(obj, shdr, data); if (err) return err; } + + for (i = 0; i < obj->nr_programs; i++) { + struct bpf_program *p = &obj->programs[i]; + + if (!p->nr_reloc) + continue; + + qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs); + } return 0; } @@ -6314,12 +6562,6 @@ out: return err; } -static bool bpf_program__is_function_storage(const struct bpf_program *prog, - const struct bpf_object *obj) -{ - return prog->sec_idx == obj->efile.text_shndx && obj->has_pseudo_calls; -} - static int bpf_object__load_progs(struct bpf_object *obj, int log_level) { @@ -6336,7 +6578,7 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; - if (bpf_program__is_function_storage(prog, obj)) + if (prog_is_subprog(obj, prog)) continue; if (!prog->load) { pr_debug("prog '%s': skipped loading\n", prog->name); @@ -6400,7 +6642,7 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, err = err ? : bpf_object__collect_externs(obj); err = err ? : bpf_object__finalize_btf(obj); err = err ? : bpf_object__init_maps(obj, opts); - err = err ? : bpf_object__collect_reloc(obj); + err = err ? : bpf_object__collect_relos(obj); if (err) goto out; bpf_object__elf_finish(obj); @@ -7404,7 +7646,7 @@ bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj) do { prog = __bpf_program__iter(prog, obj, true); - } while (prog && bpf_program__is_function_storage(prog, obj)); + } while (prog && prog_is_subprog(obj, prog)); return prog; } @@ -7416,7 +7658,7 @@ bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj) do { prog = __bpf_program__iter(prog, obj, false); - } while (prog && bpf_program__is_function_storage(prog, obj)); + } while (prog && prog_is_subprog(obj, prog)); return prog; } -- cgit v1.3.1 From 8505e8709b5eedb7c662354e8436a2a164181109 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Sep 2020 13:35:33 -0700 Subject: libbpf: Implement generalized .BTF.ext func/line info adjustment Complete multi-prog sections and multi sub-prog support in libbpf by properly adjusting .BTF.ext's line and function information. Mark exposed btf_ext__reloc_func_info() and btf_ext__reloc_func_info() APIs as deprecated. These APIs have simplistic assumption that all sub-programs are going to be appended to all main BPF programs, which doesn't hold in real life. It's unlikely there are any users of this API, as it's very libbpf internals-specific. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200903203542.15944-6-andriin@fb.com --- tools/lib/bpf/btf.h | 18 ++-- tools/lib/bpf/libbpf.c | 217 +++++++++++++++++++++++++++--------------- tools/lib/bpf/libbpf_common.h | 2 + 3 files changed, 153 insertions(+), 84 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 91f0ad0e0325..2a55320d87d0 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -57,14 +57,16 @@ LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size); LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size); -LIBBPF_API int btf_ext__reloc_func_info(const struct btf *btf, - const struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **func_info, __u32 *cnt); -LIBBPF_API int btf_ext__reloc_line_info(const struct btf *btf, - const struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **line_info, __u32 *cnt); +LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions") +int btf_ext__reloc_func_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **func_info, __u32 *cnt); +LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions") +int btf_ext__reloc_line_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **line_info, __u32 *cnt); LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4e32a1028379..ca2b5c9145da 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4241,75 +4241,6 @@ err_out: return err; } -static int -check_btf_ext_reloc_err(struct bpf_program *prog, int err, - void *btf_prog_info, const char *info_name) -{ - if (err != -ENOENT) { - pr_warn("Error in loading %s for sec %s.\n", - info_name, prog->section_name); - return err; - } - - /* err == -ENOENT (i.e. prog->section_name not found in btf_ext) */ - - if (btf_prog_info) { - /* - * Some info has already been found but has problem - * in the last btf_ext reloc. Must have to error out. - */ - pr_warn("Error in relocating %s for sec %s.\n", - info_name, prog->section_name); - return err; - } - - /* Have problem loading the very first info. Ignore the rest. */ - pr_warn("Cannot find %s for main program sec %s. Ignore all %s.\n", - info_name, prog->section_name, info_name); - return 0; -} - -static int -bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj, - const char *section_name, __u32 insn_offset) -{ - int err; - - if (!insn_offset || prog->func_info) { - /* - * !insn_offset => main program - * - * For sub prog, the main program's func_info has to - * be loaded first (i.e. prog->func_info != NULL) - */ - err = btf_ext__reloc_func_info(obj->btf, obj->btf_ext, - section_name, insn_offset, - &prog->func_info, - &prog->func_info_cnt); - if (err) - return check_btf_ext_reloc_err(prog, err, - prog->func_info, - "bpf_func_info"); - - prog->func_info_rec_size = btf_ext__func_info_rec_size(obj->btf_ext); - } - - if (!insn_offset || prog->line_info) { - err = btf_ext__reloc_line_info(obj->btf, obj->btf_ext, - section_name, insn_offset, - &prog->line_info, - &prog->line_info_cnt); - if (err) - return check_btf_ext_reloc_err(prog, err, - prog->line_info, - "bpf_line_info"); - - prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext); - } - - return 0; -} - #define BPF_CORE_SPEC_MAX_LEN 64 /* represents BPF CO-RE field or array element accessor */ @@ -5855,6 +5786,147 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) return 0; } +static int adjust_prog_btf_ext_info(const struct bpf_object *obj, + const struct bpf_program *prog, + const struct btf_ext_info *ext_info, + void **prog_info, __u32 *prog_rec_cnt, + __u32 *prog_rec_sz) +{ + void *copy_start = NULL, *copy_end = NULL; + void *rec, *rec_end, *new_prog_info; + const struct btf_ext_info_sec *sec; + size_t old_sz, new_sz; + const char *sec_name; + int i, off_adj; + + for_each_btf_ext_sec(ext_info, sec) { + sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); + if (!sec_name) + return -EINVAL; + if (strcmp(sec_name, prog->section_name) != 0) + continue; + + for_each_btf_ext_rec(ext_info, sec, i, rec) { + __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ; + + if (insn_off < prog->sec_insn_off) + continue; + if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt) + break; + + if (!copy_start) + copy_start = rec; + copy_end = rec + ext_info->rec_size; + } + + if (!copy_start) + return -ENOENT; + + /* append func/line info of a given (sub-)program to the main + * program func/line info + */ + old_sz = (*prog_rec_cnt) * ext_info->rec_size; + new_sz = old_sz + (copy_end - copy_start); + new_prog_info = realloc(*prog_info, new_sz); + if (!new_prog_info) + return -ENOMEM; + *prog_info = new_prog_info; + *prog_rec_cnt = new_sz / ext_info->rec_size; + memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start); + + /* Kernel instruction offsets are in units of 8-byte + * instructions, while .BTF.ext instruction offsets generated + * by Clang are in units of bytes. So convert Clang offsets + * into kernel offsets and adjust offset according to program + * relocated position. + */ + off_adj = prog->sub_insn_off - prog->sec_insn_off; + rec = new_prog_info + old_sz; + rec_end = new_prog_info + new_sz; + for (; rec < rec_end; rec += ext_info->rec_size) { + __u32 *insn_off = rec; + + *insn_off = *insn_off / BPF_INSN_SZ + off_adj; + } + *prog_rec_sz = ext_info->rec_size; + return 0; + } + + return -ENOENT; +} + +static int +reloc_prog_func_and_line_info(const struct bpf_object *obj, + struct bpf_program *main_prog, + const struct bpf_program *prog) +{ + int err; + + /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't + * supprot func/line info + */ + if (!obj->btf_ext || !kernel_supports(FEAT_BTF_FUNC)) + return 0; + + /* only attempt func info relocation if main program's func_info + * relocation was successful + */ + if (main_prog != prog && !main_prog->func_info) + goto line_info; + + err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info, + &main_prog->func_info, + &main_prog->func_info_cnt, + &main_prog->func_info_rec_size); + if (err) { + if (err != -ENOENT) { + pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n", + prog->name, err); + return err; + } + if (main_prog->func_info) { + /* + * Some info has already been found but has problem + * in the last btf_ext reloc. Must have to error out. + */ + pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name); + return err; + } + /* Have problem loading the very first info. Ignore the rest. */ + pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n", + prog->name); + } + +line_info: + /* don't relocate line info if main program's relocation failed */ + if (main_prog != prog && !main_prog->line_info) + return 0; + + err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info, + &main_prog->line_info, + &main_prog->line_info_cnt, + &main_prog->line_info_rec_size); + if (err) { + if (err != -ENOENT) { + pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n", + prog->name, err); + return err; + } + if (main_prog->line_info) { + /* + * Some info has already been found but has problem + * in the last btf_ext reloc. Must have to error out. + */ + pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name); + return err; + } + /* Have problem loading the very first info. Ignore the rest. */ + pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n", + prog->name); + } + return 0; +} + static int cmp_relo_by_insn_idx(const void *key, const void *elem) { size_t insn_idx = *(const size_t *)key; @@ -6064,13 +6136,6 @@ bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog) struct bpf_program *subprog; int i, j, err; - if (obj->btf_ext) { - err = bpf_program_reloc_btf_ext(prog, obj, - prog->section_name, 0); - if (err) - return err; - } - /* mark all subprogs as not relocated (yet) within the context of * current main program */ diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h index a23ae1ac27eb..947d8bd8a7bb 100644 --- a/tools/lib/bpf/libbpf_common.h +++ b/tools/lib/bpf/libbpf_common.h @@ -15,6 +15,8 @@ #define LIBBPF_API __attribute__((visibility("default"))) #endif +#define LIBBPF_DEPRECATED(msg) __attribute__((deprecated(msg))) + /* Helper macro to declare and initialize libbpf options struct * * This dance with uninitialized declaration, followed by memset to zero, -- cgit v1.3.1 From 7e06aad52929d17610f28acd40dd10ed62295f85 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Sep 2020 13:35:34 -0700 Subject: libbpf: Add multi-prog section support for struct_ops Adjust struct_ops handling code to work with multi-program ELF sections properly. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200903203542.15944-7-andriin@fb.com --- tools/lib/bpf/libbpf.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ca2b5c9145da..2d6bf0af3305 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -73,8 +73,6 @@ #define __printf(a, b) __attribute__((format(printf, a, b))) static struct bpf_map *bpf_object__add_map(struct bpf_object *obj); -static struct bpf_program *bpf_object__find_prog_by_idx(struct bpf_object *obj, - int idx); static const struct btf_type * skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id); @@ -3249,20 +3247,6 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return 0; } -static struct bpf_program * -bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx) -{ - struct bpf_program *prog; - size_t i; - - for (i = 0; i < obj->nr_programs; i++) { - prog = &obj->programs[i]; - if (prog->sec_idx == idx) - return prog; - } - return NULL; -} - struct bpf_program * bpf_object__find_program_by_title(const struct bpf_object *obj, const char *title) @@ -8198,7 +8182,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, const struct btf *btf; struct bpf_map *map; Elf_Data *symbols; - unsigned int moff; + unsigned int moff, insn_idx; const char *name; __u32 member_idx; GElf_Sym sym; @@ -8243,6 +8227,12 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, map->name, (size_t)rel.r_offset, shdr_idx); return -LIBBPF_ERRNO__RELOC; } + if (sym.st_value % BPF_INSN_SZ) { + pr_warn("struct_ops reloc %s: invalid target program offset %llu\n", + map->name, (__u64)sym.st_value); + return -LIBBPF_ERRNO__FORMAT; + } + insn_idx = sym.st_value / BPF_INSN_SZ; member = find_member_by_offset(st_ops->type, moff * 8); if (!member) { @@ -8259,7 +8249,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, return -EINVAL; } - prog = bpf_object__find_prog_by_idx(obj, shdr_idx); + prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx); if (!prog) { pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n", map->name, shdr_idx, name); -- cgit v1.3.1 From a08c02f8d4ae50527b9f26a7147686ed55c93782 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Sep 2020 13:35:35 -0700 Subject: selftests/bpf: Add selftest for multi-prog sections and bpf-to-bpf calls Add a selftest excercising bpf-to-bpf subprogram calls, as well as multiple entry-point BPF programs per section. Also make sure that BPF CO-RE works for such set ups both for sub-programs and for multi-entry sections. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200903203542.15944-8-andriin@fb.com --- tools/testing/selftests/bpf/prog_tests/subprogs.c | 31 +++++++ tools/testing/selftests/bpf/progs/test_subprogs.c | 103 ++++++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/subprogs.c create mode 100644 tools/testing/selftests/bpf/progs/test_subprogs.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/subprogs.c b/tools/testing/selftests/bpf/prog_tests/subprogs.c new file mode 100644 index 000000000000..a00abf58c037 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/subprogs.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include +#include +#include "test_subprogs.skel.h" + +static int duration; + +void test_subprogs(void) +{ + struct test_subprogs *skel; + int err; + + skel = test_subprogs__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + err = test_subprogs__attach(skel); + if (CHECK(err, "skel_attach", "failed to attach skeleton: %d\n", err)) + goto cleanup; + + usleep(1); + + CHECK(skel->bss->res1 != 12, "res1", "got %d, exp %d\n", skel->bss->res1, 12); + CHECK(skel->bss->res2 != 17, "res2", "got %d, exp %d\n", skel->bss->res2, 17); + CHECK(skel->bss->res3 != 19, "res3", "got %d, exp %d\n", skel->bss->res3, 19); + CHECK(skel->bss->res4 != 36, "res4", "got %d, exp %d\n", skel->bss->res4, 36); + +cleanup: + test_subprogs__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c new file mode 100644 index 000000000000..d3c5673c0218 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_subprogs.c @@ -0,0 +1,103 @@ +#include "vmlinux.h" +#include +#include + +const char LICENSE[] SEC("license") = "GPL"; + +__noinline int sub1(int x) +{ + return x + 1; +} + +static __noinline int sub5(int v); + +__noinline int sub2(int y) +{ + return sub5(y + 2); +} + +static __noinline int sub3(int z) +{ + return z + 3 + sub1(4); +} + +static __noinline int sub4(int w) +{ + return w + sub3(5) + sub1(6); +} + +/* sub5() is an identitify function, just to test weirder functions layout and + * call patterns + */ +static __noinline int sub5(int v) +{ + return sub1(v) - 1; /* compensates sub1()'s + 1 */ +} + +/* unfortunately verifier rejects `struct task_struct *t` as an unkown pointer + * type, so we need to accept pointer as integer and then cast it inside the + * function + */ +__noinline int get_task_tgid(uintptr_t t) +{ + /* this ensures that CO-RE relocs work in multi-subprogs .text */ + return BPF_CORE_READ((struct task_struct *)(void *)t, tgid); +} + +int res1 = 0; +int res2 = 0; +int res3 = 0; +int res4 = 0; + +SEC("raw_tp/sys_enter") +int prog1(void *ctx) +{ + /* perform some CO-RE relocations to ensure they work with multi-prog + * sections correctly + */ + struct task_struct *t = (void *)bpf_get_current_task(); + + if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t)) + return 1; + + res1 = sub1(1) + sub3(2); /* (1 + 1) + (2 + 3 + (4 + 1)) = 12 */ + return 0; +} + +SEC("raw_tp/sys_exit") +int prog2(void *ctx) +{ + struct task_struct *t = (void *)bpf_get_current_task(); + + if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t)) + return 1; + + res2 = sub2(3) + sub3(4); /* (3 + 2) + (4 + 3 + (4 + 1)) = 17 */ + return 0; +} + +/* prog3 has the same section name as prog1 */ +SEC("raw_tp/sys_enter") +int prog3(void *ctx) +{ + struct task_struct *t = (void *)bpf_get_current_task(); + + if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t)) + return 1; + + res3 = sub3(5) + 6; /* (5 + 3 + (4 + 1)) + 6 = 19 */ + return 0; +} + +/* prog4 has the same section name as prog2 */ +SEC("raw_tp/sys_exit") +int prog4(void *ctx) +{ + struct task_struct *t = (void *)bpf_get_current_task(); + + if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t)) + return 1; + + res4 = sub4(7) + sub1(8); /* (7 + (5 + 3 + (4 + 1)) + (6 + 1)) + (8 + 1) = 36 */ + return 0; +} -- cgit v1.3.1 From fd17e272be9c50190f1185a1007d72dc1eedb903 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Sep 2020 13:35:36 -0700 Subject: tools/bpftool: Replace bpf_program__title() with bpf_program__section_name() bpf_program__title() is deprecated, switch to bpf_program__section_name() and avoid compilation warnings. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200903203542.15944-9-andriin@fb.com --- tools/bpf/bpftool/prog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index d393eb8263a6..f7923414a052 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1304,7 +1304,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) enum bpf_prog_type prog_type = common_prog_type; if (prog_type == BPF_PROG_TYPE_UNSPEC) { - const char *sec_name = bpf_program__title(pos, false); + const char *sec_name = bpf_program__section_name(pos); err = get_prog_type_by_name(sec_name, &prog_type, &expected_attach_type); @@ -1398,7 +1398,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) err = bpf_obj_pin(bpf_program__fd(prog), pinfile); if (err) { p_err("failed to pin program %s", - bpf_program__title(prog, false)); + bpf_program__section_name(prog)); goto err_close_obj; } } else { -- cgit v1.3.1 From a7659cc30bc9210d885692d175f4bc6f3a9a2175 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Sep 2020 13:35:37 -0700 Subject: selftests/bpf: Don't use deprecated libbpf APIs Remove all uses of bpf_program__title() and bpf_program__find_program_by_title(). Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200903203542.15944-10-andriin@fb.com --- tools/testing/selftests/bpf/flow_dissector_load.h | 8 +++++++- tools/testing/selftests/bpf/prog_tests/reference_tracking.c | 2 +- tools/testing/selftests/bpf/test_socket_cookie.c | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h index daeaeb518894..7290401ec172 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.h +++ b/tools/testing/selftests/bpf/flow_dissector_load.h @@ -23,7 +23,13 @@ static inline int bpf_flow_load(struct bpf_object **obj, if (ret) return ret; - main_prog = bpf_object__find_program_by_title(*obj, section_name); + main_prog = NULL; + bpf_object__for_each_program(prog, *obj) { + if (strcmp(section_name, bpf_program__section_name(prog)) == 0) { + main_prog = prog; + break; + } + } if (!main_prog) return -1; diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c index fc0d7f4f02cf..ac1ee10cffd8 100644 --- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c +++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c @@ -27,7 +27,7 @@ void test_reference_tracking(void) const char *title; /* Ignore .text sections */ - title = bpf_program__title(prog, false); + title = bpf_program__section_name(prog); if (strstr(title, ".text") != NULL) continue; diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c index 154a8fd2a48d..ca7ca87e91aa 100644 --- a/tools/testing/selftests/bpf/test_socket_cookie.c +++ b/tools/testing/selftests/bpf/test_socket_cookie.c @@ -151,7 +151,7 @@ static int run_test(int cgfd) } bpf_object__for_each_program(prog, pobj) { - prog_name = bpf_program__title(prog, /*needs_copy*/ false); + prog_name = bpf_program__section_name(prog); if (libbpf_attach_type_by_name(prog_name, &attach_type)) goto err; -- cgit v1.3.1 From 52109584202783e90a83c95103307a9a03ba7d9e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Sep 2020 13:35:38 -0700 Subject: libbpf: Deprecate notion of BPF program "title" in favor of "section name" BPF program title is ambigious and misleading term. It is ELF section name, so let's just call it that and deprecate bpf_program__title() API in favor of bpf_program__section_name(). Additionally, using bpf_object__find_program_by_title() is now inherently dangerous and ambiguous, as multiple BPF program can have the same section name. So deprecate this API as well and recommend to switch to non-ambiguous bpf_object__find_program_by_name(). Internally, clean up usage and mis-usage of BPF program section name for denoting BPF program name. Shorten the field name to prog->sec_name to be consistent with all other prog->sec_* variables. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200903203542.15944-11-andriin@fb.com --- tools/lib/bpf/libbpf.c | 215 +++++++++++++++++++++-------------------------- tools/lib/bpf/libbpf.h | 5 +- tools/lib/bpf/libbpf.map | 1 + 3 files changed, 101 insertions(+), 120 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2d6bf0af3305..47b43c13eee5 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -217,7 +217,7 @@ struct bpf_sec_def { */ struct bpf_program { const struct bpf_sec_def *sec_def; - char *section_name; + char *sec_name; size_t sec_idx; /* this program's instruction offset (in number of instructions) * within its containing ELF section @@ -239,7 +239,7 @@ struct bpf_program { size_t sub_insn_off; char *name; - /* section_name with / replaced by _; makes recursive pinning + /* sec_name with / replaced by _; makes recursive pinning * in bpf_object__pin_programs easier */ char *pin_name; @@ -515,7 +515,7 @@ static void bpf_program__exit(struct bpf_program *prog) bpf_program__unload(prog); zfree(&prog->name); - zfree(&prog->section_name); + zfree(&prog->sec_name); zfree(&prog->pin_name); zfree(&prog->insns); zfree(&prog->reloc_desc); @@ -529,7 +529,7 @@ static char *__bpf_program__pin_name(struct bpf_program *prog) { char *name, *p; - name = p = strdup(prog->section_name); + name = p = strdup(prog->sec_name); while ((p = strchr(p, '/'))) *p = '_'; @@ -574,8 +574,8 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, prog->instances.fds = NULL; prog->instances.nr = -1; - prog->section_name = strdup(sec_name); - if (!prog->section_name) + prog->sec_name = strdup(sec_name); + if (!prog->sec_name) goto errout; prog->name = strdup(name); @@ -3254,7 +3254,7 @@ bpf_object__find_program_by_title(const struct bpf_object *obj, struct bpf_program *pos; bpf_object__for_each_program(pos, obj) { - if (pos->section_name && !strcmp(pos->section_name, title)) + if (pos->sec_name && !strcmp(pos->sec_name, title)) return pos; } return NULL; @@ -4994,8 +4994,7 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, *val = sz; } else { pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n", - bpf_program__title(prog, false), - relo->kind, relo->insn_off / 8); + prog->name, relo->kind, relo->insn_off / 8); return -EINVAL; } if (validate) @@ -5017,8 +5016,7 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, if (byte_sz >= 8) { /* bitfield can't be read with 64-bit read */ pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n", - bpf_program__title(prog, false), - relo->kind, relo->insn_off / 8); + prog->name, relo->kind, relo->insn_off / 8); return -E2BIG; } byte_sz *= 2; @@ -5183,8 +5181,8 @@ static int bpf_core_calc_relo(const struct bpf_program *prog, } else if (err == -EOPNOTSUPP) { /* EOPNOTSUPP means unknown/unsupported relocation */ pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n", - bpf_program__title(prog, false), relo_idx, - core_relo_kind_str(relo->kind), relo->kind, relo->insn_off / 8); + prog->name, relo_idx, core_relo_kind_str(relo->kind), + relo->kind, relo->insn_off / 8); } return err; @@ -5198,7 +5196,7 @@ static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx, int insn_idx, struct bpf_insn *insn) { pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n", - bpf_program__title(prog, false), relo_idx, insn_idx); + prog->name, relo_idx, insn_idx); insn->code = BPF_JMP | BPF_CALL; insn->dst_reg = 0; insn->src_reg = 0; @@ -5270,14 +5268,14 @@ static int bpf_core_patch_insn(struct bpf_program *prog, return -EINVAL; if (res->validate && insn->imm != orig_val) { pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", - bpf_program__title(prog, false), relo_idx, + prog->name, relo_idx, insn_idx, insn->imm, orig_val, new_val); return -EINVAL; } orig_val = insn->imm; insn->imm = new_val; pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n", - bpf_program__title(prog, false), relo_idx, insn_idx, + prog->name, relo_idx, insn_idx, orig_val, new_val); break; case BPF_LDX: @@ -5285,21 +5283,18 @@ static int bpf_core_patch_insn(struct bpf_program *prog, case BPF_STX: if (res->validate && insn->off != orig_val) { pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n", - bpf_program__title(prog, false), relo_idx, - insn_idx, insn->off, orig_val, new_val); + prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val); return -EINVAL; } if (new_val > SHRT_MAX) { pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n", - bpf_program__title(prog, false), relo_idx, - insn_idx, new_val); + prog->name, relo_idx, insn_idx, new_val); return -ERANGE; } orig_val = insn->off; insn->off = new_val; pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n", - bpf_program__title(prog, false), relo_idx, insn_idx, - orig_val, new_val); + prog->name, relo_idx, insn_idx, orig_val, new_val); break; case BPF_LD: { __u64 imm; @@ -5310,14 +5305,14 @@ static int bpf_core_patch_insn(struct bpf_program *prog, insn[1].code != 0 || insn[1].dst_reg != 0 || insn[1].src_reg != 0 || insn[1].off != 0) { pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n", - bpf_program__title(prog, false), relo_idx, insn_idx); + prog->name, relo_idx, insn_idx); return -EINVAL; } imm = insn[0].imm + ((__u64)insn[1].imm << 32); if (res->validate && imm != orig_val) { pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n", - bpf_program__title(prog, false), relo_idx, + prog->name, relo_idx, insn_idx, (unsigned long long)imm, orig_val, new_val); return -EINVAL; @@ -5326,15 +5321,14 @@ static int bpf_core_patch_insn(struct bpf_program *prog, insn[0].imm = new_val; insn[1].imm = 0; /* currently only 32-bit values are supported */ pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n", - bpf_program__title(prog, false), relo_idx, insn_idx, + prog->name, relo_idx, insn_idx, (unsigned long long)imm, new_val); break; } default: pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n", - bpf_program__title(prog, false), relo_idx, - insn_idx, insn->code, insn->src_reg, insn->dst_reg, - insn->off, insn->imm); + prog->name, relo_idx, insn_idx, insn->code, + insn->src_reg, insn->dst_reg, insn->off, insn->imm); return -EINVAL; } @@ -5464,7 +5458,6 @@ static int bpf_core_apply_relo(struct bpf_program *prog, const struct btf *targ_btf, struct hashmap *cand_cache) { - const char *prog_name = bpf_program__title(prog, false); struct bpf_core_spec local_spec, cand_spec, targ_spec = {}; const void *type_key = u32_as_hash_key(relo->type_id); struct bpf_core_relo_res cand_res, targ_res; @@ -5491,13 +5484,13 @@ static int bpf_core_apply_relo(struct bpf_program *prog, err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec); if (err) { pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", - prog_name, relo_idx, local_id, btf_kind_str(local_type), + prog->name, relo_idx, local_id, btf_kind_str(local_type), str_is_empty(local_name) ? "" : local_name, spec_str, err); return -EINVAL; } - pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name, + pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); libbpf_print(LIBBPF_DEBUG, "\n"); @@ -5514,7 +5507,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, /* libbpf doesn't support candidate search for anonymous types */ if (str_is_empty(spec_str)) { pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n", - prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); + prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); return -EOPNOTSUPP; } @@ -5522,7 +5515,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf); if (IS_ERR(cand_ids)) { pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld", - prog_name, relo_idx, local_id, btf_kind_str(local_type), + prog->name, relo_idx, local_id, btf_kind_str(local_type), local_name, PTR_ERR(cand_ids)); return PTR_ERR(cand_ids); } @@ -5538,13 +5531,13 @@ static int bpf_core_apply_relo(struct bpf_program *prog, err = bpf_core_spec_match(&local_spec, targ_btf, cand_id, &cand_spec); if (err < 0) { pr_warn("prog '%s': relo #%d: error matching candidate #%d ", - prog_name, relo_idx, i); + prog->name, relo_idx, i); bpf_core_dump_spec(LIBBPF_WARN, &cand_spec); libbpf_print(LIBBPF_WARN, ": %d\n", err); return err; } - pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name, + pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name, relo_idx, err == 0 ? "non-matching" : "matching", i); bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); libbpf_print(LIBBPF_DEBUG, "\n"); @@ -5564,7 +5557,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, * should all resolve to the same bit offset */ pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n", - prog_name, relo_idx, cand_spec.bit_offset, + prog->name, relo_idx, cand_spec.bit_offset, targ_spec.bit_offset); return -EINVAL; } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) { @@ -5573,7 +5566,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, * proceed due to ambiguity */ pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n", - prog_name, relo_idx, + prog->name, relo_idx, cand_res.poison ? "failure" : "success", cand_res.new_val, targ_res.poison ? "failure" : "success", targ_res.new_val); return -EINVAL; @@ -5606,7 +5599,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, */ if (j == 0) { pr_debug("prog '%s': relo #%d: no matching targets found\n", - prog_name, relo_idx); + prog->name, relo_idx); /* calculate single target relo result explicitly */ err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res); @@ -5619,7 +5612,7 @@ patch_insn: err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res); if (err) { pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n", - prog_name, relo_idx, relo->insn_off, err); + prog->name, relo_idx, relo->insn_off, err); return -EINVAL; } @@ -5673,7 +5666,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) prog = NULL; for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; - if (strcmp(prog->section_name, sec_name) == 0) + if (strcmp(prog->sec_name, sec_name) == 0) break; } if (!prog) { @@ -5787,7 +5780,7 @@ static int adjust_prog_btf_ext_info(const struct bpf_object *obj, sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); if (!sec_name) return -EINVAL; - if (strcmp(sec_name, prog->section_name) != 0) + if (strcmp(sec_name, prog->sec_name) != 0) continue; for_each_btf_ext_rec(ext_info, sec, i, rec) { @@ -6527,8 +6520,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) int err = 0, fd, i, btf_id; if (prog->obj->loaded) { - pr_warn("prog '%s'('%s'): can't load after object was loaded\n", - prog->name, prog->section_name); + pr_warn("prog '%s': can't load after object was loaded\n", prog->name); return -EINVAL; } @@ -6544,7 +6536,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) if (prog->instances.nr < 0 || !prog->instances.fds) { if (prog->preprocessor) { pr_warn("Internal error: can't load program '%s'\n", - prog->section_name); + prog->name); return -LIBBPF_ERRNO__INTERNAL; } @@ -6559,8 +6551,8 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) if (!prog->preprocessor) { if (prog->instances.nr != 1) { - pr_warn("Program '%s' is inconsistent: nr(%d) != 1\n", - prog->section_name, prog->instances.nr); + pr_warn("prog '%s': inconsistent nr(%d) != 1\n", + prog->name, prog->instances.nr); } err = load_program(prog, prog->insns, prog->insns_cnt, license, kern_ver, &fd); @@ -6578,13 +6570,13 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) prog->insns_cnt, &result); if (err) { pr_warn("Preprocessing the %dth instance of program '%s' failed\n", - i, prog->section_name); + i, prog->name); goto out; } if (!result.new_insn_ptr || !result.new_insn_cnt) { pr_debug("Skip loading the %dth instance of program '%s'\n", - i, prog->section_name); + i, prog->name); prog->instances.fds[i] = -1; if (result.pfd) *result.pfd = -1; @@ -6595,7 +6587,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) result.new_insn_cnt, license, kern_ver, &fd); if (err) { pr_warn("Loading the %dth instance of program '%s' failed\n", - i, prog->section_name); + i, prog->name); goto out; } @@ -6605,7 +6597,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) } out: if (err) - pr_warn("failed to load program '%s'\n", prog->section_name); + pr_warn("failed to load program '%s'\n", prog->name); zfree(&prog->insns); prog->insns_cnt = 0; return err; @@ -6697,7 +6689,7 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, bpf_object__elf_finish(obj); bpf_object__for_each_program(prog, obj) { - prog->sec_def = find_sec_def(prog->section_name); + prog->sec_def = find_sec_def(prog->sec_name); if (!prog->sec_def) /* couldn't guess, but user might manually specify */ continue; @@ -7078,7 +7070,7 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path, if (instance < 0 || instance >= prog->instances.nr) { pr_warn("invalid prog instance %d of prog %s (max %d)\n", - instance, prog->section_name, prog->instances.nr); + instance, prog->name, prog->instances.nr); return -EINVAL; } @@ -7109,7 +7101,7 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, if (instance < 0 || instance >= prog->instances.nr) { pr_warn("invalid prog instance %d of prog %s (max %d)\n", - instance, prog->section_name, prog->instances.nr); + instance, prog->name, prog->instances.nr); return -EINVAL; } @@ -7139,8 +7131,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) } if (prog->instances.nr <= 0) { - pr_warn("no instances of prog %s to pin\n", - prog->section_name); + pr_warn("no instances of prog %s to pin\n", prog->name); return -EINVAL; } @@ -7202,8 +7193,7 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path) } if (prog->instances.nr <= 0) { - pr_warn("no instances of prog %s to pin\n", - prog->section_name); + pr_warn("no instances of prog %s to pin\n", prog->name); return -EINVAL; } @@ -7738,11 +7728,16 @@ const char *bpf_program__name(const struct bpf_program *prog) return prog->name; } +const char *bpf_program__section_name(const struct bpf_program *prog) +{ + return prog->sec_name; +} + const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy) { const char *title; - title = prog->section_name; + title = prog->sec_name; if (needs_copy) { title = strdup(title); if (!title) { @@ -7815,14 +7810,14 @@ int bpf_program__nth_fd(const struct bpf_program *prog, int n) if (n >= prog->instances.nr || n < 0) { pr_warn("Can't get the %dth fd from program %s: only %d instances\n", - n, prog->section_name, prog->instances.nr); + n, prog->name, prog->instances.nr); return -EINVAL; } fd = prog->instances.fds[n]; if (fd < 0) { pr_warn("%dth instance of program '%s' is invalid\n", - n, prog->section_name); + n, prog->name); return -ENOENT; } @@ -8259,7 +8254,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, if (prog->type == BPF_PROG_TYPE_UNSPEC) { const struct bpf_sec_def *sec_def; - sec_def = find_sec_def(prog->section_name); + sec_def = find_sec_def(prog->sec_name); if (sec_def && sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) { /* for pr_warn */ @@ -8282,7 +8277,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, invalid_prog: pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n", - map->name, prog->name, prog->section_name, prog->type, + map->name, prog->name, prog->sec_name, prog->type, prog->attach_btf_id, prog->expected_attach_type, name); return -EINVAL; } @@ -8386,7 +8381,7 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog) { enum bpf_attach_type attach_type = prog->expected_attach_type; __u32 attach_prog_fd = prog->attach_prog_fd; - const char *name = prog->section_name; + const char *name = prog->sec_name; int i, err; if (!name) @@ -8913,14 +8908,14 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int prog_fd, err; if (pfd < 0) { - pr_warn("program '%s': invalid perf event FD %d\n", - bpf_program__title(prog, false), pfd); + pr_warn("prog '%s': invalid perf event FD %d\n", + prog->name, pfd); return ERR_PTR(-EINVAL); } prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { - pr_warn("program '%s': can't attach BPF program w/o FD (did you load it?)\n", - bpf_program__title(prog, false)); + pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n", + prog->name); return ERR_PTR(-EINVAL); } @@ -8933,20 +8928,18 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { err = -errno; free(link); - pr_warn("program '%s': failed to attach to pfd %d: %s\n", - bpf_program__title(prog, false), pfd, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + pr_warn("prog '%s': failed to attach to pfd %d: %s\n", + prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); if (err == -EPROTO) - pr_warn("program '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", - bpf_program__title(prog, false), pfd); + pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", + prog->name, pfd); return ERR_PTR(err); } if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { err = -errno; free(link); - pr_warn("program '%s': failed to enable pfd %d: %s\n", - bpf_program__title(prog, false), pfd, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + pr_warn("prog '%s': failed to enable pfd %d: %s\n", + prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return ERR_PTR(err); } return link; @@ -9068,9 +9061,8 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name, 0 /* offset */, -1 /* pid */); if (pfd < 0) { - pr_warn("program '%s': failed to create %s '%s' perf event: %s\n", - bpf_program__title(prog, false), - retprobe ? "kretprobe" : "kprobe", func_name, + pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n", + prog->name, retprobe ? "kretprobe" : "kprobe", func_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); return ERR_PTR(pfd); } @@ -9078,9 +9070,8 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, if (IS_ERR(link)) { close(pfd); err = PTR_ERR(link); - pr_warn("program '%s': failed to attach to %s '%s': %s\n", - bpf_program__title(prog, false), - retprobe ? "kretprobe" : "kprobe", func_name, + pr_warn("prog '%s': failed to attach to %s '%s': %s\n", + prog->name, retprobe ? "kretprobe" : "kprobe", func_name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return link; } @@ -9093,7 +9084,7 @@ static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, const char *func_name; bool retprobe; - func_name = bpf_program__title(prog, false) + sec->len; + func_name = prog->sec_name + sec->len; retprobe = strcmp(sec->sec, "kretprobe/") == 0; return bpf_program__attach_kprobe(prog, retprobe, func_name); @@ -9111,9 +9102,8 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, func_offset, pid); if (pfd < 0) { - pr_warn("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n", - bpf_program__title(prog, false), - retprobe ? "uretprobe" : "uprobe", + pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n", + prog->name, retprobe ? "uretprobe" : "uprobe", binary_path, func_offset, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); return ERR_PTR(pfd); @@ -9122,9 +9112,8 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, if (IS_ERR(link)) { close(pfd); err = PTR_ERR(link); - pr_warn("program '%s': failed to attach to %s '%s:0x%zx': %s\n", - bpf_program__title(prog, false), - retprobe ? "uretprobe" : "uprobe", + pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n", + prog->name, retprobe ? "uretprobe" : "uprobe", binary_path, func_offset, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return link; @@ -9192,9 +9181,8 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog, pfd = perf_event_open_tracepoint(tp_category, tp_name); if (pfd < 0) { - pr_warn("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n", - bpf_program__title(prog, false), - tp_category, tp_name, + pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n", + prog->name, tp_category, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); return ERR_PTR(pfd); } @@ -9202,9 +9190,8 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog, if (IS_ERR(link)) { close(pfd); err = PTR_ERR(link); - pr_warn("program '%s': failed to attach to tracepoint '%s/%s': %s\n", - bpf_program__title(prog, false), - tp_category, tp_name, + pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n", + prog->name, tp_category, tp_name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return link; } @@ -9217,7 +9204,7 @@ static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, char *sec_name, *tp_cat, *tp_name; struct bpf_link *link; - sec_name = strdup(bpf_program__title(prog, false)); + sec_name = strdup(prog->sec_name); if (!sec_name) return ERR_PTR(-ENOMEM); @@ -9246,8 +9233,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { - pr_warn("program '%s': can't attach before loaded\n", - bpf_program__title(prog, false)); + pr_warn("prog '%s': can't attach before loaded\n", prog->name); return ERR_PTR(-EINVAL); } @@ -9260,9 +9246,8 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, if (pfd < 0) { pfd = -errno; free(link); - pr_warn("program '%s': failed to attach to raw tracepoint '%s': %s\n", - bpf_program__title(prog, false), tp_name, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n", + prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); return ERR_PTR(pfd); } link->fd = pfd; @@ -9272,7 +9257,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, struct bpf_program *prog) { - const char *tp_name = bpf_program__title(prog, false) + sec->len; + const char *tp_name = prog->sec_name + sec->len; return bpf_program__attach_raw_tracepoint(prog, tp_name); } @@ -9286,8 +9271,7 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog) prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { - pr_warn("program '%s': can't attach before loaded\n", - bpf_program__title(prog, false)); + pr_warn("prog '%s': can't attach before loaded\n", prog->name); return ERR_PTR(-EINVAL); } @@ -9300,9 +9284,8 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog) if (pfd < 0) { pfd = -errno; free(link); - pr_warn("program '%s': failed to attach: %s\n", - bpf_program__title(prog, false), - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + pr_warn("prog '%s': failed to attach: %s\n", + prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); return ERR_PTR(pfd); } link->fd = pfd; @@ -9348,8 +9331,7 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd, prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { - pr_warn("program '%s': can't attach before loaded\n", - bpf_program__title(prog, false)); + pr_warn("prog '%s': can't attach before loaded\n", prog->name); return ERR_PTR(-EINVAL); } @@ -9363,8 +9345,8 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd, if (link_fd < 0) { link_fd = -errno; free(link); - pr_warn("program '%s': failed to attach to %s: %s\n", - bpf_program__title(prog, false), target_name, + pr_warn("prog '%s': failed to attach to %s: %s\n", + prog->name, target_name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); return ERR_PTR(link_fd); } @@ -9408,8 +9390,7 @@ bpf_program__attach_iter(struct bpf_program *prog, prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { - pr_warn("program '%s': can't attach before loaded\n", - bpf_program__title(prog, false)); + pr_warn("prog '%s': can't attach before loaded\n", prog->name); return ERR_PTR(-EINVAL); } @@ -9423,9 +9404,8 @@ bpf_program__attach_iter(struct bpf_program *prog, if (link_fd < 0) { link_fd = -errno; free(link); - pr_warn("program '%s': failed to attach to iterator: %s\n", - bpf_program__title(prog, false), - libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); + pr_warn("prog '%s': failed to attach to iterator: %s\n", + prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); return ERR_PTR(link_fd); } link->fd = link_fd; @@ -9436,7 +9416,7 @@ struct bpf_link *bpf_program__attach(struct bpf_program *prog) { const struct bpf_sec_def *sec_def; - sec_def = find_sec_def(bpf_program__title(prog, false)); + sec_def = find_sec_def(prog->sec_name); if (!sec_def || !sec_def->attach_fn) return ERR_PTR(-ESRCH); @@ -10506,12 +10486,11 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) struct bpf_program *prog = *s->progs[i].prog; struct bpf_link **link = s->progs[i].link; const struct bpf_sec_def *sec_def; - const char *sec_name = bpf_program__title(prog, false); if (!prog->load) continue; - sec_def = find_sec_def(sec_name); + sec_def = find_sec_def(prog->sec_name); if (!sec_def || !sec_def->attach_fn) continue; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 308e0ded8f14..a750f67a23f6 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -198,8 +198,9 @@ LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex); LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog); -LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog, - bool needs_copy); +LIBBPF_API const char *bpf_program__section_name(const struct bpf_program *prog); +LIBBPF_API LIBBPF_DEPRECATED("BPF program title is confusing term; please use bpf_program__section_name() instead") +const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy); LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog); LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 3fedcdc4ae2f..92ceb48a5ca2 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -302,6 +302,7 @@ LIBBPF_0.1.0 { LIBBPF_0.2.0 { global: + bpf_program__section_name; perf_buffer__buffer_cnt; perf_buffer__buffer_fd; perf_buffer__epoll_fd; -- cgit v1.3.1 From d86687ae6b759a3ac3e2db63390cef6006144681 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Sep 2020 13:35:39 -0700 Subject: selftests/bpf: Turn fexit_bpf2bpf into test with subtests There are clearly 4 subtests, so make it official. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200903203542.15944-12-andriin@fb.com --- .../selftests/bpf/prog_tests/fexit_bpf2bpf.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index a550dab9ba7a..eda682727787 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -208,11 +208,18 @@ static void test_func_map_prog_compatibility(void) void test_fexit_bpf2bpf(void) { - test_target_no_callees(); - test_target_yes_callees(); - test_func_replace(); - test_func_replace_verify(); - test_func_sockmap_update(); - test_func_replace_return_code(); - test_func_map_prog_compatibility(); + if (test__start_subtest("target_no_callees")) + test_target_no_callees(); + if (test__start_subtest("target_yes_callees")) + test_target_yes_callees(); + if (test__start_subtest("func_replace")) + test_func_replace(); + if (test__start_subtest("func_replace_verify")) + test_func_replace_verify(); + if (test__start_subtest("func_sockmap_update")) + test_func_sockmap_update(); + if (test__start_subtest("func_replace_return_code")) + test_func_replace_return_code(); + if (test__start_subtest("func_map_prog_compatibility")) + test_func_map_prog_compatibility(); } -- cgit v1.3.1 From fab45be1d26e8c3cee62e09bcc39c149b175f848 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Sep 2020 13:35:40 -0700 Subject: selftests/bpf: Add subprogs to pyperf, strobemeta, and l4lb_noinline tests Add use of non-inlined subprogs to few bigger selftests to excercise libbpf's bpf2bpf handling logic. Also split l4lb_all selftest into two sub-tests. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200903203542.15944-13-andriin@fb.com --- .../selftests/bpf/prog_tests/bpf_verif_scale.c | 4 +++ tools/testing/selftests/bpf/prog_tests/l4lb_all.c | 9 +++-- tools/testing/selftests/bpf/progs/pyperf.h | 11 ++++-- .../testing/selftests/bpf/progs/pyperf_subprogs.c | 5 +++ tools/testing/selftests/bpf/progs/strobemeta.h | 30 +++++++++++----- .../selftests/bpf/progs/strobemeta_subprogs.c | 10 ++++++ .../selftests/bpf/progs/test_l4lb_noinline.c | 41 ++++++++++------------ 7 files changed, 73 insertions(+), 37 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/pyperf_subprogs.c create mode 100644 tools/testing/selftests/bpf/progs/strobemeta_subprogs.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index e9f2f12ba06b..e698ee6bb6c2 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -49,6 +49,7 @@ void test_bpf_verif_scale(void) { "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS }, { "pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "pyperf_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, /* full unroll by llvm */ { "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, @@ -86,6 +87,9 @@ void test_bpf_verif_scale(void) { "strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, { "strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + /* non-inlined subprogs */ + { "strobemeta_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL }, { "test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL }, diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c index c2d373e294bb..8073105548ff 100644 --- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c +++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c @@ -80,9 +80,8 @@ out: void test_l4lb_all(void) { - const char *file1 = "./test_l4lb.o"; - const char *file2 = "./test_l4lb_noinline.o"; - - test_l4lb(file1); - test_l4lb(file2); + if (test__start_subtest("l4lb_inline")) + test_l4lb("test_l4lb.o"); + if (test__start_subtest("l4lb_noinline")) + test_l4lb("test_l4lb_noinline.o"); } diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index cc615b82b56e..2fb7adafb6b6 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -67,7 +67,12 @@ typedef struct { void* co_name; // PyCodeObject.co_name } FrameData; -static __always_inline void *get_thread_state(void *tls_base, PidData *pidData) +#ifdef SUBPROGS +__noinline +#else +__always_inline +#endif +static void *get_thread_state(void *tls_base, PidData *pidData) { void* thread_state; int key; @@ -155,7 +160,9 @@ struct { } stackmap SEC(".maps"); #ifdef GLOBAL_FUNC -__attribute__((noinline)) +__noinline +#elif defined(SUBPROGS) +static __noinline #else static __always_inline #endif diff --git a/tools/testing/selftests/bpf/progs/pyperf_subprogs.c b/tools/testing/selftests/bpf/progs/pyperf_subprogs.c new file mode 100644 index 000000000000..60e27a7f0cca --- /dev/null +++ b/tools/testing/selftests/bpf/progs/pyperf_subprogs.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define STACK_MAX_LEN 50 +#define SUBPROGS +#include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index ad61b722a9de..7de534f38c3f 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -266,8 +266,12 @@ struct tls_index { uint64_t offset; }; -static __always_inline void *calc_location(struct strobe_value_loc *loc, - void *tls_base) +#ifdef SUBPROGS +__noinline +#else +__always_inline +#endif +static void *calc_location(struct strobe_value_loc *loc, void *tls_base) { /* * tls_mode value is: @@ -327,10 +331,15 @@ static __always_inline void *calc_location(struct strobe_value_loc *loc, : NULL; } -static __always_inline void read_int_var(struct strobemeta_cfg *cfg, - size_t idx, void *tls_base, - struct strobe_value_generic *value, - struct strobemeta_payload *data) +#ifdef SUBPROGS +__noinline +#else +__always_inline +#endif +static void read_int_var(struct strobemeta_cfg *cfg, + size_t idx, void *tls_base, + struct strobe_value_generic *value, + struct strobemeta_payload *data) { void *location = calc_location(&cfg->int_locs[idx], tls_base); if (!location) @@ -440,8 +449,13 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, * read_strobe_meta returns NULL, if no metadata was read; otherwise returns * pointer to *right after* payload ends */ -static __always_inline void *read_strobe_meta(struct task_struct *task, - struct strobemeta_payload *data) +#ifdef SUBPROGS +__noinline +#else +__always_inline +#endif +static void *read_strobe_meta(struct task_struct *task, + struct strobemeta_payload *data) { pid_t pid = bpf_get_current_pid_tgid() >> 32; struct strobe_value_generic value = {0}; diff --git a/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c b/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c new file mode 100644 index 000000000000..b6c01f8fc559 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +// Copyright (c) 2019 Facebook + +#define STROBE_MAX_INTS 2 +#define STROBE_MAX_STRS 25 +#define STROBE_MAX_MAPS 13 +#define STROBE_MAX_MAP_ENTRIES 20 +#define NO_UNROLL +#define SUBPROGS +#include "strobemeta.h" diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c index 28351936a438..b9e2753f4f91 100644 --- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c @@ -17,9 +17,7 @@ #include "test_iptunnel_common.h" #include -int _version SEC("version") = 1; - -static __u32 rol32(__u32 word, unsigned int shift) +static __always_inline __u32 rol32(__u32 word, unsigned int shift) { return (word << shift) | (word >> ((-shift) & 31)); } @@ -52,7 +50,7 @@ static __u32 rol32(__u32 word, unsigned int shift) typedef unsigned int u32; -static u32 jhash(const void *key, u32 length, u32 initval) +static __noinline u32 jhash(const void *key, u32 length, u32 initval) { u32 a, b, c; const unsigned char *k = key; @@ -88,7 +86,7 @@ static u32 jhash(const void *key, u32 length, u32 initval) return c; } -static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +static __noinline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) { a += initval; b += initval; @@ -97,7 +95,7 @@ static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) return c; } -static u32 jhash_2words(u32 a, u32 b, u32 initval) +static __noinline u32 jhash_2words(u32 a, u32 b, u32 initval) { return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); } @@ -200,8 +198,7 @@ struct { __type(value, struct ctl_value); } ctl_array SEC(".maps"); -static __u32 get_packet_hash(struct packet_description *pckt, - bool ipv6) +static __noinline __u32 get_packet_hash(struct packet_description *pckt, bool ipv6) { if (ipv6) return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS), @@ -210,10 +207,10 @@ static __u32 get_packet_hash(struct packet_description *pckt, return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE); } -static bool get_packet_dst(struct real_definition **real, - struct packet_description *pckt, - struct vip_meta *vip_info, - bool is_ipv6) +static __noinline bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6) { __u32 hash = get_packet_hash(pckt, is_ipv6); __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE; @@ -233,8 +230,8 @@ static bool get_packet_dst(struct real_definition **real, return true; } -static int parse_icmpv6(void *data, void *data_end, __u64 off, - struct packet_description *pckt) +static __noinline int parse_icmpv6(void *data, void *data_end, __u64 off, + struct packet_description *pckt) { struct icmp6hdr *icmp_hdr; struct ipv6hdr *ip6h; @@ -255,8 +252,8 @@ static int parse_icmpv6(void *data, void *data_end, __u64 off, return TC_ACT_UNSPEC; } -static int parse_icmp(void *data, void *data_end, __u64 off, - struct packet_description *pckt) +static __noinline int parse_icmp(void *data, void *data_end, __u64 off, + struct packet_description *pckt) { struct icmphdr *icmp_hdr; struct iphdr *iph; @@ -280,8 +277,8 @@ static int parse_icmp(void *data, void *data_end, __u64 off, return TC_ACT_UNSPEC; } -static bool parse_udp(void *data, __u64 off, void *data_end, - struct packet_description *pckt) +static __noinline bool parse_udp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) { struct udphdr *udp; udp = data + off; @@ -299,8 +296,8 @@ static bool parse_udp(void *data, __u64 off, void *data_end, return true; } -static bool parse_tcp(void *data, __u64 off, void *data_end, - struct packet_description *pckt) +static __noinline bool parse_tcp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) { struct tcphdr *tcp; @@ -321,8 +318,8 @@ static bool parse_tcp(void *data, __u64 off, void *data_end, return true; } -static int process_packet(void *data, __u64 off, void *data_end, - bool is_ipv6, struct __sk_buff *skb) +static __noinline int process_packet(void *data, __u64 off, void *data_end, + bool is_ipv6, struct __sk_buff *skb) { void *pkt_start = (void *)(long)skb->data; struct packet_description pckt = {}; -- cgit v1.3.1 From baaf680e089f34468f26e86f76af712105f6019b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Sep 2020 13:35:41 -0700 Subject: selftests/bpf: Modernize xdp_noinline test w/ skeleton and __noinline Update xdp_noinline to use BPF skeleton and force __noinline on helper sub-programs. Also, split existing logic into v4- and v6-only to complicate sub-program calling patterns (partially overlapped sets of functions for entry-level BPF programs). Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200903203542.15944-14-andriin@fb.com --- .../selftests/bpf/prog_tests/xdp_noinline.c | 49 ++++++++-------------- .../selftests/bpf/progs/test_xdp_noinline.c | 36 +++++++++++----- 2 files changed, 43 insertions(+), 42 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c index f284f72158ef..a1f06424cf83 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c @@ -1,11 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include "test_xdp_noinline.skel.h" void test_xdp_noinline(void) { - const char *file = "./test_xdp_noinline.o"; unsigned int nr_cpus = bpf_num_possible_cpus(); + struct test_xdp_noinline *skel; struct vip key = {.protocol = 6}; struct vip_meta { __u32 flags; @@ -25,58 +26,42 @@ void test_xdp_noinline(void) } real_def = {.dst = MAGIC_VAL}; __u32 ch_key = 11, real_num = 3; __u32 duration, retval, size; - int err, i, prog_fd, map_fd; + int err, i; __u64 bytes = 0, pkts = 0; - struct bpf_object *obj; char buf[128]; u32 *magic = (u32 *)buf; - err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); - if (CHECK_FAIL(err)) + skel = test_xdp_noinline__open_and_load(); + if (CHECK(!skel, "skel_open_and_load", "failed\n")) return; - map_fd = bpf_find_map(__func__, obj, "vip_map"); - if (map_fd < 0) - goto out; - bpf_map_update_elem(map_fd, &key, &value, 0); + bpf_map_update_elem(bpf_map__fd(skel->maps.vip_map), &key, &value, 0); + bpf_map_update_elem(bpf_map__fd(skel->maps.ch_rings), &ch_key, &real_num, 0); + bpf_map_update_elem(bpf_map__fd(skel->maps.reals), &real_num, &real_def, 0); - map_fd = bpf_find_map(__func__, obj, "ch_rings"); - if (map_fd < 0) - goto out; - bpf_map_update_elem(map_fd, &ch_key, &real_num, 0); - - map_fd = bpf_find_map(__func__, obj, "reals"); - if (map_fd < 0) - goto out; - bpf_map_update_elem(map_fd, &real_num, &real_def, 0); - - err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), + err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v4), + NUM_ITER, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); CHECK(err || retval != 1 || size != 54 || *magic != MAGIC_VAL, "ipv4", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); - err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), + err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v6), + NUM_ITER, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); CHECK(err || retval != 1 || size != 74 || *magic != MAGIC_VAL, "ipv6", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); - map_fd = bpf_find_map(__func__, obj, "stats"); - if (map_fd < 0) - goto out; - bpf_map_lookup_elem(map_fd, &stats_key, stats); + bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats), &stats_key, stats); for (i = 0; i < nr_cpus; i++) { bytes += stats[i].bytes; pkts += stats[i].pkts; } - if (CHECK_FAIL(bytes != MAGIC_BYTES * NUM_ITER * 2 || - pkts != NUM_ITER * 2)) { - printf("test_xdp_noinline:FAIL:stats %lld %lld\n", - bytes, pkts); - } -out: - bpf_object__close(obj); + CHECK(bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2, + "stats", "bytes %lld pkts %lld\n", + (unsigned long long)bytes, (unsigned long long)pkts); + test_xdp_noinline__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index 8beecec166d9..3a67921f62b5 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -16,7 +16,7 @@ #include #include -static __u32 rol32(__u32 word, unsigned int shift) +static __always_inline __u32 rol32(__u32 word, unsigned int shift) { return (word << shift) | (word >> ((-shift) & 31)); } @@ -49,7 +49,7 @@ static __u32 rol32(__u32 word, unsigned int shift) typedef unsigned int u32; -static __attribute__ ((noinline)) +static __noinline u32 jhash(const void *key, u32 length, u32 initval) { u32 a, b, c; @@ -86,7 +86,7 @@ u32 jhash(const void *key, u32 length, u32 initval) return c; } -__attribute__ ((noinline)) +__noinline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) { a += initval; @@ -96,7 +96,7 @@ u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) return c; } -__attribute__ ((noinline)) +__noinline u32 jhash_2words(u32 a, u32 b, u32 initval) { return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); @@ -213,7 +213,7 @@ struct eth_hdr { unsigned short eth_proto; }; -static inline __u64 calc_offset(bool is_ipv6, bool is_icmp) +static __noinline __u64 calc_offset(bool is_ipv6, bool is_icmp) { __u64 off = sizeof(struct eth_hdr); if (is_ipv6) { @@ -797,8 +797,8 @@ out: return XDP_DROP; } -__attribute__ ((section("xdp-test"), used)) -int balancer_ingress(struct xdp_md *ctx) +SEC("xdp-test-v4") +int balancer_ingress_v4(struct xdp_md *ctx) { void *data = (void *)(long)ctx->data; void *data_end = (void *)(long)ctx->data_end; @@ -812,11 +812,27 @@ int balancer_ingress(struct xdp_md *ctx) eth_proto = bpf_ntohs(eth->eth_proto); if (eth_proto == ETH_P_IP) return process_packet(data, nh_off, data_end, 0, ctx); - else if (eth_proto == ETH_P_IPV6) + else + return XDP_DROP; +} + +SEC("xdp-test-v6") +int balancer_ingress_v6(struct xdp_md *ctx) +{ + void *data = (void *)(long)ctx->data; + void *data_end = (void *)(long)ctx->data_end; + struct eth_hdr *eth = data; + __u32 eth_proto; + __u32 nh_off; + + nh_off = sizeof(struct eth_hdr); + if (data + nh_off > data_end) + return XDP_DROP; + eth_proto = bpf_ntohs(eth->eth_proto); + if (eth_proto == ETH_P_IPV6) return process_packet(data, nh_off, data_end, 1, ctx); else return XDP_DROP; } -char _license[] __attribute__ ((section("license"), used)) = "GPL"; -int _version __attribute__ ((section("version"), used)) = 1; +char _license[] SEC("license") = "GPL"; -- cgit v1.3.1 From ee333df50bff22956ad37f3784cf120028f343a8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Sep 2020 13:35:42 -0700 Subject: selftests/bpf: Add __noinline variant of cls_redirect selftest As one of the most complicated and close-to-real-world programs, cls_redirect is a good candidate to exercise libbpf's logic of handling bpf2bpf calls. So add variant with using explicit __noinline for majority of functions except few most basic ones. If those few functions are inlined, verifier starts to complain about program instruction limit of 1mln instructions being exceeded, most probably due to instruction overhead of doing a sub-program call. Convert user-space part of selftest to have to sub-tests: with and without inlining. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Cc: Lorenz Bauer Link: https://lore.kernel.org/bpf/20200903203542.15944-15-andriin@fb.com --- .../selftests/bpf/prog_tests/cls_redirect.c | 72 +++++++++++--- .../selftests/bpf/progs/test_cls_redirect.c | 105 +++++++++++---------- .../bpf/progs/test_cls_redirect_subprogs.c | 2 + 3 files changed, 115 insertions(+), 64 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c index f259085cca6a..9781d85cb223 100644 --- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c @@ -12,10 +12,13 @@ #include "progs/test_cls_redirect.h" #include "test_cls_redirect.skel.h" +#include "test_cls_redirect_subprogs.skel.h" #define ENCAP_IP INADDR_LOOPBACK #define ENCAP_PORT (1234) +static int duration = 0; + struct addr_port { in_port_t port; union { @@ -361,30 +364,18 @@ static void close_fds(int *fds, int n) close(fds[i]); } -void test_cls_redirect(void) +static void test_cls_redirect_common(struct bpf_program *prog) { - struct test_cls_redirect *skel = NULL; struct bpf_prog_test_run_attr tattr = {}; int families[] = { AF_INET, AF_INET6 }; struct sockaddr_storage ss; struct sockaddr *addr; socklen_t slen; int i, j, err; - int servers[__NR_KIND][ARRAY_SIZE(families)] = {}; int conns[__NR_KIND][ARRAY_SIZE(families)] = {}; struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)]; - skel = test_cls_redirect__open(); - if (CHECK_FAIL(!skel)) - return; - - skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP); - skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT); - - if (CHECK_FAIL(test_cls_redirect__load(skel))) - goto cleanup; - addr = (struct sockaddr *)&ss; for (i = 0; i < ARRAY_SIZE(families); i++) { slen = prepare_addr(&ss, families[i]); @@ -402,7 +393,7 @@ void test_cls_redirect(void) goto cleanup; } - tattr.prog_fd = bpf_program__fd(skel->progs.cls_redirect); + tattr.prog_fd = bpf_program__fd(prog); for (i = 0; i < ARRAY_SIZE(tests); i++) { struct test_cfg *test = &tests[i]; @@ -450,7 +441,58 @@ void test_cls_redirect(void) } cleanup: - test_cls_redirect__destroy(skel); close_fds((int *)servers, sizeof(servers) / sizeof(servers[0][0])); close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0])); } + +static void test_cls_redirect_inlined(void) +{ + struct test_cls_redirect *skel; + int err; + + skel = test_cls_redirect__open(); + if (CHECK(!skel, "skel_open", "failed\n")) + return; + + skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP); + skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT); + + err = test_cls_redirect__load(skel); + if (CHECK(err, "skel_load", "failed: %d\n", err)) + goto cleanup; + + test_cls_redirect_common(skel->progs.cls_redirect); + +cleanup: + test_cls_redirect__destroy(skel); +} + +static void test_cls_redirect_subprogs(void) +{ + struct test_cls_redirect_subprogs *skel; + int err; + + skel = test_cls_redirect_subprogs__open(); + if (CHECK(!skel, "skel_open", "failed\n")) + return; + + skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP); + skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT); + + err = test_cls_redirect_subprogs__load(skel); + if (CHECK(err, "skel_load", "failed: %d\n", err)) + goto cleanup; + + test_cls_redirect_common(skel->progs.cls_redirect); + +cleanup: + test_cls_redirect_subprogs__destroy(skel); +} + +void test_cls_redirect(void) +{ + if (test__start_subtest("cls_redirect_inlined")) + test_cls_redirect_inlined(); + if (test__start_subtest("cls_redirect_subprogs")) + test_cls_redirect_subprogs(); +} diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c index f0b72e86bee5..c9f8464996ea 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c @@ -22,6 +22,12 @@ #include "test_cls_redirect.h" +#ifdef SUBPROGS +#define INLINING __noinline +#else +#define INLINING __always_inline +#endif + #define offsetofend(TYPE, MEMBER) \ (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER))) @@ -125,7 +131,7 @@ typedef struct buf { uint8_t *const tail; } buf_t; -static size_t buf_off(const buf_t *buf) +static __always_inline size_t buf_off(const buf_t *buf) { /* Clang seems to optimize constructs like * a - b + c @@ -145,7 +151,7 @@ static size_t buf_off(const buf_t *buf) return off; } -static bool buf_copy(buf_t *buf, void *dst, size_t len) +static __always_inline bool buf_copy(buf_t *buf, void *dst, size_t len) { if (bpf_skb_load_bytes(buf->skb, buf_off(buf), dst, len)) { return false; @@ -155,7 +161,7 @@ static bool buf_copy(buf_t *buf, void *dst, size_t len) return true; } -static bool buf_skip(buf_t *buf, const size_t len) +static __always_inline bool buf_skip(buf_t *buf, const size_t len) { /* Check whether off + len is valid in the non-linear part. */ if (buf_off(buf) + len > buf->skb->len) { @@ -173,7 +179,7 @@ static bool buf_skip(buf_t *buf, const size_t len) * If scratch is not NULL, the function will attempt to load non-linear * data via bpf_skb_load_bytes. On success, scratch is returned. */ -static void *buf_assign(buf_t *buf, const size_t len, void *scratch) +static __always_inline void *buf_assign(buf_t *buf, const size_t len, void *scratch) { if (buf->head + len > buf->tail) { if (scratch == NULL) { @@ -188,7 +194,7 @@ static void *buf_assign(buf_t *buf, const size_t len, void *scratch) return ptr; } -static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4) +static INLINING bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4) { if (ipv4->ihl <= 5) { return true; @@ -197,13 +203,13 @@ static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4) return buf_skip(buf, (ipv4->ihl - 5) * 4); } -static bool ipv4_is_fragment(const struct iphdr *ip) +static INLINING bool ipv4_is_fragment(const struct iphdr *ip) { uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK); return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0; } -static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch) +static __always_inline struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch) { struct iphdr *ipv4 = buf_assign(pkt, sizeof(*ipv4), scratch); if (ipv4 == NULL) { @@ -222,7 +228,7 @@ static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch) } /* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */ -static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports) +static INLINING bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports) { if (!buf_copy(pkt, ports, sizeof(*ports))) { return false; @@ -237,7 +243,7 @@ static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports) return true; } -static uint16_t pkt_checksum_fold(uint32_t csum) +static INLINING uint16_t pkt_checksum_fold(uint32_t csum) { /* The highest reasonable value for an IPv4 header * checksum requires two folds, so we just do that always. @@ -247,7 +253,7 @@ static uint16_t pkt_checksum_fold(uint32_t csum) return (uint16_t)~csum; } -static void pkt_ipv4_checksum(struct iphdr *iph) +static INLINING void pkt_ipv4_checksum(struct iphdr *iph) { iph->check = 0; @@ -268,10 +274,11 @@ static void pkt_ipv4_checksum(struct iphdr *iph) iph->check = pkt_checksum_fold(acc); } -static bool pkt_skip_ipv6_extension_headers(buf_t *pkt, - const struct ipv6hdr *ipv6, - uint8_t *upper_proto, - bool *is_fragment) +static INLINING +bool pkt_skip_ipv6_extension_headers(buf_t *pkt, + const struct ipv6hdr *ipv6, + uint8_t *upper_proto, + bool *is_fragment) { /* We understand five extension headers. * https://tools.ietf.org/html/rfc8200#section-4.1 states that all @@ -336,7 +343,7 @@ static bool pkt_skip_ipv6_extension_headers(buf_t *pkt, * scratch is allocated on the stack. However, this usage should be safe since * it's the callers stack after all. */ -static inline __attribute__((__always_inline__)) struct ipv6hdr * +static __always_inline struct ipv6hdr * pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto, bool *is_fragment) { @@ -354,20 +361,20 @@ pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto, /* Global metrics, per CPU */ -struct bpf_map_def metrics_map SEC("maps") = { - .type = BPF_MAP_TYPE_PERCPU_ARRAY, - .key_size = sizeof(unsigned int), - .value_size = sizeof(metrics_t), - .max_entries = 1, -}; - -static metrics_t *get_global_metrics(void) +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, unsigned int); + __type(value, metrics_t); +} metrics_map SEC(".maps"); + +static INLINING metrics_t *get_global_metrics(void) { uint64_t key = 0; return bpf_map_lookup_elem(&metrics_map, &key); } -static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap) +static INLINING ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap) { const int payload_off = sizeof(*encap) + @@ -388,8 +395,8 @@ static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap) return bpf_redirect(skb->ifindex, BPF_F_INGRESS); } -static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap, - struct in_addr *next_hop, metrics_t *metrics) +static INLINING ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap, + struct in_addr *next_hop, metrics_t *metrics) { metrics->forwarded_packets_total_gre++; @@ -509,8 +516,8 @@ static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap, return bpf_redirect(skb->ifindex, 0); } -static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap, - struct in_addr *next_hop, metrics_t *metrics) +static INLINING ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap, + struct in_addr *next_hop, metrics_t *metrics) { /* swap L2 addresses */ /* This assumes that packets are received from a router. @@ -546,7 +553,7 @@ static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap, return bpf_redirect(skb->ifindex, 0); } -static ret_t skip_next_hops(buf_t *pkt, int n) +static INLINING ret_t skip_next_hops(buf_t *pkt, int n) { switch (n) { case 1: @@ -566,8 +573,8 @@ static ret_t skip_next_hops(buf_t *pkt, int n) * pkt is positioned just after the variable length GLB header * iff the call is successful. */ -static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap, - struct in_addr *next_hop) +static INLINING ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap, + struct in_addr *next_hop) { if (encap->unigue.next_hop > encap->unigue.hop_count) { return TC_ACT_SHOT; @@ -601,8 +608,8 @@ static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap, * return value, and calling code works while still being "generic" to * IPv4 and IPv6. */ -static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph, - uint64_t iphlen, uint16_t sport, uint16_t dport) +static INLINING uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph, + uint64_t iphlen, uint16_t sport, uint16_t dport) { switch (iphlen) { case sizeof(struct iphdr): { @@ -630,9 +637,9 @@ static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph, } } -static verdict_t classify_tcp(struct __sk_buff *skb, - struct bpf_sock_tuple *tuple, uint64_t tuplen, - void *iph, struct tcphdr *tcp) +static INLINING verdict_t classify_tcp(struct __sk_buff *skb, + struct bpf_sock_tuple *tuple, uint64_t tuplen, + void *iph, struct tcphdr *tcp) { struct bpf_sock *sk = bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0); @@ -663,8 +670,8 @@ static verdict_t classify_tcp(struct __sk_buff *skb, return UNKNOWN; } -static verdict_t classify_udp(struct __sk_buff *skb, - struct bpf_sock_tuple *tuple, uint64_t tuplen) +static INLINING verdict_t classify_udp(struct __sk_buff *skb, + struct bpf_sock_tuple *tuple, uint64_t tuplen) { struct bpf_sock *sk = bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0); @@ -681,9 +688,9 @@ static verdict_t classify_udp(struct __sk_buff *skb, return UNKNOWN; } -static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto, - struct bpf_sock_tuple *tuple, uint64_t tuplen, - metrics_t *metrics) +static INLINING verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto, + struct bpf_sock_tuple *tuple, uint64_t tuplen, + metrics_t *metrics) { switch (proto) { case IPPROTO_TCP: @@ -698,7 +705,7 @@ static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto, } } -static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics) +static INLINING verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics) { struct icmphdr icmp; if (!buf_copy(pkt, &icmp, sizeof(icmp))) { @@ -745,7 +752,7 @@ static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics) sizeof(tuple.ipv4), metrics); } -static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics) +static INLINING verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics) { struct icmp6hdr icmp6; if (!buf_copy(pkt, &icmp6, sizeof(icmp6))) { @@ -797,8 +804,8 @@ static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics) metrics); } -static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen, - metrics_t *metrics) +static INLINING verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen, + metrics_t *metrics) { metrics->l4_protocol_packets_total_tcp++; @@ -819,8 +826,8 @@ static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen, return classify_tcp(pkt->skb, &tuple, tuplen, iph, tcp); } -static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen, - metrics_t *metrics) +static INLINING verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen, + metrics_t *metrics) { metrics->l4_protocol_packets_total_udp++; @@ -837,7 +844,7 @@ static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen, return classify_udp(pkt->skb, &tuple, tuplen); } -static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics) +static INLINING verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics) { metrics->l3_protocol_packets_total_ipv4++; @@ -874,7 +881,7 @@ static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics) } } -static verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics) +static INLINING verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics) { metrics->l3_protocol_packets_total_ipv6++; diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c new file mode 100644 index 000000000000..eed26b70e3a2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c @@ -0,0 +1,2 @@ +#define SUBPROGS +#include "test_cls_redirect.c" -- cgit v1.3.1 From 95cec14b0308085c028c4d4fb3d09fad3902b4c3 Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Thu, 3 Sep 2020 13:05:28 -0700 Subject: selftests/bpf: Fix check in global_data_init. The returned value of bpf_object__open_file() should be checked with libbpf_get_error() rather than NULL. This fix prevents test_progs from crash when test_global_data.o is not present. Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200903200528.747884-1-haoluo@google.com --- tools/testing/selftests/bpf/prog_tests/global_data_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c index 3bdaa5a40744..ee46b11f1f9a 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c @@ -12,7 +12,8 @@ void test_global_data_init(void) size_t sz; obj = bpf_object__open_file(file, NULL); - if (CHECK_FAIL(!obj)) + err = libbpf_get_error(obj); + if (CHECK_FAIL(err)) return; map = bpf_object__find_map_by_name(obj, "test_glo.rodata"); -- cgit v1.3.1 From 09d1de1a8e7b9fbf08e96e0aecf41f870e4433ee Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 2 Sep 2020 12:21:29 +0200 Subject: tests: port pidfd_wait to kselftest harness All of the new pidfd selftests already use the new kselftest harness infrastructure. It makes for clearer output, makes the code easier to understand, and makes adding new tests way simpler. Signed-off-by: Christian Brauner Reviewed-by: Josh Triplett Cc: Shuah Khan Cc: linux-kselftest@vger.kernel.org Link: https://lore.kernel.org/r/20200902102130.147672-4-christian.brauner@ubuntu.com --- tools/testing/selftests/pidfd/pidfd_wait.c | 213 ++++++----------------------- 1 file changed, 39 insertions(+), 174 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index 7079f8eef792..075c716f6fb8 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -17,7 +17,7 @@ #include #include "pidfd.h" -#include "../kselftest.h" +#include "../kselftest_harness.h" #define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) @@ -32,9 +32,8 @@ static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options, return syscall(__NR_waitid, which, pid, info, options, ru); } -static int test_pidfd_wait_simple(void) +TEST(wait_simple) { - const char *test_name = "pidfd wait simple"; int pidfd = -1, status = 0; pid_t parent_tid = -1; struct clone_args args = { @@ -50,76 +49,40 @@ static int test_pidfd_wait_simple(void) }; pidfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC); - if (pidfd < 0) - ksft_exit_fail_msg("%s test: failed to open /proc/self %s\n", - test_name, strerror(errno)); + ASSERT_GE(pidfd, 0); pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); - if (pid == 0) - ksft_exit_fail_msg( - "%s test: succeeded to wait on invalid pidfd %s\n", - test_name, strerror(errno)); - close(pidfd); + ASSERT_NE(pid, 0); + EXPECT_EQ(close(pidfd), 0); pidfd = -1; pidfd = open("/dev/null", O_RDONLY | O_CLOEXEC); - if (pidfd == 0) - ksft_exit_fail_msg("%s test: failed to open /dev/null %s\n", - test_name, strerror(errno)); + ASSERT_GE(pidfd, 0); pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); - if (pid == 0) - ksft_exit_fail_msg( - "%s test: succeeded to wait on invalid pidfd %s\n", - test_name, strerror(errno)); - close(pidfd); + ASSERT_NE(pid, 0); + EXPECT_EQ(close(pidfd), 0); pidfd = -1; pid = sys_clone3(&args); - if (pid < 0) - ksft_exit_fail_msg("%s test: failed to create new process %s\n", - test_name, strerror(errno)); + ASSERT_GE(pid, 1); if (pid == 0) exit(EXIT_SUCCESS); pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); - if (pid < 0) - ksft_exit_fail_msg( - "%s test: failed to wait on process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - if (!WIFEXITED(info.si_status) || WEXITSTATUS(info.si_status)) - ksft_exit_fail_msg( - "%s test: unexpected status received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - close(pidfd); - - if (info.si_signo != SIGCHLD) - ksft_exit_fail_msg( - "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_signo, parent_tid, pidfd, - strerror(errno)); - - if (info.si_code != CLD_EXITED) - ksft_exit_fail_msg( - "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_code, parent_tid, pidfd, - strerror(errno)); - - if (info.si_pid != parent_tid) - ksft_exit_fail_msg( - "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_pid, parent_tid, pidfd, - strerror(errno)); - - ksft_test_result_pass("%s test: Passed\n", test_name); - return 0; + ASSERT_GE(pid, 0); + ASSERT_EQ(WIFEXITED(info.si_status), true); + ASSERT_EQ(WEXITSTATUS(info.si_status), 0); + EXPECT_EQ(close(pidfd), 0); + + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_EXITED); + ASSERT_EQ(info.si_pid, parent_tid); } -static int test_pidfd_wait_states(void) +TEST(wait_states) { - const char *test_name = "pidfd wait states"; int pidfd = -1, status = 0; pid_t parent_tid = -1; struct clone_args args = { @@ -135,9 +98,7 @@ static int test_pidfd_wait_states(void) }; pid = sys_clone3(&args); - if (pid < 0) - ksft_exit_fail_msg("%s test: failed to create new process %s\n", - test_name, strerror(errno)); + ASSERT_GE(pid, 0); if (pid == 0) { kill(getpid(), SIGSTOP); @@ -145,127 +106,31 @@ static int test_pidfd_wait_states(void) exit(EXIT_SUCCESS); } - ret = sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to wait on WSTOPPED process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - if (info.si_signo != SIGCHLD) - ksft_exit_fail_msg( - "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_signo, parent_tid, pidfd, - strerror(errno)); - - if (info.si_code != CLD_STOPPED) - ksft_exit_fail_msg( - "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_code, parent_tid, pidfd, - strerror(errno)); - - if (info.si_pid != parent_tid) - ksft_exit_fail_msg( - "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_pid, parent_tid, pidfd, - strerror(errno)); - - ret = sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to send signal to process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - ret = sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to wait WCONTINUED on process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_STOPPED); + ASSERT_EQ(info.si_pid, parent_tid); - if (info.si_signo != SIGCHLD) - ksft_exit_fail_msg( - "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_signo, parent_tid, pidfd, - strerror(errno)); + ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0); - if (info.si_code != CLD_CONTINUED) - ksft_exit_fail_msg( - "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_code, parent_tid, pidfd, - strerror(errno)); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_CONTINUED); + ASSERT_EQ(info.si_pid, parent_tid); - if (info.si_pid != parent_tid) - ksft_exit_fail_msg( - "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_pid, parent_tid, pidfd, - strerror(errno)); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_STOPPED); + ASSERT_EQ(info.si_pid, parent_tid); - ret = sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to wait on WUNTRACED process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); + ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0); - if (info.si_signo != SIGCHLD) - ksft_exit_fail_msg( - "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_signo, parent_tid, pidfd, - strerror(errno)); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_KILLED); + ASSERT_EQ(info.si_pid, parent_tid); - if (info.si_code != CLD_STOPPED) - ksft_exit_fail_msg( - "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_code, parent_tid, pidfd, - strerror(errno)); - - if (info.si_pid != parent_tid) - ksft_exit_fail_msg( - "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_pid, parent_tid, pidfd, - strerror(errno)); - - ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to send SIGKILL to process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to wait on WEXITED process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - if (info.si_signo != SIGCHLD) - ksft_exit_fail_msg( - "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_signo, parent_tid, pidfd, - strerror(errno)); - - if (info.si_code != CLD_KILLED) - ksft_exit_fail_msg( - "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_code, parent_tid, pidfd, - strerror(errno)); - - if (info.si_pid != parent_tid) - ksft_exit_fail_msg( - "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_pid, parent_tid, pidfd, - strerror(errno)); - - close(pidfd); - - ksft_test_result_pass("%s test: Passed\n", test_name); - return 0; + EXPECT_EQ(close(pidfd), 0); } -int main(int argc, char **argv) -{ - ksft_print_header(); - ksft_set_plan(2); - - test_pidfd_wait_simple(); - test_pidfd_wait_states(); - - return ksft_exit_pass(); -} +TEST_HARNESS_MAIN -- cgit v1.3.1 From 4beba9486abd2f86d125271d6946f7c38ed0fe77 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Wed, 22 Apr 2020 15:25:27 +0100 Subject: mm: Add PG_arch_2 page flag For arm64 MTE support it is necessary to be able to mark pages that contain user space visible tags that will need to be saved/restored e.g. when swapped out. To support this add a new arch specific flag (PG_arch_2). This flag is only available on 64-bit architectures due to the limited number of spare page flags on the 32-bit ones. Signed-off-by: Steven Price [catalin.marinas@arm.com: use CONFIG_64BIT for guarding this new flag] Signed-off-by: Catalin Marinas Cc: Andrew Morton --- fs/proc/page.c | 3 +++ include/linux/kernel-page-flags.h | 1 + include/linux/page-flags.h | 3 +++ include/trace/events/mmflags.h | 9 ++++++++- tools/vm/page-types.c | 2 ++ 5 files changed, 17 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/fs/proc/page.c b/fs/proc/page.c index f909243d4a66..9f1077d94cde 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -217,6 +217,9 @@ u64 stable_page_flags(struct page *page) u |= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2); u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1); u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1); +#ifdef CONFIG_64BIT + u |= kpf_copy_bit(k, KPF_ARCH_2, PG_arch_2); +#endif return u; }; diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h index abd20ef93c98..eee1877a354e 100644 --- a/include/linux/kernel-page-flags.h +++ b/include/linux/kernel-page-flags.h @@ -17,5 +17,6 @@ #define KPF_ARCH 38 #define KPF_UNCACHED 39 #define KPF_SOFTDIRTY 40 +#define KPF_ARCH_2 41 #endif /* LINUX_KERNEL_PAGE_FLAGS_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 6be1aa559b1e..276140c94f4a 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -135,6 +135,9 @@ enum pageflags { #if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) PG_young, PG_idle, +#endif +#ifdef CONFIG_64BIT + PG_arch_2, #endif __NR_PAGEFLAGS, diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 5fb752034386..67018d367b9f 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -79,6 +79,12 @@ #define IF_HAVE_PG_IDLE(flag,string) #endif +#ifdef CONFIG_64BIT +#define IF_HAVE_PG_ARCH_2(flag,string) ,{1UL << flag, string} +#else +#define IF_HAVE_PG_ARCH_2(flag,string) +#endif + #define __def_pageflag_names \ {1UL << PG_locked, "locked" }, \ {1UL << PG_waiters, "waiters" }, \ @@ -105,7 +111,8 @@ IF_HAVE_PG_MLOCK(PG_mlocked, "mlocked" ) \ IF_HAVE_PG_UNCACHED(PG_uncached, "uncached" ) \ IF_HAVE_PG_HWPOISON(PG_hwpoison, "hwpoison" ) \ IF_HAVE_PG_IDLE(PG_young, "young" ) \ -IF_HAVE_PG_IDLE(PG_idle, "idle" ) +IF_HAVE_PG_IDLE(PG_idle, "idle" ) \ +IF_HAVE_PG_ARCH_2(PG_arch_2, "arch_2" ) #define show_page_flags(flags) \ (flags) ? __print_flags(flags, "|", \ diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index 58c0eab71bca..0517c744b04e 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -78,6 +78,7 @@ #define KPF_ARCH 38 #define KPF_UNCACHED 39 #define KPF_SOFTDIRTY 40 +#define KPF_ARCH_2 41 /* [48-] take some arbitrary free slots for expanding overloaded flags * not part of kernel API @@ -135,6 +136,7 @@ static const char * const page_flag_names[] = { [KPF_ARCH] = "h:arch", [KPF_UNCACHED] = "c:uncached", [KPF_SOFTDIRTY] = "f:softdirty", + [KPF_ARCH_2] = "H:arch_2", [KPF_READAHEAD] = "I:readahead", [KPF_SLOB_FREE] = "P:slob_free", -- cgit v1.3.1 From cd89597bbe5a0179b3f0f51604cb36a1ffd4e080 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 2 Sep 2020 12:21:30 +0200 Subject: tests: add waitid() tests for non-blocking pidfds Verify that the PIDFD_NONBLOCK flag works with pidfd_open() and that waitid() with a non-blocking pidfd returns EAGAIN: TAP version 13 1..3 # Starting 3 tests from 1 test cases. # RUN global.wait_simple ... # OK global.wait_simple ok 1 global.wait_simple # RUN global.wait_states ... # OK global.wait_states ok 2 global.wait_states # RUN global.wait_nonblock ... # OK global.wait_nonblock ok 3 global.wait_nonblock # PASSED: 3 / 3 tests passed. # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Christian Brauner Reviewed-by: Josh Triplett Cc: Shuah Khan Cc: linux-kselftest@vger.kernel.org Link: https://lore.kernel.org/r/20200902102130.147672-5-christian.brauner@ubuntu.com --- tools/testing/selftests/pidfd/pidfd.h | 4 ++ tools/testing/selftests/pidfd/pidfd_wait.c | 91 +++++++++++++++++++++++++++++- 2 files changed, 94 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index a2c80914e3dc..01f8d3c0cf2c 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -46,6 +46,10 @@ #define __NR_pidfd_getfd -1 #endif +#ifndef PIDFD_NONBLOCK +#define PIDFD_NONBLOCK O_NONBLOCK +#endif + /* * The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c * That means, when it wraps around any pid < 300 will be skipped. diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index 075c716f6fb8..4063d6f31fa4 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -21,6 +21,11 @@ #define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) +/* Attempt to de-conflict with the selftests tree. */ +#ifndef SKIP +#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) +#endif + static pid_t sys_clone3(struct clone_args *args) { return syscall(__NR_clone3, args, sizeof(struct clone_args)); @@ -65,7 +70,7 @@ TEST(wait_simple) pidfd = -1; pid = sys_clone3(&args); - ASSERT_GE(pid, 1); + ASSERT_GE(pid, 0); if (pid == 0) exit(EXIT_SUCCESS); @@ -133,4 +138,88 @@ TEST(wait_states) EXPECT_EQ(close(pidfd), 0); } +TEST(wait_nonblock) +{ + int pidfd, status = 0; + unsigned int flags = 0; + pid_t parent_tid = -1; + struct clone_args args = { + .parent_tid = ptr_to_u64(&parent_tid), + .flags = CLONE_PARENT_SETTID, + .exit_signal = SIGCHLD, + }; + int ret; + pid_t pid; + siginfo_t info = { + .si_signo = 0, + }; + + /* + * Callers need to see ECHILD with non-blocking pidfds when no child + * processes exists. + */ + pidfd = sys_pidfd_open(getpid(), PIDFD_NONBLOCK); + EXPECT_GE(pidfd, 0) { + /* pidfd_open() doesn't support PIDFD_NONBLOCK. */ + ASSERT_EQ(errno, EINVAL); + SKIP(return, "Skipping PIDFD_NONBLOCK test"); + } + + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + ASSERT_LT(ret, 0); + ASSERT_EQ(errno, ECHILD); + EXPECT_EQ(close(pidfd), 0); + + pid = sys_clone3(&args); + ASSERT_GE(pid, 0); + + if (pid == 0) { + kill(getpid(), SIGSTOP); + exit(EXIT_SUCCESS); + } + + pidfd = sys_pidfd_open(pid, PIDFD_NONBLOCK); + EXPECT_GE(pidfd, 0) { + /* pidfd_open() doesn't support PIDFD_NONBLOCK. */ + ASSERT_EQ(errno, EINVAL); + SKIP(return, "Skipping PIDFD_NONBLOCK test"); + } + + flags = fcntl(pidfd, F_GETFL, 0); + ASSERT_GT(flags, 0); + ASSERT_GT((flags & O_NONBLOCK), 0); + + /* + * Callers need to see EAGAIN/EWOULDBLOCK with non-blocking pidfd when + * child processes exist but none have exited. + */ + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + ASSERT_LT(ret, 0); + ASSERT_EQ(errno, EAGAIN); + + /* + * Callers need to continue seeing 0 with non-blocking pidfd and + * WNOHANG raised explicitly when child processes exist but none have + * exited. + */ + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG, NULL); + ASSERT_EQ(ret, 0); + + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_STOPPED); + ASSERT_EQ(info.si_pid, parent_tid); + + ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0); + + ASSERT_EQ(fcntl(pidfd, F_SETFL, (flags & ~O_NONBLOCK)), 0); + + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_EXITED); + ASSERT_EQ(info.si_pid, parent_tid); + + EXPECT_EQ(close(pidfd), 0); +} + TEST_HARNESS_MAIN -- cgit v1.3.1 From 17e54b096e6a8dc92b92c59c2e3437550383b27a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Sep 2020 21:16:10 -0700 Subject: libbpf: Fix another __u64 cast in printf Another issue of __u64 needing either %lu or %llu, depending on the architecture. Fix with cast to `unsigned long long`. Fixes: 7e06aad52929 ("libbpf: Add multi-prog section support for struct_ops") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200904041611.1695163-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 47b43c13eee5..53be32a2b9fc 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8224,7 +8224,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, } if (sym.st_value % BPF_INSN_SZ) { pr_warn("struct_ops reloc %s: invalid target program offset %llu\n", - map->name, (__u64)sym.st_value); + map->name, (unsigned long long)sym.st_value); return -LIBBPF_ERRNO__FORMAT; } insn_idx = sym.st_value / BPF_INSN_SZ; -- cgit v1.3.1 From 8eb629585d2231e90112148009e2a11b0979ca38 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Sep 2020 21:16:11 -0700 Subject: libbpf: Fix potential multiplication overflow Detected by LGTM static analyze in Github repo, fix potential multiplication overflow before result is casted to size_t. Fixes: 8505e8709b5e ("libbpf: Implement generalized .BTF.ext func/line info adjustment") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200904041611.1695163-2-andriin@fb.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 53be32a2b9fc..550950eb1860 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5802,7 +5802,7 @@ static int adjust_prog_btf_ext_info(const struct bpf_object *obj, /* append func/line info of a given (sub-)program to the main * program func/line info */ - old_sz = (*prog_rec_cnt) * ext_info->rec_size; + old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size; new_sz = old_sz + (copy_end - copy_start); new_prog_info = realloc(*prog_info, new_sz); if (!new_prog_info) -- cgit v1.3.1 From e71e19a9ea70952a53d58a99971820ce6c1794a8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 3 Sep 2020 13:44:39 -0300 Subject: tools features: Add feature test to check if libbfd has buildid support Which is needed by the PE executable support, for instance. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jacek Caban Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Remi Bernon Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 2 ++ tools/build/feature/Makefile | 4 ++++ tools/build/feature/test-all.c | 5 +++++ tools/build/feature/test-libbfd-buildid.c | 8 ++++++++ tools/perf/Makefile.config | 6 ++++++ 5 files changed, 25 insertions(+) create mode 100644 tools/build/feature/test-libbfd-buildid.c (limited to 'tools') diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index c1daf4d57518..8b381d8ec9de 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -41,6 +41,7 @@ FEATURE_TESTS_BASIC := \ gtk2 \ gtk2-infobar \ libbfd \ + libbfd-buildid \ libcap \ libelf \ libelf-getphdrnum \ @@ -113,6 +114,7 @@ FEATURE_DISPLAY ?= \ glibc \ gtk2 \ libbfd \ + libbfd-buildid \ libcap \ libelf \ libnuma \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index d220fe952747..9e5f8db4a168 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -15,6 +15,7 @@ FILES= \ test-hello.bin \ test-libaudit.bin \ test-libbfd.bin \ + test-libbfd-buildid.bin \ test-disassembler-four-args.bin \ test-reallocarray.bin \ test-libbfd-liberty.bin \ @@ -229,6 +230,9 @@ $(OUTPUT)test-libpython-version.bin: $(OUTPUT)test-libbfd.bin: $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl +$(OUTPUT)test-libbfd-buildid.bin: + $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl + $(OUTPUT)test-disassembler-four-args.bin: $(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 5479e543b194..80c5795f324b 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -90,6 +90,10 @@ # include "test-libbfd.c" #undef main +#define main main_test_libbfd_buildid +# include "test-libbfd-buildid.c" +#undef main + #define main main_test_backtrace # include "test-backtrace.c" #undef main @@ -208,6 +212,7 @@ int main(int argc, char *argv[]) main_test_gtk2(argc, argv); main_test_gtk2_infobar(argc, argv); main_test_libbfd(); + main_test_libbfd_buildid(); main_test_backtrace(); main_test_libnuma(); main_test_numa_num_possible_cpus(); diff --git a/tools/build/feature/test-libbfd-buildid.c b/tools/build/feature/test-libbfd-buildid.c new file mode 100644 index 000000000000..157644b04c05 --- /dev/null +++ b/tools/build/feature/test-libbfd-buildid.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +int main(void) +{ + bfd *abfd = bfd_openr("Pedro", 0); + return abfd && (!abfd->build_id || abfd->build_id->size > 0x506564726f); +} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 190be4fa5c21..f73a85ea3e7f 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -825,6 +825,12 @@ else $(call feature_check,disassembler-four-args) endif +ifeq ($(feature-libbfd-buildid), 1) + CFLAGS += -DHAVE_LIBBFD_BUILDID_SUPPORT +else + msg := $(warning Old version of libbfd/binutils things like PE executable profiling will not be available); +endif + ifdef NO_DEMANGLE CFLAGS += -DNO_DEMANGLE else -- cgit v1.3.1 From ba0509dcb7f806403b23234320711c45be9dccec Mon Sep 17 00:00:00 2001 From: Remi Bernon Date: Fri, 21 Aug 2020 18:52:36 +0200 Subject: perf dso: Use libbfd to read build_id and .gnu_debuglink section Wine generates PE binaries for most of its modules and perf is unable to parse these files to get build_id or .gnu_debuglink section. Using libbfd when available, instead of libelf, makes it possible to resolve debug file location regardless of the dso binary format. Committer notes: Made the filename__read_build_id() variant that uses abfd->build_id depend on the feature test that defines HAVE_LIBBFD_BUILDID_SUPPORT, to get this to continue building with older libbfd/binutils. Signed-off-by: Remi Bernon Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Jacek Caban Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200821165238.1340315-1-rbernon@codeweavers.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 80 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 8cc4b0059fb0..94a156df22d5 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -50,6 +50,10 @@ typedef Elf64_Nhdr GElf_Nhdr; #define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ #endif +#ifdef HAVE_LIBBFD_SUPPORT +#define PACKAGE 'perf' +#include +#else #ifdef HAVE_CPLUS_DEMANGLE_SUPPORT extern char *cplus_demangle(const char *, int); @@ -65,9 +69,7 @@ static inline char *bfd_demangle(void __maybe_unused *v, { return NULL; } -#else -#define PACKAGE 'perf' -#include +#endif #endif #endif @@ -530,6 +532,36 @@ out: return err; } +#ifdef HAVE_LIBBFD_BUILDID_SUPPORT + +int filename__read_build_id(const char *filename, void *bf, size_t size) +{ + int err = -1; + bfd *abfd; + + abfd = bfd_openr(filename, NULL); + if (!abfd) + return -1; + + if (!bfd_check_format(abfd, bfd_object)) { + pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename); + goto out_close; + } + + if (!abfd->build_id || abfd->build_id->size > size) + goto out_close; + + memcpy(bf, abfd->build_id->data, abfd->build_id->size); + memset(bf + abfd->build_id->size, 0, size - abfd->build_id->size); + err = abfd->build_id->size; + +out_close: + bfd_close(abfd); + return err; +} + +#else // HAVE_LIBBFD_BUILDID_SUPPORT + int filename__read_build_id(const char *filename, void *bf, size_t size) { int fd, err = -1; @@ -557,6 +589,8 @@ out: return err; } +#endif // HAVE_LIBBFD_BUILDID_SUPPORT + int sysfs__read_build_id(const char *filename, void *build_id, size_t size) { int fd, err = -1; @@ -608,6 +642,44 @@ out: return err; } +#ifdef HAVE_LIBBFD_SUPPORT + +int filename__read_debuglink(const char *filename, char *debuglink, + size_t size) +{ + int err = -1; + asection *section; + bfd *abfd; + + abfd = bfd_openr(filename, NULL); + if (!abfd) + return -1; + + if (!bfd_check_format(abfd, bfd_object)) { + pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename); + goto out_close; + } + + section = bfd_get_section_by_name(abfd, ".gnu_debuglink"); + if (!section) + goto out_close; + + if (section->size > size) + goto out_close; + + if (!bfd_get_section_contents(abfd, section, debuglink, 0, + section->size)) + goto out_close; + + err = 0; + +out_close: + bfd_close(abfd); + return err; +} + +#else + int filename__read_debuglink(const char *filename, char *debuglink, size_t size) { @@ -660,6 +732,8 @@ out: return err; } +#endif + static int dso__swap_init(struct dso *dso, unsigned char eidata) { static unsigned int const endian = 1; -- cgit v1.3.1 From eac9a4342e5447cade4d484261a8992e2ec6f138 Mon Sep 17 00:00:00 2001 From: Remi Bernon Date: Fri, 21 Aug 2020 18:52:37 +0200 Subject: perf symbols: Try reading the symbol table with libbfd Wine generates PE binaries for its code modules and also generates debug files in PE or PDB formats, which perf cannot parse either. Trying to read symbols on non-ELF binaries with libbfd, when supported, makes it possible for perf to report symbols and annotations for Windows applications running under Wine. Because libbfd doesn't provide symbol size (probably because of some backends not supporting it), we compute it by first sorting the symbols by addresses and then considering that they are sequential in a given section. v3: Also include local and weak bfd symbols and mark them as such, only global symbols were previously reported, and that caused a very imprecise address to symbol resolution. Signed-off-by: Remi Bernon Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Jacek Caban Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200821165238.1340315-2-rbernon@codeweavers.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 140 +++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/symbol.h | 4 ++ 2 files changed, 144 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 5151a8c0b791..5ddf76fb691c 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1526,6 +1526,138 @@ out_failure: return -1; } +#ifdef HAVE_LIBBFD_SUPPORT +#define PACKAGE 'perf' +#include + +static int bfd_symbols__cmpvalue(const void *a, const void *b) +{ + const asymbol *as = *(const asymbol **)a, *bs = *(const asymbol **)b; + + if (bfd_asymbol_value(as) != bfd_asymbol_value(bs)) + return bfd_asymbol_value(as) - bfd_asymbol_value(bs); + + return bfd_asymbol_name(as)[0] - bfd_asymbol_name(bs)[0]; +} + +static int bfd2elf_binding(asymbol *symbol) +{ + if (symbol->flags & BSF_WEAK) + return STB_WEAK; + if (symbol->flags & BSF_GLOBAL) + return STB_GLOBAL; + if (symbol->flags & BSF_LOCAL) + return STB_LOCAL; + return -1; +} + +int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) +{ + int err = -1; + long symbols_size, symbols_count; + asection *section; + asymbol **symbols, *sym; + struct symbol *symbol; + bfd *abfd; + u_int i; + u64 start, len; + + abfd = bfd_openr(dso->long_name, NULL); + if (!abfd) + return -1; + + if (!bfd_check_format(abfd, bfd_object)) { + pr_debug2("%s: cannot read %s bfd file.\n", __func__, + dso->long_name); + goto out_close; + } + + if (bfd_get_flavour(abfd) == bfd_target_elf_flavour) + goto out_close; + + section = bfd_get_section_by_name(abfd, ".text"); + if (section) + dso->text_offset = section->vma - section->filepos; + + bfd_close(abfd); + + abfd = bfd_openr(debugfile, NULL); + if (!abfd) + return -1; + + if (!bfd_check_format(abfd, bfd_object)) { + pr_debug2("%s: cannot read %s bfd file.\n", __func__, + debugfile); + goto out_close; + } + + if (bfd_get_flavour(abfd) == bfd_target_elf_flavour) + goto out_close; + + symbols_size = bfd_get_symtab_upper_bound(abfd); + if (symbols_size == 0) { + bfd_close(abfd); + return 0; + } + + if (symbols_size < 0) + goto out_close; + + symbols = malloc(symbols_size); + if (!symbols) + goto out_close; + + symbols_count = bfd_canonicalize_symtab(abfd, symbols); + if (symbols_count < 0) + goto out_free; + + qsort(symbols, symbols_count, sizeof(asymbol *), bfd_symbols__cmpvalue); + +#ifdef bfd_get_section +#define bfd_asymbol_section bfd_get_section +#endif + for (i = 0; i < symbols_count; ++i) { + sym = symbols[i]; + section = bfd_asymbol_section(sym); + if (bfd2elf_binding(sym) < 0) + continue; + + while (i + 1 < symbols_count && + bfd_asymbol_section(symbols[i + 1]) == section && + bfd2elf_binding(symbols[i + 1]) < 0) + i++; + + if (i + 1 < symbols_count && + bfd_asymbol_section(symbols[i + 1]) == section) + len = symbols[i + 1]->value - sym->value; + else + len = section->size - sym->value; + + start = bfd_asymbol_value(sym) - dso->text_offset; + symbol = symbol__new(start, len, bfd2elf_binding(sym), STT_FUNC, + bfd_asymbol_name(sym)); + if (!symbol) + goto out_free; + + symbols__insert(&dso->symbols, symbol); + } +#ifdef bfd_get_section +#undef bfd_asymbol_section +#endif + + symbols__fixup_end(&dso->symbols); + symbols__fixup_duplicate(&dso->symbols); + dso->adjust_symbols = 1; + + err = 0; +out_free: + free(symbols); +out_close: + bfd_close(abfd); + return err; +} +#endif + static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, enum dso_binary_type type) { @@ -1699,6 +1831,7 @@ int dso__load(struct dso *dso, struct map *map) bool next_slot = false; bool is_reg; bool nsexit; + int bfdrc = -1; int sirc = -1; enum dso_binary_type symtab_type = binary_type_symtab[i]; @@ -1717,12 +1850,19 @@ int dso__load(struct dso *dso, struct map *map) nsinfo__mountns_exit(&nsc); is_reg = is_regular_file(name); +#ifdef HAVE_LIBBFD_SUPPORT if (is_reg) + bfdrc = dso__load_bfd_symbols(dso, name); +#endif + if (is_reg && bfdrc < 0) sirc = symsrc__init(ss, dso, name, symtab_type); if (nsexit) nsinfo__mountns_enter(dso->nsinfo, &nsc); + if (bfdrc == 0) + break; + if (!is_reg || sirc < 0) continue; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index ff4f4c47e148..11fe71f46d14 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -175,6 +175,10 @@ int symbol__config_symfs(const struct option *opt __maybe_unused, struct symsrc; +#ifdef HAVE_LIBBFD_SUPPORT +int dso__load_bfd_symbols(struct dso *dso, const char *debugfile); +#endif + int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, struct symsrc *runtime_ss, int kmodule); int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss); -- cgit v1.3.1 From ed21d6d7c48e6e96c2d617e304a7ebfbd17b1807 Mon Sep 17 00:00:00 2001 From: Remi Bernon Date: Fri, 21 Aug 2020 18:52:38 +0200 Subject: perf tests: Add test for PE binary format support This adds a precompiled file in PE binary format, with split debug file, and tries to read its build_id and .gnu_debuglink sections, as well as looking up the main symbol from the debug file. This should succeed if libbfd is supported. Committer testing: $ perf test "PE file support" 68: PE file support : Ok $ Signed-off-by: Remi Bernon Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Jacek Caban Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200821165238.1340315-3-rbernon@codeweavers.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 1 + tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 4 ++ tools/perf/tests/pe-file-parsing.c | 98 +++++++++++++++++++++++++++++++++++++ tools/perf/tests/pe-file.c | 14 ++++++ tools/perf/tests/pe-file.exe | Bin 0 -> 75595 bytes tools/perf/tests/pe-file.exe.debug | Bin 0 -> 141644 bytes tools/perf/tests/tests.h | 1 + 8 files changed, 119 insertions(+) create mode 100644 tools/perf/tests/pe-file-parsing.c create mode 100644 tools/perf/tests/pe-file.c create mode 100644 tools/perf/tests/pe-file.exe create mode 100644 tools/perf/tests/pe-file.exe.debug (limited to 'tools') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 6031167939ae..b2a3f5b652fe 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -961,6 +961,7 @@ install-tests: all install-gtk $(call QUIET_INSTALL, tests) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ + $(INSTALL) tests/pe-file.exe* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \ diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 84352fc49a20..69bea7996f18 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -60,6 +60,7 @@ perf-y += api-io.o perf-y += demangle-java-test.o perf-y += pfm.o perf-y += parse-metric.o +perf-y += pe-file-parsing.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index d328caaba45d..651b8ea3354a 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -341,6 +341,10 @@ static struct test generic_tests[] = { .desc = "Parse and process metrics", .func = test__parse_metric, }, + { + .desc = "PE file support", + .func = test__pe_file_parsing, + }, { .func = NULL, }, diff --git a/tools/perf/tests/pe-file-parsing.c b/tools/perf/tests/pe-file-parsing.c new file mode 100644 index 000000000000..19eae3e8e229 --- /dev/null +++ b/tools/perf/tests/pe-file-parsing.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "util/build-id.h" +#include "util/symbol.h" +#include "util/dso.h" + +#include "tests.h" + +#ifdef HAVE_LIBBFD_SUPPORT + +static int run_dir(const char *d) +{ + char filename[PATH_MAX]; + char debugfile[PATH_MAX]; + char build_id[BUILD_ID_SIZE]; + char debuglink[PATH_MAX]; + char expect_build_id[] = { + 0x5a, 0x0f, 0xd8, 0x82, 0xb5, 0x30, 0x84, 0x22, + 0x4b, 0xa4, 0x7b, 0x62, 0x4c, 0x55, 0xa4, 0x69, + }; + char expect_debuglink[PATH_MAX] = "pe-file.exe.debug"; + struct dso *dso; + struct symbol *sym; + int ret; + + scnprintf(filename, PATH_MAX, "%s/pe-file.exe", d); + ret = filename__read_build_id(filename, build_id, BUILD_ID_SIZE); + TEST_ASSERT_VAL("Failed to read build_id", + ret == sizeof(expect_build_id)); + TEST_ASSERT_VAL("Wrong build_id", !memcmp(build_id, expect_build_id, + sizeof(expect_build_id))); + + ret = filename__read_debuglink(filename, debuglink, PATH_MAX); + TEST_ASSERT_VAL("Failed to read debuglink", ret == 0); + TEST_ASSERT_VAL("Wrong debuglink", + !strcmp(debuglink, expect_debuglink)); + + scnprintf(debugfile, PATH_MAX, "%s/%s", d, debuglink); + ret = filename__read_build_id(debugfile, build_id, BUILD_ID_SIZE); + TEST_ASSERT_VAL("Failed to read debug file build_id", + ret == sizeof(expect_build_id)); + TEST_ASSERT_VAL("Wrong build_id", !memcmp(build_id, expect_build_id, + sizeof(expect_build_id))); + + dso = dso__new(filename); + TEST_ASSERT_VAL("Failed to get dso", dso); + + ret = dso__load_bfd_symbols(dso, debugfile); + TEST_ASSERT_VAL("Failed to load symbols", ret == 0); + + dso__sort_by_name(dso); + sym = dso__find_symbol_by_name(dso, "main"); + TEST_ASSERT_VAL("Failed to find main", sym); + dso__delete(dso); + + return TEST_OK; +} + +int test__pe_file_parsing(struct test *test __maybe_unused, + int subtest __maybe_unused) +{ + struct stat st; + char path_dir[PATH_MAX]; + + /* First try development tree tests. */ + if (!lstat("./tests", &st)) + return run_dir("./tests"); + + /* Then installed path. */ + snprintf(path_dir, PATH_MAX, "%s/tests", get_argv_exec_path()); + + if (!lstat(path_dir, &st)) + return run_dir(path_dir); + + return TEST_SKIP; +} + +#else + +int test__pe_file_parsing(struct test *test __maybe_unused, + int subtest __maybe_unused) +{ + return TEST_SKIP; +} + +#endif diff --git a/tools/perf/tests/pe-file.c b/tools/perf/tests/pe-file.c new file mode 100644 index 000000000000..eb3df5e9886f --- /dev/null +++ b/tools/perf/tests/pe-file.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 + +// pe-file.exe and pe-file.exe.debug built with; +// x86_64-w64-mingw32-gcc -o pe-file.exe pe-file.c +// -Wl,--file-alignment,4096 -Wl,--build-id +// x86_64-w64-mingw32-objcopy --only-keep-debug +// --compress-debug-sections pe-file.exe pe-file.exe.debug +// x86_64-w64-mingw32-objcopy --strip-debug +// --add-gnu-debuglink=pe-file.exe.debug pe-file.exe + +int main(int argc, char const *argv[]) +{ + return 0; +} diff --git a/tools/perf/tests/pe-file.exe b/tools/perf/tests/pe-file.exe new file mode 100644 index 000000000000..838a46dae724 Binary files /dev/null and b/tools/perf/tests/pe-file.exe differ diff --git a/tools/perf/tests/pe-file.exe.debug b/tools/perf/tests/pe-file.exe.debug new file mode 100644 index 000000000000..287d6718d6c9 Binary files /dev/null and b/tools/perf/tests/pe-file.exe.debug differ diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 4447a516c689..ef0f33c6ba23 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -122,6 +122,7 @@ int test__pfm(struct test *test, int subtest); const char *test__pfm_subtest_get_desc(int subtest); int test__pfm_subtest_get_nr(void); int test__parse_metric(struct test *test, int subtest); +int test__pe_file_parsing(struct test *test, int subtest); bool test__bp_signal_is_supported(void); bool test__bp_account_is_supported(void); -- cgit v1.3.1 From 9864a66defeb8df0ef1487e6d195278cf3e33666 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 1 Sep 2020 12:37:53 +0300 Subject: perf tools: Consolidate --control option parsing into one function Consolidate --control option parsing into one function, in preparation for adding FIFO file name options. Signed-off-by: Adrian Hunter Acked-by: Alexey Budankov Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20200901093758.32293-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 22 ++-------------------- tools/perf/builtin-stat.c | 22 ++-------------------- tools/perf/util/evlist.c | 24 ++++++++++++++++++++++++ tools/perf/util/evlist.h | 1 + 4 files changed, 29 insertions(+), 40 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 772f1057647f..50591e07aa4f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2234,27 +2234,9 @@ static int parse_control_option(const struct option *opt, const char *str, int unset __maybe_unused) { - char *comma = NULL, *endptr = NULL; - struct record_opts *config = (struct record_opts *)opt->value; - - if (strncmp(str, "fd:", 3)) - return -EINVAL; - - config->ctl_fd = strtoul(&str[3], &endptr, 0); - if (endptr == &str[3]) - return -EINVAL; - - comma = strchr(str, ','); - if (comma) { - if (endptr != comma) - return -EINVAL; - - config->ctl_fd_ack = strtoul(comma + 1, &endptr, 0); - if (endptr == comma + 1 || *endptr != '\0') - return -EINVAL; - } + struct record_opts *opts = opt->value; - return 0; + return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack); } static void switch_output_size_warn(struct record *rec) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index fddc97cac984..ea072d47c560 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1045,27 +1045,9 @@ static int parse_control_option(const struct option *opt, const char *str, int unset __maybe_unused) { - char *comma = NULL, *endptr = NULL; - struct perf_stat_config *config = (struct perf_stat_config *)opt->value; + struct perf_stat_config *config = opt->value; - if (strncmp(str, "fd:", 3)) - return -EINVAL; - - config->ctl_fd = strtoul(&str[3], &endptr, 0); - if (endptr == &str[3]) - return -EINVAL; - - comma = strchr(str, ','); - if (comma) { - if (endptr != comma) - return -EINVAL; - - config->ctl_fd_ack = strtoul(comma + 1, &endptr, 0); - if (endptr == comma + 1 || *endptr != '\0') - return -EINVAL; - } - - return 0; + return evlist__parse_control(str, &config->ctl_fd, &config->ctl_fd_ack); } static struct option stat_options[] = { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e3fa3bf7498a..62e3f87547ce 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1727,6 +1727,30 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list, return leader; } +int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack) +{ + char *comma = NULL, *endptr = NULL; + + if (strncmp(str, "fd:", 3)) + return -EINVAL; + + *ctl_fd = strtoul(&str[3], &endptr, 0); + if (endptr == &str[3]) + return -EINVAL; + + comma = strchr(str, ','); + if (comma) { + if (endptr != comma) + return -EINVAL; + + *ctl_fd_ack = strtoul(comma + 1, &endptr, 0); + if (endptr == comma + 1 || *endptr != '\0') + return -EINVAL; + } + + return 0; +} + int evlist__initialize_ctlfd(struct evlist *evlist, int fd, int ack) { if (fd == -1) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index c73f7f7f120b..a5a5a07d5c55 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -373,6 +373,7 @@ enum evlist_ctl_cmd { EVLIST_CTL_CMD_ACK }; +int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack); int evlist__initialize_ctlfd(struct evlist *evlist, int ctl_fd, int ctl_fd_ack); int evlist__finalize_ctlfd(struct evlist *evlist); bool evlist__ctlfd_initialized(struct evlist *evlist); -- cgit v1.3.1 From 40db8ff59e75af108d695597bdbaa4828e129178 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 1 Sep 2020 12:37:54 +0300 Subject: perf tools: Handle read errors from ctl_fd Handle read errors from ctl_fd such as EINTR, EAGAIN and EWOULDBLOCK. Signed-off-by: Adrian Hunter Acked-by: Alexey Budankov Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20200901093758.32293-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 62e3f87547ce..47d1045a19af 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1802,6 +1802,7 @@ static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd, char c; size_t bytes_read = 0; + *cmd = EVLIST_CTL_CMD_UNSUPPORTED; memset(cmd_data, 0, data_size); data_size--; @@ -1813,17 +1814,22 @@ static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd, cmd_data[bytes_read++] = c; if (bytes_read == data_size) break; - } else { - if (err == -1) + continue; + } else if (err == -1) { + if (errno == EINTR) + continue; + if (errno == EAGAIN || errno == EWOULDBLOCK) + err = 0; + else pr_err("Failed to read from ctlfd %d: %m\n", evlist->ctl_fd.fd); - break; } + break; } while (1); pr_debug("Message from ctl_fd: \"%s%s\"\n", cmd_data, bytes_read == data_size ? "" : c == '\n' ? "\\n" : "\\0"); - if (err > 0) { + if (bytes_read > 0) { if (!strncmp(cmd_data, EVLIST_CTL_CMD_ENABLE_TAG, (sizeof(EVLIST_CTL_CMD_ENABLE_TAG)-1))) { *cmd = EVLIST_CTL_CMD_ENABLE; @@ -1833,7 +1839,7 @@ static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd, } } - return err; + return bytes_read ? (int)bytes_read : err; } static int evlist__ctlfd_ack(struct evlist *evlist) -- cgit v1.3.1 From 1f4390d825cc04e91a276c78f984dd2d7fe01e62 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 1 Sep 2020 12:37:55 +0300 Subject: perf tools: Use AsciiDoc formatting for --control option documentation The --control option does not display well in man pages unless AsciiDoc formatting is used. Signed-off-by: Adrian Hunter Acked-by: Alexey Budankov Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20200901093758.32293-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 46 ++++++++++++++++---------------- tools/perf/Documentation/perf-stat.txt | 46 ++++++++++++++++---------------- 2 files changed, 46 insertions(+), 46 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index bd50cdff08a8..34c80aa55948 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -631,43 +631,43 @@ option. The -e option and this one can be mixed and matched. Events can be grouped using the {} notation. endif::HAVE_LIBPFM[] ---control fd:ctl-fd[,ack-fd] +--control=fd:ctl-fd[,ack-fd]:: Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events). Measurements can be started with events disabled using --delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor to synchronize with the controlling process. Example of bash shell script to enable and disable events during measurements: -#!/bin/bash + #!/bin/bash -ctl_dir=/tmp/ + ctl_dir=/tmp/ -ctl_fifo=${ctl_dir}perf_ctl.fifo -test -p ${ctl_fifo} && unlink ${ctl_fifo} -mkfifo ${ctl_fifo} -exec {ctl_fd}<>${ctl_fifo} + ctl_fifo=${ctl_dir}perf_ctl.fifo + test -p ${ctl_fifo} && unlink ${ctl_fifo} + mkfifo ${ctl_fifo} + exec {ctl_fd}<>${ctl_fifo} -ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo -test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo} -mkfifo ${ctl_ack_fifo} -exec {ctl_fd_ack}<>${ctl_ack_fifo} + ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo + test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo} + mkfifo ${ctl_ack_fifo} + exec {ctl_fd_ack}<>${ctl_ack_fifo} -perf record -D -1 -e cpu-cycles -a \ - --control fd:${ctl_fd},${ctl_fd_ack} \ - -- sleep 30 & -perf_pid=$! + perf record -D -1 -e cpu-cycles -a \ + --control fd:${ctl_fd},${ctl_fd_ack} \ + -- sleep 30 & + perf_pid=$! -sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})" -sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})" + sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})" + sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})" -exec {ctl_fd_ack}>&- -unlink ${ctl_ack_fifo} + exec {ctl_fd_ack}>&- + unlink ${ctl_ack_fifo} -exec {ctl_fd}>&- -unlink ${ctl_fifo} + exec {ctl_fd}>&- + unlink ${ctl_fifo} -wait -n ${perf_pid} -exit $? + wait -n ${perf_pid} + exit $? SEE ALSO diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index db420dd75e43..224795e97b1c 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -180,43 +180,43 @@ with it. --append may be used here. Examples: 3>results perf stat --log-fd 3 -- $cmd 3>>results perf stat --log-fd 3 --append -- $cmd ---control fd:ctl-fd[,ack-fd] +--control=fd:ctl-fd[,ack-fd]:: Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events). Measurements can be started with events disabled using --delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor to synchronize with the controlling process. Example of bash shell script to enable and disable events during measurements: -#!/bin/bash + #!/bin/bash -ctl_dir=/tmp/ + ctl_dir=/tmp/ -ctl_fifo=${ctl_dir}perf_ctl.fifo -test -p ${ctl_fifo} && unlink ${ctl_fifo} -mkfifo ${ctl_fifo} -exec {ctl_fd}<>${ctl_fifo} + ctl_fifo=${ctl_dir}perf_ctl.fifo + test -p ${ctl_fifo} && unlink ${ctl_fifo} + mkfifo ${ctl_fifo} + exec {ctl_fd}<>${ctl_fifo} -ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo -test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo} -mkfifo ${ctl_ack_fifo} -exec {ctl_fd_ack}<>${ctl_ack_fifo} + ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo + test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo} + mkfifo ${ctl_ack_fifo} + exec {ctl_fd_ack}<>${ctl_ack_fifo} -perf stat -D -1 -e cpu-cycles -a -I 1000 \ - --control fd:${ctl_fd},${ctl_fd_ack} \ - -- sleep 30 & -perf_pid=$! + perf stat -D -1 -e cpu-cycles -a -I 1000 \ + --control fd:${ctl_fd},${ctl_fd_ack} \ + -- sleep 30 & + perf_pid=$! -sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})" -sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})" + sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})" + sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})" -exec {ctl_fd_ack}>&- -unlink ${ctl_ack_fifo} + exec {ctl_fd_ack}>&- + unlink ${ctl_ack_fifo} -exec {ctl_fd}>&- -unlink ${ctl_fifo} + exec {ctl_fd}>&- + unlink ${ctl_fifo} -wait -n ${perf_pid} -exit $? + wait -n ${perf_pid} + exit $? --pre:: -- cgit v1.3.1 From a8fcbd269b4340c36dbf99b4453bcbfe128d93fb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 2 Sep 2020 13:57:07 +0300 Subject: perf tools: Add FIFO file names as alternative options to --control Enable the --control option to accept file names as an alternative to file descriptors. Example: $ mkfifo perf.control $ mkfifo perf.ack $ cat perf.ack & [1] 6808 $ perf record --control fifo:perf.control,perf.ack -- sleep 300 & [2] 6810 $ echo disable > perf.control $ Events disabled ack $ echo enable > perf.control $ Events enabled ack $ echo disable > perf.control $ Events disabled ack $ kill %2 [ perf record: Woken up 4 times to write data ] $ [ perf record: Captured and wrote 0.018 MB perf.data (7 samples) ] [1]- Done cat perf.ack [2]+ Terminated perf record --control fifo:perf.control,perf.ack -- sleep 300 $ Signed-off-by: Adrian Hunter Acked-by: Alexey Budankov Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20200902105707.11491-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 2 ++ tools/perf/Documentation/perf-stat.txt | 2 ++ tools/perf/builtin-record.c | 34 +++++++++++++++----- tools/perf/builtin-stat.c | 18 +++++++++-- tools/perf/util/evlist.c | 55 ++++++++++++++++++++++++++++++-- tools/perf/util/evlist.h | 2 +- tools/perf/util/record.h | 1 + tools/perf/util/stat.h | 1 + 8 files changed, 101 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 34c80aa55948..c1f44633abed 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -631,7 +631,9 @@ option. The -e option and this one can be mixed and matched. Events can be grouped using the {} notation. endif::HAVE_LIBPFM[] +--control=fifo:ctl-fifo[,ack-fifo]:: --control=fd:ctl-fd[,ack-fd]:: +ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows. Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events). Measurements can be started with events disabled using --delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 224795e97b1c..7d18694e592a 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -180,7 +180,9 @@ with it. --append may be used here. Examples: 3>results perf stat --log-fd 3 -- $cmd 3>>results perf stat --log-fd 3 --append -- $cmd +--control=fifo:ctl-fifo[,ack-fifo]:: --control=fd:ctl-fd[,ack-fd]:: +ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows. Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events). Measurements can be started with events disabled using --delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 50591e07aa4f..c83aec4940de 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2236,7 +2236,17 @@ static int parse_control_option(const struct option *opt, { struct record_opts *opts = opt->value; - return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack); + return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); +} + +static void close_control_option(struct record_opts *opts) +{ + if (opts->ctl_fd_close) { + opts->ctl_fd_close = false; + close(opts->ctl_fd); + if (opts->ctl_fd_ack >= 0) + close(opts->ctl_fd_ack); + } } static void switch_output_size_warn(struct record *rec) @@ -2578,9 +2588,10 @@ static struct option __record_options[] = { "libpfm4 event selector. use 'perf list' to list available events", parse_libpfm_events_option), #endif - OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd]", + OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n" - "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.", + "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" + "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", parse_control_option), OPT_END() }; @@ -2653,12 +2664,14 @@ int cmd_record(int argc, const char **argv) !perf_can_record_switch_events()) { ui__error("kernel does not support recording context switch events\n"); parse_options_usage(record_usage, record_options, "switch-events", 0); - return -EINVAL; + err = -EINVAL; + goto out_opts; } if (switch_output_setup(rec)) { parse_options_usage(record_usage, record_options, "switch-output", 0); - return -EINVAL; + err = -EINVAL; + goto out_opts; } if (rec->switch_output.time) { @@ -2669,8 +2682,10 @@ int cmd_record(int argc, const char **argv) if (rec->switch_output.num_files) { rec->switch_output.filenames = calloc(sizeof(char *), rec->switch_output.num_files); - if (!rec->switch_output.filenames) - return -EINVAL; + if (!rec->switch_output.filenames) { + err = -EINVAL; + goto out_opts; + } } /* @@ -2686,7 +2701,8 @@ int cmd_record(int argc, const char **argv) rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits); if (!rec->affinity_mask.bits) { pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits); - return -ENOMEM; + err = -ENOMEM; + goto out_opts; } pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits); } @@ -2817,6 +2833,8 @@ out: evlist__delete(rec->evlist); symbol__exit(); auxtrace_record__free(rec->itr); +out_opts: + close_control_option(&rec->opts); return err; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ea072d47c560..14688710195c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1047,7 +1047,17 @@ static int parse_control_option(const struct option *opt, { struct perf_stat_config *config = opt->value; - return evlist__parse_control(str, &config->ctl_fd, &config->ctl_fd_ack); + return evlist__parse_control(str, &config->ctl_fd, &config->ctl_fd_ack, &config->ctl_fd_close); +} + +static void close_control_option(struct perf_stat_config *config) +{ + if (config->ctl_fd_close) { + config->ctl_fd_close = false; + close(config->ctl_fd); + if (config->ctl_fd_ack >= 0) + close(config->ctl_fd_ack); + } } static struct option stat_options[] = { @@ -1153,9 +1163,10 @@ static struct option stat_options[] = { "libpfm4 event selector. use 'perf list' to list available events", parse_libpfm_events_option), #endif - OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd]", + OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n" - "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.", + "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" + "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", parse_control_option), OPT_END() }; @@ -2398,6 +2409,7 @@ out: metricgroup__rblist_exit(&stat_config.metric_events); runtime_stat_delete(&stat_config); + close_control_option(&stat_config); return status; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 47d1045a19af..00593e5f2a9d 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1727,12 +1727,63 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list, return leader; } -int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack) +static int evlist__parse_control_fifo(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close) +{ + char *s, *p; + int ret = 0, fd; + + if (strncmp(str, "fifo:", 5)) + return -EINVAL; + + str += 5; + if (!*str || *str == ',') + return -EINVAL; + + s = strdup(str); + if (!s) + return -ENOMEM; + + p = strchr(s, ','); + if (p) + *p = '\0'; + + /* + * O_RDWR avoids POLLHUPs which is necessary to allow the other + * end of a FIFO to be repeatedly opened and closed. + */ + fd = open(s, O_RDWR | O_NONBLOCK | O_CLOEXEC); + if (fd < 0) { + pr_err("Failed to open '%s'\n", s); + ret = -errno; + goto out_free; + } + *ctl_fd = fd; + *ctl_fd_close = true; + + if (p && *++p) { + /* O_RDWR | O_NONBLOCK means the other end need not be open */ + fd = open(p, O_RDWR | O_NONBLOCK | O_CLOEXEC); + if (fd < 0) { + pr_err("Failed to open '%s'\n", p); + ret = -errno; + goto out_free; + } + *ctl_fd_ack = fd; + } + +out_free: + free(s); + return ret; +} + +int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close) { char *comma = NULL, *endptr = NULL; + *ctl_fd_close = false; + if (strncmp(str, "fd:", 3)) - return -EINVAL; + return evlist__parse_control_fifo(str, ctl_fd, ctl_fd_ack, ctl_fd_close); *ctl_fd = strtoul(&str[3], &endptr, 0); if (endptr == &str[3]) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a5a5a07d5c55..a5678eb5ee60 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -373,7 +373,7 @@ enum evlist_ctl_cmd { EVLIST_CTL_CMD_ACK }; -int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack); +int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close); int evlist__initialize_ctlfd(struct evlist *evlist, int ctl_fd, int ctl_fd_ack); int evlist__finalize_ctlfd(struct evlist *evlist); bool evlist__ctlfd_initialized(struct evlist *evlist); diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 03678ff25539..266760ac9143 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -73,6 +73,7 @@ struct record_opts { unsigned int nr_threads_synthesize; int ctl_fd; int ctl_fd_ack; + bool ctl_fd_close; }; extern const char * const *record_usage; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index aa3bed48511b..9911fc6adbfd 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -136,6 +136,7 @@ struct perf_stat_config { struct rblist metric_events; int ctl_fd; int ctl_fd_ack; + bool ctl_fd_close; }; void perf_stat__set_big_num(int set); -- cgit v1.3.1 From d20aff1512f013fab79b8740427b7574569a9a2e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 1 Sep 2020 12:37:57 +0300 Subject: perf record: Add 'snapshot' control command Add 'snapshot' control command to create an AUX area tracing snapshot the same as if sending SIGUSR2. The advantage of the FIFO is that access is governed by access to the FIFO. Example: $ mkfifo perf.control $ mkfifo perf.ack $ cat perf.ack & [1] 15235 $ sudo ~/bin/perf record --control fifo:perf.control,perf.ack -S -e intel_pt//u -- sleep 60 & [2] 15243 $ ps -e | grep perf 15244 pts/1 00:00:00 perf $ kill -USR2 15244 bash: kill: (15244) - Operation not permitted $ echo snapshot > perf.control ack $ Signed-off-by: Adrian Hunter Acked-by: Alexey Budankov Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20200901093758.32293-6-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 8 ++++---- tools/perf/builtin-record.c | 24 +++++++++++++++++------- tools/perf/builtin-stat.c | 1 + tools/perf/util/evlist.c | 11 +++++++++-- tools/perf/util/evlist.h | 5 ++++- 5 files changed, 35 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index c1f44633abed..768888b9326a 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -635,10 +635,10 @@ endif::HAVE_LIBPFM[] --control=fd:ctl-fd[,ack-fd]:: ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows. Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, -'disable': disable events). Measurements can be started with events disabled using ---delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor -to synchronize with the controlling process. Example of bash shell script to enable and -disable events during measurements: +'disable': disable events, 'snapshot': AUX area tracing snapshot). Measurements can be +started with events disabled using --delay=-1 option. Optionally send control command +completion ('ack\n') to ack-fd descriptor to synchronize with the controlling process. +Example of bash shell script to enable and disable events during measurements: #!/bin/bash diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index c83aec4940de..d0f9f8107f47 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1593,6 +1593,16 @@ static int record__init_clock(struct record *rec) return 0; } +static void hit_auxtrace_snapshot_trigger(struct record *rec) +{ + if (trigger_is_ready(&auxtrace_snapshot_trigger)) { + trigger_hit(&auxtrace_snapshot_trigger); + auxtrace_record__snapshot_started = 1; + if (auxtrace_record__snapshot_start(rec->itr)) + trigger_error(&auxtrace_snapshot_trigger); + } +} + static int __cmd_record(struct record *rec, int argc, const char **argv) { int err; @@ -1937,6 +1947,10 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) case EVLIST_CTL_CMD_DISABLE: pr_info(EVLIST_DISABLED_MSG); break; + case EVLIST_CTL_CMD_SNAPSHOT: + hit_auxtrace_snapshot_trigger(rec); + evlist__ctlfd_ack(rec->evlist); + break; case EVLIST_CTL_CMD_ACK: case EVLIST_CTL_CMD_UNSUPPORTED: default: @@ -2589,7 +2603,8 @@ static struct option __record_options[] = { parse_libpfm_events_option), #endif OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", - "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n" + "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" + "\t\t\t 'snapshot': AUX area tracing snapshot).\n" "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", parse_control_option), @@ -2842,12 +2857,7 @@ static void snapshot_sig_handler(int sig __maybe_unused) { struct record *rec = &record; - if (trigger_is_ready(&auxtrace_snapshot_trigger)) { - trigger_hit(&auxtrace_snapshot_trigger); - auxtrace_record__snapshot_started = 1; - if (auxtrace_record__snapshot_start(record.itr)) - trigger_error(&auxtrace_snapshot_trigger); - } + hit_auxtrace_snapshot_trigger(rec); if (switch_output_signal(rec)) trigger_hit(&switch_output_trigger); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 14688710195c..6562a9bdfa38 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -578,6 +578,7 @@ static void process_evlist(struct evlist *evlist, unsigned int interval) process_interval(); pr_info(EVLIST_DISABLED_MSG); break; + case EVLIST_CTL_CMD_SNAPSHOT: case EVLIST_CTL_CMD_ACK: case EVLIST_CTL_CMD_UNSUPPORTED: default: diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 00593e5f2a9d..e72ff7e78dec 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1887,13 +1887,17 @@ static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd, } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_DISABLE_TAG, (sizeof(EVLIST_CTL_CMD_DISABLE_TAG)-1))) { *cmd = EVLIST_CTL_CMD_DISABLE; + } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_SNAPSHOT_TAG, + (sizeof(EVLIST_CTL_CMD_SNAPSHOT_TAG)-1))) { + *cmd = EVLIST_CTL_CMD_SNAPSHOT; + pr_debug("is snapshot\n"); } } return bytes_read ? (int)bytes_read : err; } -static int evlist__ctlfd_ack(struct evlist *evlist) +int evlist__ctlfd_ack(struct evlist *evlist) { int err; @@ -1929,13 +1933,16 @@ int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd) case EVLIST_CTL_CMD_DISABLE: evlist__disable(evlist); break; + case EVLIST_CTL_CMD_SNAPSHOT: + break; case EVLIST_CTL_CMD_ACK: case EVLIST_CTL_CMD_UNSUPPORTED: default: pr_debug("ctlfd: unsupported %d\n", *cmd); break; } - if (!(*cmd == EVLIST_CTL_CMD_ACK || *cmd == EVLIST_CTL_CMD_UNSUPPORTED)) + if (!(*cmd == EVLIST_CTL_CMD_ACK || *cmd == EVLIST_CTL_CMD_UNSUPPORTED || + *cmd == EVLIST_CTL_CMD_SNAPSHOT)) evlist__ctlfd_ack(evlist); } } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a5678eb5ee60..91d1da6e1fe3 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -363,6 +363,7 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evlist, #define EVLIST_CTL_CMD_ENABLE_TAG "enable" #define EVLIST_CTL_CMD_DISABLE_TAG "disable" #define EVLIST_CTL_CMD_ACK_TAG "ack\n" +#define EVLIST_CTL_CMD_SNAPSHOT_TAG "snapshot" #define EVLIST_CTL_CMD_MAX_LEN 64 @@ -370,7 +371,8 @@ enum evlist_ctl_cmd { EVLIST_CTL_CMD_UNSUPPORTED = 0, EVLIST_CTL_CMD_ENABLE, EVLIST_CTL_CMD_DISABLE, - EVLIST_CTL_CMD_ACK + EVLIST_CTL_CMD_ACK, + EVLIST_CTL_CMD_SNAPSHOT, }; int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close); @@ -378,6 +380,7 @@ int evlist__initialize_ctlfd(struct evlist *evlist, int ctl_fd, int ctl_fd_ack); int evlist__finalize_ctlfd(struct evlist *evlist); bool evlist__ctlfd_initialized(struct evlist *evlist); int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd); +int evlist__ctlfd_ack(struct evlist *evlist); #define EVLIST_ENABLED_MSG "Events enabled\n" #define EVLIST_DISABLED_MSG "Events disabled\n" -- cgit v1.3.1 From 0ce0c78eff7d22c8a261de6c4305a5abb638c200 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 4 Aug 2020 11:35:56 -0700 Subject: tools/memory-model: Expand the cheatsheet.txt notion of relaxed This commit adds a key entry enumerating the various types of relaxed operations. While in the area, it also renames the relaxed rows. [ paulmck: Apply Boqun Feng feedback. ] Acked-by: Boqun Feng Signed-off-by: Paul E. McKenney --- tools/memory-model/Documentation/cheatsheet.txt | 33 ++++++++++++++----------- 1 file changed, 19 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/memory-model/Documentation/cheatsheet.txt b/tools/memory-model/Documentation/cheatsheet.txt index 33ba98d72b16..99d00870b160 100644 --- a/tools/memory-model/Documentation/cheatsheet.txt +++ b/tools/memory-model/Documentation/cheatsheet.txt @@ -3,9 +3,9 @@ C Self R W RMW Self R W DR DW RMW SV -- ---- - - --- ---- - - -- -- --- -- -Store, e.g., WRITE_ONCE() Y Y -Load, e.g., READ_ONCE() Y Y Y Y -Unsuccessful RMW operation Y Y Y Y +Relaxed store Y Y +Relaxed load Y Y Y Y +Relaxed RMW operation Y Y Y Y rcu_dereference() Y Y Y Y Successful *_acquire() R Y Y Y Y Y Y Successful *_release() C Y Y Y W Y @@ -17,14 +17,19 @@ smp_mb__before_atomic() CP Y Y Y a a a a Y smp_mb__after_atomic() CP a a Y Y Y Y Y Y -Key: C: Ordering is cumulative - P: Ordering propagates - R: Read, for example, READ_ONCE(), or read portion of RMW - W: Write, for example, WRITE_ONCE(), or write portion of RMW - Y: Provides ordering - a: Provides ordering given intervening RMW atomic operation - DR: Dependent read (address dependency) - DW: Dependent write (address, data, or control dependency) - RMW: Atomic read-modify-write operation - SELF: Orders self, as opposed to accesses before and/or after - SV: Orders later accesses to the same variable +Key: Relaxed: A relaxed operation is either READ_ONCE(), WRITE_ONCE(), + a *_relaxed() RMW operation, an unsuccessful RMW + operation, a non-value-returning RMW operation such + as atomic_inc(), or one of the atomic*_read() and + atomic*_set() family of operations. + C: Ordering is cumulative + P: Ordering propagates + R: Read, for example, READ_ONCE(), or read portion of RMW + W: Write, for example, WRITE_ONCE(), or write portion of RMW + Y: Provides ordering + a: Provides ordering given intervening RMW atomic operation + DR: Dependent read (address dependency) + DW: Dependent write (address, data, or control dependency) + RMW: Atomic read-modify-write operation + SELF: Orders self, as opposed to accesses before and/or after + SV: Orders later accesses to the same variable -- cgit v1.3.1 From bbe544682ee25a41f20a59fc7c29f790d502b3ce Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Sep 2020 13:10:43 -0300 Subject: perf annotate: Allow configuring the 'disassembler_style' knob via 'perf config' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # perf annotate --stdio2 acpi_processor_ffh_cstate_enter > default # perf config annotate.disassembler_style=intel # perf config annotate.disassembler_style annotate.disassembler_style=intel # perf annotate --stdio2 acpi_processor_ffh_cstate_enter > intel # diff -u default intel --- default 2020-09-04 13:09:26.019205732 -0300 +++ intel 2020-09-04 13:09:52.823795081 -0300 @@ -1,42 +1,42 @@ Samples: 1K of event 'cycles', 4000 Hz, Event count (approx.): 990065316, [percent: local period] acpi_processor_ffh_cstate_enter() /lib/modules/5.9.0-rc3/build/vmlinux -Percent → callq __fentry__ - mov cpu_number,%edx - mov %edx,%edx - mov cpu_cstate_entry,%rax - add -0x7dbe9700(,%rdx,8),%rax - movzbl 0x9(%rdi),%edx - mov 0x4(%rax,%rdx,8),%edi - mov (%rax,%rdx,8),%esi - → jmpq 137ccc6 - 2d: → jmpq 137ccd8 +Percent → call __fentry__ + mov edx,DWORD PTR gs:[rip+0x7e541d74] + mov edx,edx + mov rax,QWORD PTR [rip+0x152b8fb] + add rax,QWORD PTR [rdx*8-0x7dbe9700] + movzx edx,BYTE PTR [rdi+0x9] + mov edi,DWORD PTR [rax+rdx*8+0x4] + mov esi,DWORD PTR [rax+rdx*8] + → jmp 137ccc6 + 2d: → jmp 137ccd8 mfence - mov %gs:0x17bc0,%rax - clflush (%rax) + mov rax,QWORD PTR gs:0x17bc0 + clflush BYTE PTR [rax] mfence - xor %edx,%edx - mov %rdx,%rcx - mov %gs:0x17bc0,%rax - 0.00 monitor %rax,%ecx,%edx - mov (%rax),%rax - test $0x8,%al + xor edx,edx + mov rcx,rdx + mov rax,QWORD PTR gs:0x17bc0 + 0.00 monitor + mov rax,QWORD PTR [rax] + test al,0x8 ↓ jne 71 - ↓ jmpq 68 - verw 0x538b08(%rip) # ffffffff82008150 - 68: mov %rsi,%rax - mov %rdi,%rcx -100.00 mwait %eax,%ecx - 71: mov %gs:0x17bc0,%rax - lock andb $0xdf,0x2(%rax) - lock addl $0x0,-0x4(%rsp) - mov (%rax),%rax - test $0x8,%al + ↓ jmp 68 + verw WORD PTR [rip+0x538b08] # ffffffff82008150 + 68: mov rax,rsi + mov rcx,rdi +100.00 mwait + 71: mov rax,QWORD PTR gs:0x17bc0 + lock and BYTE PTR [rax+0x2],0xdf + lock add DWORD PTR [rsp-0x4],0x0 + mov rax,QWORD PTR [rax] + test al,0x8 ↓ je 97 - andl $0x7fffffff,__preempt_count - 97: ← retq - mov %gs:0x17bc0,%rax - lock orb $0x20,0x2(%rax) - mov (%rax),%rax - test $0x8,%al + and DWORD PTR gs:[rip+0x7e548509],0x7fffffff + 97: ret + mov rax,QWORD PTR gs:0x17bc0 + lock or BYTE PTR [rax+0x2],0x20 + mov rax,QWORD PTR [rax] + test al,0x8 ↑ jne 71 - ↑ jmpq 2d + ↑ jmp 2d # Requested-by: Matt P. Dziubinski Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 5 +++++ tools/perf/util/annotate.c | 2 ++ 2 files changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 76408d986aed..31069d8a5304 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -242,6 +242,11 @@ annotate.*:: These are in control of addresses, jump function, source code in lines of assembly code from a specific program. + annotate.disassembler_style: + Use this to change the default disassembler style to some other value + supported by binutils, such as "intel", see the '-M' option help in the + 'objdump' man page. + annotate.hide_src_code:: If a program which is analyzed has source code, this option lets 'annotate' print a list of assembly code with the source code. diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 0a1fcf787538..fc17af7ba845 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -3127,6 +3127,8 @@ static int annotation__config(const char *var, const char *value, void *data) value); } else if (!strcmp(var, "annotate.use_offset")) { opt->use_offset = perf_config_bool("use_offset", value); + } else if (!strcmp(var, "annotate.disassembler_style")) { + opt->disassembler_style = value; } else { pr_debug("%s variable unknown, ignoring...", var); } -- cgit v1.3.1 From 0b157b1000195bc96c3180e8a1d45e1c6c0f2fa1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Sep 2020 14:11:18 -0300 Subject: perf annotate: Add 'ret' (intel disasm style) as an alias for 'retq' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we use the 'intel' disassembler style we get 'ret' instead of 'retq', so add that as an alias. # perf annotate --disassembler-style=intel --stdio2 acpi_processor_ffh_cstate_enter > before Apply this patch and then: # perf annotate --disassembler-style=intel --stdio2 acpi_processor_ffh_cstate_enter > after # diff -u before after --- before 2020-09-04 14:10:47.768414634 -0300 +++ after 2020-09-04 14:10:59.116681039 -0300 @@ -33,7 +33,7 @@ test al,0x8 ↓ je 97 and DWORD PTR gs:[rip+0x7e548509],0x7fffffff - 97: ret + 97: ← ret mov rax,QWORD PTR gs:0x17bc0 lock or BYTE PTR [rax+0x2],0x20 mov rax,QWORD PTR [rax] # Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Ian Rogers Cc: Jin Yao Cc: Jiri Olsa Cc: Martin Liška Cc: Matt P. Dziubinski Cc: Namhyung Kim Cc: Ravi Bangoria Cc: Thomas Richter Cc: Wang Nan Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/annotate/instructions.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c index 7eb5621c021d..24ea12ec7e02 100644 --- a/tools/perf/arch/x86/annotate/instructions.c +++ b/tools/perf/arch/x86/annotate/instructions.c @@ -110,6 +110,7 @@ static struct ins x86__instructions[] = { { .name = "por", .ops = &mov_ops, }, { .name = "rclb", .ops = &mov_ops, }, { .name = "rcll", .ops = &mov_ops, }, + { .name = "ret", .ops = &ret_ops, }, { .name = "retq", .ops = &ret_ops, }, { .name = "sbb", .ops = &mov_ops, }, { .name = "sbbl", .ops = &mov_ops, }, -- cgit v1.3.1 From 9818923634206b751192e8f1554ecb93874d7d9f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 1 Sep 2020 12:37:58 +0300 Subject: perf intel-pt: Document snapshot control command The documentation describes snapshot mode. Update it to include the new snapshot control command. Signed-off-by: Adrian Hunter Acked-by: Alexey Budankov Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20200901093758.32293-7-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-intel-pt.txt | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index d5a266d7f15b..cb637e0d0743 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -558,7 +558,7 @@ The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g Intel PT modes of operation ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Intel PT can be used in 2 modes: +Intel PT can be used in 3 modes: full-trace mode sample mode snapshot mode @@ -571,7 +571,8 @@ Sample mode attaches a Intel PT sample to other events e.g. perf record --aux-sample -e intel_pt//u -e branch-misses:u -Snapshot mode captures the available data when a signal is sent e.g. +Snapshot mode captures the available data when a signal is sent or "snapshot" +control command is issued. e.g. using a signal perf record -v -e intel_pt//u -S ./loopy 1000000000 & [1] 11435 @@ -582,7 +583,23 @@ Note that the signal sent is SIGUSR2. Note that "Recording AUX area tracing snapshot" is displayed because the -v option is used. -The 2 modes cannot be used together. +The advantage of using "snapshot" control command is that the access is +controlled by access to a FIFO e.g. + + $ mkfifo perf.control + $ mkfifo perf.ack + $ cat perf.ack & + [1] 15235 + $ sudo ~/bin/perf record --control fifo:perf.control,perf.ack -S -e intel_pt//u -- sleep 60 & + [2] 15243 + $ ps -e | grep perf + 15244 pts/1 00:00:00 perf + $ kill -USR2 15244 + bash: kill: (15244) - Operation not permitted + $ echo snapshot > perf.control + ack + +The 3 Intel PT modes of operation cannot be used together. Buffer handling -- cgit v1.3.1 From ee7fe31e6e264d748bec5378c4d5417e14019666 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 3 Sep 2020 15:29:37 +0300 Subject: perf tools: Consolidate close_control_option()'s into one function Consolidate control option fifo closing into one function. Signed-off-by: Adrian Hunter Suggested-by: Alexey Budankov Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20200903122937.25691-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 12 +----------- tools/perf/builtin-stat.c | 12 +----------- tools/perf/util/evlist.c | 10 ++++++++++ tools/perf/util/evlist.h | 1 + 4 files changed, 13 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d0f9f8107f47..adf311d15d3d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2253,16 +2253,6 @@ static int parse_control_option(const struct option *opt, return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); } -static void close_control_option(struct record_opts *opts) -{ - if (opts->ctl_fd_close) { - opts->ctl_fd_close = false; - close(opts->ctl_fd); - if (opts->ctl_fd_ack >= 0) - close(opts->ctl_fd_ack); - } -} - static void switch_output_size_warn(struct record *rec) { u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); @@ -2849,7 +2839,7 @@ out: symbol__exit(); auxtrace_record__free(rec->itr); out_opts: - close_control_option(&rec->opts); + evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close); return err; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 6562a9bdfa38..7f8d756d9408 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1051,16 +1051,6 @@ static int parse_control_option(const struct option *opt, return evlist__parse_control(str, &config->ctl_fd, &config->ctl_fd_ack, &config->ctl_fd_close); } -static void close_control_option(struct perf_stat_config *config) -{ - if (config->ctl_fd_close) { - config->ctl_fd_close = false; - close(config->ctl_fd); - if (config->ctl_fd_ack >= 0) - close(config->ctl_fd_ack); - } -} - static struct option stat_options[] = { OPT_BOOLEAN('T', "transaction", &transaction_run, "hardware transaction statistics"), @@ -2410,7 +2400,7 @@ out: metricgroup__rblist_exit(&stat_config.metric_events); runtime_stat_delete(&stat_config); - close_control_option(&stat_config); + evlist__close_control(stat_config.ctl_fd, stat_config.ctl_fd_ack, &stat_config.ctl_fd_close); return status; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e72ff7e78dec..ee7b576d3b12 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1802,6 +1802,16 @@ int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *c return 0; } +void evlist__close_control(int ctl_fd, int ctl_fd_ack, bool *ctl_fd_close) +{ + if (*ctl_fd_close) { + *ctl_fd_close = false; + close(ctl_fd); + if (ctl_fd_ack >= 0) + close(ctl_fd_ack); + } +} + int evlist__initialize_ctlfd(struct evlist *evlist, int fd, int ack) { if (fd == -1) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 91d1da6e1fe3..bc38a53f6a1a 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -376,6 +376,7 @@ enum evlist_ctl_cmd { }; int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close); +void evlist__close_control(int ctl_fd, int ctl_fd_ack, bool *ctl_fd_close); int evlist__initialize_ctlfd(struct evlist *evlist, int ctl_fd, int ctl_fd_ack); int evlist__finalize_ctlfd(struct evlist *evlist); bool evlist__ctlfd_initialized(struct evlist *evlist); -- cgit v1.3.1 From 2ae05fe0a9dfe204a6c83bbe6bd1312b3bb3d301 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 4 Sep 2020 23:23:57 +0800 Subject: perf: ftrace: Add filter support for option -F/--funcs Same as 'perf probe -F', this patch adds filter support for the ftrace subcommand option '-F, --funcs <[FILTER]>'. Here is an example that only lists functions which start with 'vfs_': $ sudo perf ftrace -F vfs_* vfs_fadvise vfs_fallocate vfs_truncate vfs_open vfs_setpos vfs_llseek vfs_readf vfs_writef ... Suggested-by: Arnaldo Carvalho de Melo Signed-off-by: Changbin Du Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200904152357.6053-1-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-ftrace.txt | 3 +- tools/perf/builtin-ftrace.c | 84 +++++++++++++++++++++++++++++--- 2 files changed, 80 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 78358af9a1c4..1e91121bac0f 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -33,7 +33,8 @@ OPTIONS -F:: --funcs:: - List all available functions to trace. + List available functions to trace. It accepts a pattern to + only list interested functions. -p:: --pid=:: diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 1d44bc2f63d8..9366fad591dc 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -25,6 +25,7 @@ #include "target.h" #include "cpumap.h" #include "thread_map.h" +#include "strfilter.h" #include "util/cap.h" #include "util/config.h" #include "util/units.h" @@ -36,7 +37,6 @@ struct perf_ftrace { struct evlist *evlist; struct target target; const char *tracer; - bool list_avail_functions; struct list_head filters; struct list_head notrace; struct list_head graph_funcs; @@ -181,6 +181,40 @@ out: return ret; } +static int read_tracing_file_by_line(const char *name, + void (*cb)(char *str, void *arg), + void *cb_arg) +{ + char *line = NULL; + size_t len = 0; + char *file; + FILE *fp; + + file = get_tracing_file(name); + if (!file) { + pr_debug("cannot get tracing file: %s\n", name); + return -1; + } + + fp = fopen(file, "r"); + if (fp == NULL) { + pr_debug("cannot open tracing file: %s\n", name); + put_tracing_file(file); + return -1; + } + + while (getline(&line, &len, fp) != -1) { + cb(line, cb_arg); + } + + if (line) + free(line); + + fclose(fp); + put_tracing_file(file); + return 0; +} + static int write_tracing_file_int(const char *name, int value) { char buf[16]; @@ -557,9 +591,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) signal(SIGCHLD, sig_handler); signal(SIGPIPE, sig_handler); - if (ftrace->list_avail_functions) - return read_tracing_file_to_stdout("available_filter_functions"); - if (reset_tracing_files(ftrace) < 0) { pr_err("failed to reset ftrace\n"); goto out; @@ -683,6 +714,46 @@ static int perf_ftrace_config(const char *var, const char *value, void *cb) return -1; } +static void list_function_cb(char *str, void *arg) +{ + struct strfilter *filter = (struct strfilter *)arg; + + if (strfilter__compare(filter, str)) + printf("%s", str); +} + +static int opt_list_avail_functions(const struct option *opt __maybe_unused, + const char *str, int unset) +{ + struct strfilter *filter; + const char *err = NULL; + int ret; + + if (unset || !str) + return -1; + + filter = strfilter__new(str, &err); + if (!filter) + return err ? -EINVAL : -ENOMEM; + + ret = strfilter__or(filter, str, &err); + if (ret == -EINVAL) { + pr_err("Filter parse error at %td.\n", err - str + 1); + pr_err("Source: \"%s\"\n", str); + pr_err(" %*c\n", (int)(err - str + 1), '^'); + strfilter__delete(filter); + return ret; + } + + ret = read_tracing_file_by_line("available_filter_functions", + list_function_cb, filter); + strfilter__delete(filter); + if (ret < 0) + return ret; + + exit(0); +} + static int parse_filter_func(const struct option *opt, const char *str, int unset __maybe_unused) { @@ -817,8 +888,9 @@ int cmd_ftrace(int argc, const char **argv) const struct option ftrace_options[] = { OPT_STRING('t', "tracer", &ftrace.tracer, "tracer", "Tracer to use: function_graph(default) or function"), - OPT_BOOLEAN('F', "funcs", &ftrace.list_avail_functions, - "Show available functions to filter"), + OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]", + "Show available functions to filter", + opt_list_avail_functions, "*"), OPT_STRING('p', "pid", &ftrace.target.pid, "pid", "Trace on existing process id"), /* TODO: Add short option -t after -t/--tracer can be removed. */ -- cgit v1.3.1 From 60d804521ec4cd01217a96f33cd1bb29e295333d Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 1 Sep 2020 17:09:41 -0500 Subject: perf vendor events amd: Add L2 Prefetch events for zen1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Later revisions of PPRs that post-date the original Family 17h events submission patch add these events. Specifically, they were not in this 2017 revision of the F17h PPR: Processor Programming Reference (PPR) for AMD Family 17h Model 01h, Revision B1 Processors Rev 1.14 - April 15, 2017 But e.g., are included in this 2019 version of the PPR: Processor Programming Reference (PPR) for AMD Family 17h Model 18h, Revision B1 Processors Rev. 3.14 - Sep 26, 2019 Fixes: 98c07a8f74f8 ("perf vendor events amd: perf PMU events for AMD Family 17h") Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 Signed-off-by: Kim Phillips Reviewed-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Borislav Petkov Cc: Jin Yao Cc: Jiri Olsa Cc: John Garry Cc: Jon Grimm Cc: Kan Liang Cc: Mark Rutland Cc: Martin Jambor Cc: Martin Liška Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: stable@vger.kernel.org Cc: Stephane Eranian Cc: Vijay Thakkar Cc: William Cohen Cc: Yunfeng Ye Link: http://lore.kernel.org/lkml/20200901220944.277505-1-kim.phillips@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/amdzen1/cache.json | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json index 404d4c569c01..695ed3ffa3a6 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json @@ -249,6 +249,24 @@ "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.", "UMask": "0x1" }, + { + "EventName": "l2_pf_hit_l2", + "EventCode": "0x70", + "BriefDescription": "L2 prefetch hit in L2.", + "UMask": "0xff" + }, + { + "EventName": "l2_pf_miss_l2_hit_l3", + "EventCode": "0x71", + "BriefDescription": "L2 prefetcher hits in L3. Counts all L2 prefetches accepted by the L2 pipeline which miss the L2 cache and hit the L3.", + "UMask": "0xff" + }, + { + "EventName": "l2_pf_miss_l2_l3", + "EventCode": "0x72", + "BriefDescription": "L2 prefetcher misses in L3. All L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3 caches.", + "UMask": "0xff" + }, { "EventName": "l3_request_g1.caching_l3_cache_accesses", "EventCode": "0x01", -- cgit v1.3.1 From ab22eea35f1f120c4a379aa26e5333e7e41bb303 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 1 Sep 2020 17:09:42 -0500 Subject: perf vendor events amd: Add ITLB Instruction Fetch Hits event for zen1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ITLB Instruction Fetch Hits event isn't documented even in later zen1 PPRs, but it seems to count correctly on zen1 hardware. Add it to zen1 group so zen1 users can use the upcoming IC Fetch Miss Ratio Metric. The IF1G, 1IF2M, IF4K (Instruction fetches to a 1 GB, 2 MB, and 4K page) unit masks are not added because unlike zen2 hardware, zen1 hardware counts all its unit masks with a 0 unit mask according to the old convention: zen1$ perf stat -e cpu/event=0x94/,cpu/event=0x94,umask=0xff/ sleep 1 Performance counter stats for 'sleep 1': 211,318 cpu/event=0x94/u 211,318 cpu/event=0x94,umask=0xff/u Rome/zen2: zen2$ perf stat -e cpu/event=0x94/,cpu/event=0x94,umask=0xff/ sleep 1 Performance counter stats for 'sleep 1': 0 cpu/event=0x94/u 190,744 cpu/event=0x94,umask=0xff/u Signed-off-by: Kim Phillips Acked-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo # on Zen2 only (3900x) Cc: Alexander Shishkin Cc: Andi Kleen Cc: Borislav Petkov Cc: Jin Yao Cc: Jiri Olsa Cc: John Garry Cc: Jon Grimm Cc: Kan Liang Cc: Mark Rutland Cc: Martin Jambor Cc: Martin Liška Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Vijay Thakkar Cc: William Cohen Cc: Yunfeng Ye Link: http://lore.kernel.org/lkml/20200901220944.277505-2-kim.phillips@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/amdzen1/branch.json | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/branch.json b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json index a9943eeb8d6b..4ceb67a0db21 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/branch.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json @@ -19,5 +19,10 @@ "EventName": "bp_de_redirect", "EventCode": "0x91", "BriefDescription": "Decoder Overrides Existing Branch Prediction (speculative)." + }, + { + "EventName": "bp_l1_tlb_fetch_hit", + "EventCode": "0x94", + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB." } ] -- cgit v1.3.1 From 08ed77e414ab234258166f5628d91a7a0e681ff4 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 1 Sep 2020 17:09:43 -0500 Subject: perf vendor events amd: Add recommended events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for events listed in Section 2.1.15.2 "Performance Measurement" of "PPR for AMD Family 17h Model 31h B0 - 55803 Rev 0.54 - Sep 12, 2019". perf now supports these new events (-e): all_dc_accesses all_tlbs_flushed l1_dtlb_misses l2_cache_accesses_from_dc_misses l2_cache_accesses_from_ic_misses l2_cache_hits_from_dc_misses l2_cache_hits_from_ic_misses l2_cache_misses_from_dc_misses l2_cache_misses_from_ic_miss l2_dtlb_misses l2_itlb_misses sse_avx_stalls uops_dispatched uops_retired l3_accesses l3_misses and these metrics (-M): branch_misprediction_ratio all_l2_cache_accesses all_l2_cache_hits all_l2_cache_misses ic_fetch_miss_ratio l2_cache_accesses_from_l2_hwpf l2_cache_hits_from_l2_hwpf l2_cache_misses_from_l2_hwpf l3_read_miss_latency l1_itlb_misses all_remote_links_outbound nps1_die_to_dram The nps1_die_to_dram event may need perf stat's --metric-no-group switch if the number of available data fabric counters is less than the number it uses (8). Committer testing: On a AMD Ryzen 3900x system: Before: # perf list all_dc_accesses all_tlbs_flushed l1_dtlb_misses l2_cache_accesses_from_dc_misses l2_cache_accesses_from_ic_misses l2_cache_hits_from_dc_misses l2_cache_hits_from_ic_misses l2_cache_misses_from_dc_misses l2_cache_misses_from_ic_miss l2_dtlb_misses l2_itlb_misses sse_avx_stalls uops_dispatched uops_retired l3_accesses l3_misses | grep -v "^Metric Groups:$" | grep -v "^$" # After: # perf list all_dc_accesses all_tlbs_flushed l1_dtlb_misses l2_cache_accesses_from_dc_misses l2_cache_accesses_from_ic_misses l2_cache_hits_from_dc_misses l2_cache_hits_from_ic_misses l2_cache_misses_from_dc_misses l2_cache_misses_from_ic_miss l2_dtlb_misses l2_itlb_misses sse_avx_stalls uops_dispatched uops_retired l3_accesses l3_misses | grep -v "^Metric Groups:$" | grep -v "^$" | grep -v "^recommended:$" all_dc_accesses [All L1 Data Cache Accesses] all_tlbs_flushed [All TLBs Flushed] l1_dtlb_misses [L1 DTLB Misses] l2_cache_accesses_from_dc_misses [L2 Cache Accesses from L1 Data Cache Misses (including prefetch)] l2_cache_accesses_from_ic_misses [L2 Cache Accesses from L1 Instruction Cache Misses (including prefetch)] l2_cache_hits_from_dc_misses [L2 Cache Hits from L1 Data Cache Misses] l2_cache_hits_from_ic_misses [L2 Cache Hits from L1 Instruction Cache Misses] l2_cache_misses_from_dc_misses [L2 Cache Misses from L1 Data Cache Misses] l2_cache_misses_from_ic_miss [L2 Cache Misses from L1 Instruction Cache Misses] l2_dtlb_misses [L2 DTLB Misses & Data page walks] l2_itlb_misses [L2 ITLB Misses & Instruction page walks] sse_avx_stalls [Mixed SSE/AVX Stalls] uops_dispatched [Micro-ops Dispatched] uops_retired [Micro-ops Retired] l3_accesses [L3 Accesses. Unit: amd_l3] l3_misses [L3 Misses (includes Chg2X). Unit: amd_l3] # # perf stat -a -e all_dc_accesses,all_tlbs_flushed,l1_dtlb_misses,l2_cache_accesses_from_dc_misses,l2_cache_accesses_from_ic_misses,l2_cache_hits_from_dc_misses,l2_cache_hits_from_ic_misses,l2_cache_misses_from_dc_misses,l2_cache_misses_from_ic_miss,l2_dtlb_misses,l2_itlb_misses,sse_avx_stalls,uops_dispatched,uops_retired,l3_accesses,l3_misses sleep 2 Performance counter stats for 'system wide': 433,439,949 all_dc_accesses (35.66%) 443 all_tlbs_flushed (35.66%) 2,985,885 l1_dtlb_misses (35.66%) 18,318,019 l2_cache_accesses_from_dc_misses (35.68%) 50,114,810 l2_cache_accesses_from_ic_misses (35.72%) 12,423,978 l2_cache_hits_from_dc_misses (35.74%) 40,703,103 l2_cache_hits_from_ic_misses (35.74%) 6,698,673 l2_cache_misses_from_dc_misses (35.74%) 12,090,892 l2_cache_misses_from_ic_miss (35.74%) 614,267 l2_dtlb_misses (35.74%) 216,036 l2_itlb_misses (35.74%) 11,977 sse_avx_stalls (35.74%) 999,276,223 uops_dispatched (35.73%) 1,075,311,620 uops_retired (35.69%) 1,420,763 l3_accesses 540,164 l3_misses 2.002344121 seconds time elapsed # perf stat -a -e all_dc_accesses,all_tlbs_flushed,l1_dtlb_misses,l2_cache_accesses_from_dc_misses,l2_cache_accesses_from_ic_misses sleep 2 Performance counter stats for 'system wide': 175,943,104 all_dc_accesses 310 all_tlbs_flushed 2,280,359 l1_dtlb_misses 11,700,151 l2_cache_accesses_from_dc_misses 25,414,963 l2_cache_accesses_from_ic_misses 2.001957818 seconds time elapsed # Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 Signed-off-by: Kim Phillips Acked-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Borislav Petkov Cc: Jin Yao Cc: Jiri Olsa Cc: John Garry Cc: Jon Grimm Cc: Kan Liang Cc: Mark Rutland Cc: Martin Jambor Cc: Martin Liška Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Vijay Thakkar Cc: William Cohen Cc: Yunfeng Ye Link: http://lore.kernel.org/lkml/20200901220944.277505-3-kim.phillips@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/amdzen1/cache.json | 23 +++ .../pmu-events/arch/x86/amdzen1/data-fabric.json | 98 ++++++++++++ .../pmu-events/arch/x86/amdzen1/recommended.json | 178 +++++++++++++++++++++ tools/perf/pmu-events/arch/x86/amdzen2/cache.json | 23 +++ .../pmu-events/arch/x86/amdzen2/data-fabric.json | 98 ++++++++++++ .../pmu-events/arch/x86/amdzen2/recommended.json | 178 +++++++++++++++++++++ tools/perf/pmu-events/jevents.c | 1 + 7 files changed, 599 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/amdzen1/data-fabric.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen1/recommended.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen2/data-fabric.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen2/recommended.json (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json index 695ed3ffa3a6..4ea7ec4f496e 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json @@ -117,6 +117,11 @@ "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).", "UMask": "0x1" }, + { + "EventName": "l2_request_g1.all_no_prefetch", + "EventCode": "0x60", + "UMask": "0xf9" + }, { "EventName": "l2_request_g2.group1", "EventCode": "0x61", @@ -243,6 +248,24 @@ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.", "UMask": "0x1" }, + { + "EventName": "l2_cache_req_stat.ic_access_in_l2", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache requests in L2.", + "UMask": "0x7" + }, + { + "EventName": "l2_cache_req_stat.ic_dc_miss_in_l2", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2 and Data cache request miss in L2 (all types).", + "UMask": "0x9" + }, + { + "EventName": "l2_cache_req_stat.ic_dc_hit_in_l2", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request hit in L2 and Data cache request hit in L2 (all types).", + "UMask": "0xf6" + }, { "EventName": "l2_fill_pending.l2_fill_busy", "EventCode": "0x6d", diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/data-fabric.json b/tools/perf/pmu-events/arch/x86/amdzen1/data-fabric.json new file mode 100644 index 000000000000..40271df40015 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen1/data-fabric.json @@ -0,0 +1,98 @@ +[ + { + "EventName": "remote_outbound_data_controller_0", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 0", + "EventCode": "0x7c7", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_outbound_data_controller_1", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 1", + "EventCode": "0x807", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_outbound_data_controller_2", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 2", + "EventCode": "0x847", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_outbound_data_controller_3", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 3", + "EventCode": "0x887", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_0", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x07", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_1", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x47", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_2", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x87", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_3", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0xc7", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_4", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x107", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_5", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x147", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_6", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x187", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_7", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x1c7", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json new file mode 100644 index 000000000000..2cfe2d2f3bfd --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json @@ -0,0 +1,178 @@ +[ + { + "MetricName": "branch_misprediction_ratio", + "BriefDescription": "Execution-Time Branch Misprediction Ratio (Non-Speculative)", + "MetricExpr": "d_ratio(ex_ret_brn_misp, ex_ret_brn)", + "MetricGroup": "branch_prediction", + "ScaleUnit": "100%" + }, + { + "EventName": "all_dc_accesses", + "EventCode": "0x29", + "BriefDescription": "All L1 Data Cache Accesses", + "UMask": "0x7" + }, + { + "MetricName": "all_l2_cache_accesses", + "BriefDescription": "All L2 Cache Accesses", + "MetricExpr": "l2_request_g1.all_no_prefetch + l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l2_cache_accesses_from_ic_misses", + "EventCode": "0x60", + "BriefDescription": "L2 Cache Accesses from L1 Instruction Cache Misses (including prefetch)", + "UMask": "0x10" + }, + { + "EventName": "l2_cache_accesses_from_dc_misses", + "EventCode": "0x60", + "BriefDescription": "L2 Cache Accesses from L1 Data Cache Misses (including prefetch)", + "UMask": "0xc8" + }, + { + "MetricName": "l2_cache_accesses_from_l2_hwpf", + "BriefDescription": "L2 Cache Accesses from L2 HWPF", + "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "MetricName": "all_l2_cache_misses", + "BriefDescription": "All L2 Cache Misses", + "MetricExpr": "l2_cache_req_stat.ic_dc_miss_in_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l2_cache_misses_from_ic_miss", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Misses from L1 Instruction Cache Misses", + "UMask": "0x01" + }, + { + "EventName": "l2_cache_misses_from_dc_misses", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Misses from L1 Data Cache Misses", + "UMask": "0x08" + }, + { + "MetricName": "l2_cache_misses_from_l2_hwpf", + "BriefDescription": "L2 Cache Misses from L2 HWPF", + "MetricExpr": "l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "MetricName": "all_l2_cache_hits", + "BriefDescription": "All L2 Cache Hits", + "MetricExpr": "l2_cache_req_stat.ic_dc_hit_in_l2 + l2_pf_hit_l2", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l2_cache_hits_from_ic_misses", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Hits from L1 Instruction Cache Misses", + "UMask": "0x06" + }, + { + "EventName": "l2_cache_hits_from_dc_misses", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Hits from L1 Data Cache Misses", + "UMask": "0x70" + }, + { + "MetricName": "l2_cache_hits_from_l2_hwpf", + "BriefDescription": "L2 Cache Hits from L2 HWPF", + "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l3_accesses", + "EventCode": "0x04", + "BriefDescription": "L3 Accesses", + "UMask": "0xff", + "Unit": "L3PMC" + }, + { + "EventName": "l3_misses", + "EventCode": "0x04", + "BriefDescription": "L3 Misses (includes Chg2X)", + "UMask": "0x01", + "Unit": "L3PMC" + }, + { + "MetricName": "l3_read_miss_latency", + "BriefDescription": "Average L3 Read Miss Latency (in core clocks)", + "MetricExpr": "(xi_sys_fill_latency * 16) / xi_ccx_sdp_req1.all_l3_miss_req_typs", + "MetricGroup": "l3_cache", + "ScaleUnit": "1core clocks" + }, + { + "MetricName": "ic_fetch_miss_ratio", + "BriefDescription": "L1 Instruction Cache (32B) Fetch Miss Ratio", + "MetricExpr": "d_ratio(l2_cache_req_stat.ic_access_in_l2, bp_l1_tlb_fetch_hit + bp_l1_tlb_miss_l2_hit + bp_l1_tlb_miss_l2_miss)", + "MetricGroup": "l2_cache", + "ScaleUnit": "100%" + }, + { + "MetricName": "l1_itlb_misses", + "BriefDescription": "L1 ITLB Misses", + "MetricExpr": "bp_l1_tlb_miss_l2_hit + bp_l1_tlb_miss_l2_miss", + "MetricGroup": "tlb" + }, + { + "EventName": "l2_itlb_misses", + "EventCode": "0x85", + "BriefDescription": "L2 ITLB Misses & Instruction page walks", + "UMask": "0x07" + }, + { + "EventName": "l1_dtlb_misses", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Misses", + "UMask": "0xff" + }, + { + "EventName": "l2_dtlb_misses", + "EventCode": "0x45", + "BriefDescription": "L2 DTLB Misses & Data page walks", + "UMask": "0xf0" + }, + { + "EventName": "all_tlbs_flushed", + "EventCode": "0x78", + "BriefDescription": "All TLBs Flushed", + "UMask": "0xdf" + }, + { + "EventName": "uops_dispatched", + "EventCode": "0xaa", + "BriefDescription": "Micro-ops Dispatched", + "UMask": "0x03" + }, + { + "EventName": "sse_avx_stalls", + "EventCode": "0x0e", + "BriefDescription": "Mixed SSE/AVX Stalls", + "UMask": "0x0e" + }, + { + "EventName": "uops_retired", + "EventCode": "0xc1", + "BriefDescription": "Micro-ops Retired" + }, + { + "MetricName": "all_remote_links_outbound", + "BriefDescription": "Approximate: Outbound data bytes for all Remote Links for a node (die)", + "MetricExpr": "remote_outbound_data_controller_0 + remote_outbound_data_controller_1 + remote_outbound_data_controller_2 + remote_outbound_data_controller_3", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "3e-5MiB" + }, + { + "MetricName": "nps1_die_to_dram", + "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die) (may need --metric-no-group)", + "MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.1e-5MiB" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json index 1c60bfa0f00b..f61b982f83ca 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json @@ -47,6 +47,11 @@ "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).", "UMask": "0x1" }, + { + "EventName": "l2_request_g1.all_no_prefetch", + "EventCode": "0x60", + "UMask": "0xf9" + }, { "EventName": "l2_request_g2.group1", "EventCode": "0x61", @@ -173,6 +178,24 @@ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.", "UMask": "0x1" }, + { + "EventName": "l2_cache_req_stat.ic_access_in_l2", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache requests in L2.", + "UMask": "0x7" + }, + { + "EventName": "l2_cache_req_stat.ic_dc_miss_in_l2", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2 and Data cache request miss in L2 (all types).", + "UMask": "0x9" + }, + { + "EventName": "l2_cache_req_stat.ic_dc_hit_in_l2", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request hit in L2 and Data cache request hit in L2 (all types).", + "UMask": "0xf6" + }, { "EventName": "l2_fill_pending.l2_fill_busy", "EventCode": "0x6d", diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/data-fabric.json b/tools/perf/pmu-events/arch/x86/amdzen2/data-fabric.json new file mode 100644 index 000000000000..40271df40015 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen2/data-fabric.json @@ -0,0 +1,98 @@ +[ + { + "EventName": "remote_outbound_data_controller_0", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 0", + "EventCode": "0x7c7", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_outbound_data_controller_1", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 1", + "EventCode": "0x807", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_outbound_data_controller_2", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 2", + "EventCode": "0x847", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_outbound_data_controller_3", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 3", + "EventCode": "0x887", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_0", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x07", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_1", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x47", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_2", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x87", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_3", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0xc7", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_4", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x107", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_5", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x147", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_6", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x187", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_7", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x1c7", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json new file mode 100644 index 000000000000..2ef91e25e661 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json @@ -0,0 +1,178 @@ +[ + { + "MetricName": "branch_misprediction_ratio", + "BriefDescription": "Execution-Time Branch Misprediction Ratio (Non-Speculative)", + "MetricExpr": "d_ratio(ex_ret_brn_misp, ex_ret_brn)", + "MetricGroup": "branch_prediction", + "ScaleUnit": "100%" + }, + { + "EventName": "all_dc_accesses", + "EventCode": "0x29", + "BriefDescription": "All L1 Data Cache Accesses", + "UMask": "0x7" + }, + { + "MetricName": "all_l2_cache_accesses", + "BriefDescription": "All L2 Cache Accesses", + "MetricExpr": "l2_request_g1.all_no_prefetch + l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l2_cache_accesses_from_ic_misses", + "EventCode": "0x60", + "BriefDescription": "L2 Cache Accesses from L1 Instruction Cache Misses (including prefetch)", + "UMask": "0x10" + }, + { + "EventName": "l2_cache_accesses_from_dc_misses", + "EventCode": "0x60", + "BriefDescription": "L2 Cache Accesses from L1 Data Cache Misses (including prefetch)", + "UMask": "0xc8" + }, + { + "MetricName": "l2_cache_accesses_from_l2_hwpf", + "BriefDescription": "L2 Cache Accesses from L2 HWPF", + "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "MetricName": "all_l2_cache_misses", + "BriefDescription": "All L2 Cache Misses", + "MetricExpr": "l2_cache_req_stat.ic_dc_miss_in_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l2_cache_misses_from_ic_miss", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Misses from L1 Instruction Cache Misses", + "UMask": "0x01" + }, + { + "EventName": "l2_cache_misses_from_dc_misses", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Misses from L1 Data Cache Misses", + "UMask": "0x08" + }, + { + "MetricName": "l2_cache_misses_from_l2_hwpf", + "BriefDescription": "L2 Cache Misses from L2 HWPF", + "MetricExpr": "l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "MetricName": "all_l2_cache_hits", + "BriefDescription": "All L2 Cache Hits", + "MetricExpr": "l2_cache_req_stat.ic_dc_hit_in_l2 + l2_pf_hit_l2", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l2_cache_hits_from_ic_misses", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Hits from L1 Instruction Cache Misses", + "UMask": "0x06" + }, + { + "EventName": "l2_cache_hits_from_dc_misses", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Hits from L1 Data Cache Misses", + "UMask": "0x70" + }, + { + "MetricName": "l2_cache_hits_from_l2_hwpf", + "BriefDescription": "L2 Cache Hits from L2 HWPF", + "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l3_accesses", + "EventCode": "0x04", + "BriefDescription": "L3 Accesses", + "UMask": "0xff", + "Unit": "L3PMC" + }, + { + "EventName": "l3_misses", + "EventCode": "0x04", + "BriefDescription": "L3 Misses (includes Chg2X)", + "UMask": "0x01", + "Unit": "L3PMC" + }, + { + "MetricName": "l3_read_miss_latency", + "BriefDescription": "Average L3 Read Miss Latency (in core clocks)", + "MetricExpr": "(xi_sys_fill_latency * 16) / xi_ccx_sdp_req1.all_l3_miss_req_typs", + "MetricGroup": "l3_cache", + "ScaleUnit": "1core clocks" + }, + { + "MetricName": "ic_fetch_miss_ratio", + "BriefDescription": "L1 Instruction Cache (32B) Fetch Miss Ratio", + "MetricExpr": "d_ratio(l2_cache_req_stat.ic_access_in_l2, bp_l1_tlb_fetch_hit + bp_l1_tlb_miss_l2_hit + bp_l1_tlb_miss_l2_tlb_miss)", + "MetricGroup": "l2_cache", + "ScaleUnit": "100%" + }, + { + "MetricName": "l1_itlb_misses", + "BriefDescription": "L1 ITLB Misses", + "MetricExpr": "bp_l1_tlb_miss_l2_hit + bp_l1_tlb_miss_l2_tlb_miss", + "MetricGroup": "tlb" + }, + { + "EventName": "l2_itlb_misses", + "EventCode": "0x85", + "BriefDescription": "L2 ITLB Misses & Instruction page walks", + "UMask": "0x07" + }, + { + "EventName": "l1_dtlb_misses", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Misses", + "UMask": "0xff" + }, + { + "EventName": "l2_dtlb_misses", + "EventCode": "0x45", + "BriefDescription": "L2 DTLB Misses & Data page walks", + "UMask": "0xf0" + }, + { + "EventName": "all_tlbs_flushed", + "EventCode": "0x78", + "BriefDescription": "All TLBs Flushed", + "UMask": "0xdf" + }, + { + "EventName": "uops_dispatched", + "EventCode": "0xaa", + "BriefDescription": "Micro-ops Dispatched", + "UMask": "0x03" + }, + { + "EventName": "sse_avx_stalls", + "EventCode": "0x0e", + "BriefDescription": "Mixed SSE/AVX Stalls", + "UMask": "0x0e" + }, + { + "EventName": "uops_retired", + "EventCode": "0xc1", + "BriefDescription": "Micro-ops Retired" + }, + { + "MetricName": "all_remote_links_outbound", + "BriefDescription": "Approximate: Outbound data bytes for all Remote Links for a node (die)", + "MetricExpr": "remote_outbound_data_controller_0 + remote_outbound_data_controller_1 + remote_outbound_data_controller_2 + remote_outbound_data_controller_3", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "3e-5MiB" + }, + { + "MetricName": "nps1_die_to_dram", + "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die) (may need --metric-no-group)", + "MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.1e-5MiB" + } +] diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index fc9c158bfa13..e37d17830c5c 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -240,6 +240,7 @@ static struct map { { "hisi_sccl,hha", "hisi_sccl,hha" }, { "hisi_sccl,l3c", "hisi_sccl,l3c" }, { "L3PMC", "amd_l3" }, + { "DFPMC", "amd_df" }, {} }; -- cgit v1.3.1 From 09b54b30ccdcd3e17cc13079f581b1a389b04939 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 1 Sep 2020 17:09:44 -0500 Subject: perf vendor events amd: Enable Family 19h users by matching Zen2 events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This enables zen3 users by reusing mostly-compatible zen2 events until the official public list of zen3 events is published in a future PPR. Signed-off-by: Kim Phillips Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Borislav Petkov Cc: Jin Yao Cc: Jiri Olsa Cc: John Garry Cc: Jon Grimm Cc: Kan Liang Cc: Mark Rutland Cc: Martin Jambor Cc: Martin Liška Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Vijay Thakkar Cc: William Cohen Cc: Yunfeng Ye Link: http://lore.kernel.org/lkml/20200901220944.277505-4-kim.phillips@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/mapfile.csv | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 25b06cf98747..2f2a209e87e1 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -38,3 +38,4 @@ GenuineIntel-6-7E,v1,icelake,core GenuineIntel-6-86,v1,tremontx,core AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v2,amdzen1,core AuthenticAMD-23-[[:xdigit:]]+,v1,amdzen2,core +AuthenticAMD-25-[[:xdigit:]]+,v1,amdzen2,core -- cgit v1.3.1 From 4751bddd3f983af2004ec470ca38b42d7a8a53bc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Sep 2020 17:11:59 -0300 Subject: perf tools: Make GTK2 support opt-in This is bitrotting, nobody is stepping up to work on it, and since we treat warnings as errors, feature detection is failing in its main, faster test (tools/build/feature/test-all.c) because of the GTK+2 infobar check. So make this opt-in, at some point ditch this if nobody volunteers to take care of this. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 5 ++--- tools/build/feature/Makefile | 2 +- tools/build/feature/test-all.c | 10 ---------- tools/perf/Makefile.config | 4 +++- tools/perf/Makefile.perf | 6 +++--- tools/perf/builtin-version.c | 1 - 6 files changed, 9 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 8b381d8ec9de..d37e11732f44 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -38,8 +38,6 @@ FEATURE_TESTS_BASIC := \ get_current_dir_name \ gettid \ glibc \ - gtk2 \ - gtk2-infobar \ libbfd \ libbfd-buildid \ libcap \ @@ -82,6 +80,8 @@ FEATURE_TESTS_EXTRA := \ compile-32 \ compile-x32 \ cplus-demangle \ + gtk2 \ + gtk2-infobar \ hello \ libbabeltrace \ libbfd-liberty \ @@ -112,7 +112,6 @@ FEATURE_DISPLAY ?= \ dwarf \ dwarf_getlocations \ glibc \ - gtk2 \ libbfd \ libbfd-buildid \ libcap \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 9e5f8db4a168..977067e34dff 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -91,7 +91,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$( ############################### $(OUTPUT)test-all.bin: - $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma + $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma $(OUTPUT)test-hello.bin: $(BUILD) diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 80c5795f324b..2c955628fc1a 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -78,14 +78,6 @@ # include "test-libslang.c" #undef main -#define main main_test_gtk2 -# include "test-gtk2.c" -#undef main - -#define main main_test_gtk2_infobar -# include "test-gtk2-infobar.c" -#undef main - #define main main_test_libbfd # include "test-libbfd.c" #undef main @@ -209,8 +201,6 @@ int main(int argc, char *argv[]) main_test_libelf_getshdrstrndx(); main_test_libunwind(); main_test_libslang(); - main_test_gtk2(argc, argv); - main_test_gtk2_infobar(argc, argv); main_test_libbfd(); main_test_libbfd_buildid(); main_test_backtrace(); diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index f73a85ea3e7f..854da830b5ca 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -724,12 +724,14 @@ ifndef NO_SLANG endif endif -ifndef NO_GTK2 +ifdef GTK2 FLAGS_GTK2=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) + $(call feature_check,gtk2) ifneq ($(feature-gtk2), 1) msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev); NO_GTK2 := 1 else + $(call feature_check,gtk2-infobar) ifeq ($(feature-gtk2-infobar), 1) GTK_CFLAGS := -DHAVE_GTK_INFO_BAR_SUPPORT endif diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index b2a3f5b652fe..920d8afb9238 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -48,7 +48,7 @@ include ../scripts/utilities.mak # # Define NO_SLANG if you do not want TUI support. # -# Define NO_GTK2 if you do not want GTK+ GUI support. +# Define GTK2 if you want GTK+ GUI support. # # Define NO_DEMANGLE if you do not want C++ symbol demangling. # @@ -386,7 +386,7 @@ ifneq ($(OUTPUT),) CFLAGS += -I$(OUTPUT) endif -ifndef NO_GTK2 +ifdef GTK2 ALL_PROGRAMS += $(OUTPUT)libperf-gtk.so GTK_IN := $(OUTPUT)gtk-in.o endif @@ -886,7 +886,7 @@ check: $(OUTPUT)common-cmds.h ### Installation rules -ifndef NO_GTK2 +ifdef GTK2 install-gtk: $(OUTPUT)libperf-gtk.so $(call QUIET_INSTALL, 'GTK UI') \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)'; \ diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c index 05cf2af9e2c2..d09ec2f03071 100644 --- a/tools/perf/builtin-version.c +++ b/tools/perf/builtin-version.c @@ -60,7 +60,6 @@ static void library_status(void) STATUS(HAVE_DWARF_SUPPORT, dwarf); STATUS(HAVE_DWARF_GETLOCATIONS_SUPPORT, dwarf_getlocations); STATUS(HAVE_GLIBC_SUPPORT, glibc); - STATUS(HAVE_GTK2_SUPPORT, gtk2); #ifndef HAVE_SYSCALL_TABLE_SUPPORT STATUS(HAVE_LIBAUDIT_SUPPORT, libaudit); #endif -- cgit v1.3.1 From 1a7581b174e99e18d459856bb26ab01258c027c3 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 4 Sep 2020 17:14:52 +0100 Subject: tools: bpftool: Fix formatting in bpftool-link documentation Fix a formatting error in the documentation for bpftool-link, so that the man page can build correctly. Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200904161454.31135-2-quentin@isovalent.com --- tools/bpf/bpftool/Documentation/bpftool-link.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst index 4a52e7a93339..dc7693b5e606 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-link.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst @@ -21,7 +21,7 @@ LINK COMMANDS | **bpftool** **link { show | list }** [*LINK*] | **bpftool** **link pin** *LINK* *FILE* -| **bpftool** **link detach *LINK* +| **bpftool** **link detach** *LINK* | **bpftool** **link help** | | *LINK* := { **id** *LINK_ID* | **pinned** *FILE* } -- cgit v1.3.1 From bc0b5a03079bd78fb3d5cba1ccabf0a7efb1d99f Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 4 Sep 2020 17:14:54 +0100 Subject: tools, bpf: Synchronise BPF UAPI header with tools Synchronise the bpf.h header under tools, to report the fixes recently brought to the documentation for the BPF helpers. Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200904161454.31135-4-quentin@isovalent.com --- tools/include/uapi/linux/bpf.h | 87 ++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 42 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 8dda13880957..90359cab501d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3349,38 +3349,38 @@ union bpf_attr { * Description * Dynamically cast a *sk* pointer to a *tcp6_sock* pointer. * Return - * *sk* if casting is valid, or NULL otherwise. + * *sk* if casting is valid, or **NULL** otherwise. * * struct tcp_sock *bpf_skc_to_tcp_sock(void *sk) * Description * Dynamically cast a *sk* pointer to a *tcp_sock* pointer. * Return - * *sk* if casting is valid, or NULL otherwise. + * *sk* if casting is valid, or **NULL** otherwise. * * struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk) * Description * Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer. * Return - * *sk* if casting is valid, or NULL otherwise. + * *sk* if casting is valid, or **NULL** otherwise. * * struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk) * Description * Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer. * Return - * *sk* if casting is valid, or NULL otherwise. + * *sk* if casting is valid, or **NULL** otherwise. * * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk) * Description * Dynamically cast a *sk* pointer to a *udp6_sock* pointer. * Return - * *sk* if casting is valid, or NULL otherwise. + * *sk* if casting is valid, or **NULL** otherwise. * * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. * To achieve this, the helper needs *task*, which is a valid - * pointer to struct task_struct. To store the stacktrace, the - * bpf program provides *buf* with a nonnegative *size*. + * pointer to **struct task_struct**. To store the stacktrace, the + * bpf program provides *buf* with a nonnegative *size*. * * The last argument, *flags*, holds the number of stack frames to * skip (from 0 to 255), masked with @@ -3410,12 +3410,12 @@ union bpf_attr { * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags) * Description * Load header option. Support reading a particular TCP header - * option for bpf program (BPF_PROG_TYPE_SOCK_OPS). + * option for bpf program (**BPF_PROG_TYPE_SOCK_OPS**). * * If *flags* is 0, it will search the option from the - * sock_ops->skb_data. The comment in "struct bpf_sock_ops" + * *skops*\ **->skb_data**. The comment in **struct bpf_sock_ops** * has details on what skb_data contains under different - * sock_ops->op. + * *skops*\ **->op**. * * The first byte of the *searchby_res* specifies the * kind that it wants to search. @@ -3435,7 +3435,7 @@ union bpf_attr { * [ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ]. * * To search for the standard window scale option (3), - * the searchby_res should be [ 3, 0, 0, .... 0 ]. + * the *searchby_res* should be [ 3, 0, 0, .... 0 ]. * Note, kind-length must be 0 for regular option. * * Searching for No-Op (0) and End-of-Option-List (1) are @@ -3445,27 +3445,30 @@ union bpf_attr { * of a header option. * * Supported flags: + * * * **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the * saved_syn packet or the just-received syn packet. * * Return - * >0 when found, the header option is copied to *searchby_res*. - * The return value is the total length copied. + * > 0 when found, the header option is copied to *searchby_res*. + * The return value is the total length copied. On failure, a + * negative error code is returned: * - * **-EINVAL** If param is invalid + * **-EINVAL** if a parameter is invalid. * - * **-ENOMSG** The option is not found + * **-ENOMSG** if the option is not found. * - * **-ENOENT** No syn packet available when - * **BPF_LOAD_HDR_OPT_TCP_SYN** is used + * **-ENOENT** if no syn packet is available when + * **BPF_LOAD_HDR_OPT_TCP_SYN** is used. * - * **-ENOSPC** Not enough space. Only *len* number of - * bytes are copied. + * **-ENOSPC** if there is not enough space. Only *len* number of + * bytes are copied. * - * **-EFAULT** Cannot parse the header options in the packet + * **-EFAULT** on failure to parse the header options in the + * packet. * - * **-EPERM** This helper cannot be used under the - * current sock_ops->op. + * **-EPERM** if the helper cannot be used under the current + * *skops*\ **->op**. * * long bpf_store_hdr_opt(struct bpf_sock_ops *skops, const void *from, u32 len, u64 flags) * Description @@ -3483,44 +3486,44 @@ union bpf_attr { * by searching the same option in the outgoing skb. * * This helper can only be called during - * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**. * * Return * 0 on success, or negative error in case of failure: * - * **-EINVAL** If param is invalid + * **-EINVAL** If param is invalid. * - * **-ENOSPC** Not enough space in the header. - * Nothing has been written + * **-ENOSPC** if there is not enough space in the header. + * Nothing has been written * - * **-EEXIST** The option has already existed + * **-EEXIST** if the option already exists. * - * **-EFAULT** Cannot parse the existing header options + * **-EFAULT** on failrue to parse the existing header options. * - * **-EPERM** This helper cannot be used under the - * current sock_ops->op. + * **-EPERM** if the helper cannot be used under the current + * *skops*\ **->op**. * * long bpf_reserve_hdr_opt(struct bpf_sock_ops *skops, u32 len, u64 flags) * Description * Reserve *len* bytes for the bpf header option. The - * space will be used by bpf_store_hdr_opt() later in - * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * space will be used by **bpf_store_hdr_opt**\ () later in + * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**. * - * If bpf_reserve_hdr_opt() is called multiple times, + * If **bpf_reserve_hdr_opt**\ () is called multiple times, * the total number of bytes will be reserved. * * This helper can only be called during - * BPF_SOCK_OPS_HDR_OPT_LEN_CB. + * **BPF_SOCK_OPS_HDR_OPT_LEN_CB**. * * Return * 0 on success, or negative error in case of failure: * - * **-EINVAL** if param is invalid + * **-EINVAL** if a parameter is invalid. * - * **-ENOSPC** Not enough space in the header. + * **-ENOSPC** if there is not enough space in the header. * - * **-EPERM** This helper cannot be used under the - * current sock_ops->op. + * **-EPERM** if the helper cannot be used under the current + * *skops*\ **->op**. * * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags) * Description @@ -3560,9 +3563,9 @@ union bpf_attr { * * long bpf_d_path(struct path *path, char *buf, u32 sz) * Description - * Return full path for given 'struct path' object, which - * needs to be the kernel BTF 'path' object. The path is - * returned in the provided buffer 'buf' of size 'sz' and + * Return full path for given **struct path** object, which + * needs to be the kernel BTF *path* object. The path is + * returned in the provided buffer *buf* of size *sz* and * is zero terminated. * * Return @@ -3573,7 +3576,7 @@ union bpf_attr { * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr) * Description * Read *size* bytes from user space address *user_ptr* and store - * the data in *dst*. This is a wrapper of copy_from_user(). + * the data in *dst*. This is a wrapper of **copy_from_user**\ (). * Return * 0 on success, or a negative error in case of failure. */ -- cgit v1.3.1 From 05a2ae7c033ee30f25fbed3ceed549a5cac398a9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 7 Sep 2020 15:15:08 +0200 Subject: x86/insn: Make inat-tables.c suitable for pre-decompression code The inat-tables.c file has some arrays in it that contain pointers to other arrays. These pointers need to be relocated when the kernel image is moved to a different location. The pre-decompression boot-code has no support for applying ELF relocations, so initialize these arrays at runtime in the pre-decompression code to make sure all pointers are correctly initialized. Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Acked-by: Masami Hiramatsu Link: https://lkml.kernel.org/r/20200907131613.12703-8-joro@8bytes.org --- arch/x86/tools/gen-insn-attr-x86.awk | 50 +++++++++++++++++++++++++++++- tools/arch/x86/tools/gen-insn-attr-x86.awk | 50 +++++++++++++++++++++++++++++- 2 files changed, 98 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index a42015b305f4..af38469afd14 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -362,6 +362,9 @@ function convert_operands(count,opnd, i,j,imm,mod) END { if (awkchecked != "") exit 1 + + print "#ifndef __BOOT_COMPRESSED\n" + # print escape opcode map's array print "/* Escape opcode map array */" print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \ @@ -388,6 +391,51 @@ END { for (j = 0; j < max_lprefix; j++) if (atable[i,j]) print " ["i"]["j"] = "atable[i,j]"," - print "};" + print "};\n" + + print "#else /* !__BOOT_COMPRESSED */\n" + + print "/* Escape opcode map array */" + print "static const insn_attr_t *inat_escape_tables[INAT_ESC_MAX + 1]" \ + "[INAT_LSTPFX_MAX + 1];" + print "" + + print "/* Group opcode map array */" + print "static const insn_attr_t *inat_group_tables[INAT_GRP_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1];" + print "" + + print "/* AVX opcode map array */" + print "static const insn_attr_t *inat_avx_tables[X86_VEX_M_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1];" + print "" + + print "static void inat_init_tables(void)" + print "{" + + # print escape opcode map's array + print "\t/* Print Escape opcode map array */" + for (i = 0; i < geid; i++) + for (j = 0; j < max_lprefix; j++) + if (etable[i,j]) + print "\tinat_escape_tables["i"]["j"] = "etable[i,j]";" + print "" + + # print group opcode map's array + print "\t/* Print Group opcode map array */" + for (i = 0; i < ggid; i++) + for (j = 0; j < max_lprefix; j++) + if (gtable[i,j]) + print "\tinat_group_tables["i"]["j"] = "gtable[i,j]";" + print "" + # print AVX opcode map's array + print "\t/* Print AVX opcode map array */" + for (i = 0; i < gaid; i++) + for (j = 0; j < max_lprefix; j++) + if (atable[i,j]) + print "\tinat_avx_tables["i"]["j"] = "atable[i,j]";" + + print "}" + print "#endif" } diff --git a/tools/arch/x86/tools/gen-insn-attr-x86.awk b/tools/arch/x86/tools/gen-insn-attr-x86.awk index a42015b305f4..af38469afd14 100644 --- a/tools/arch/x86/tools/gen-insn-attr-x86.awk +++ b/tools/arch/x86/tools/gen-insn-attr-x86.awk @@ -362,6 +362,9 @@ function convert_operands(count,opnd, i,j,imm,mod) END { if (awkchecked != "") exit 1 + + print "#ifndef __BOOT_COMPRESSED\n" + # print escape opcode map's array print "/* Escape opcode map array */" print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \ @@ -388,6 +391,51 @@ END { for (j = 0; j < max_lprefix; j++) if (atable[i,j]) print " ["i"]["j"] = "atable[i,j]"," - print "};" + print "};\n" + + print "#else /* !__BOOT_COMPRESSED */\n" + + print "/* Escape opcode map array */" + print "static const insn_attr_t *inat_escape_tables[INAT_ESC_MAX + 1]" \ + "[INAT_LSTPFX_MAX + 1];" + print "" + + print "/* Group opcode map array */" + print "static const insn_attr_t *inat_group_tables[INAT_GRP_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1];" + print "" + + print "/* AVX opcode map array */" + print "static const insn_attr_t *inat_avx_tables[X86_VEX_M_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1];" + print "" + + print "static void inat_init_tables(void)" + print "{" + + # print escape opcode map's array + print "\t/* Print Escape opcode map array */" + for (i = 0; i < geid; i++) + for (j = 0; j < max_lprefix; j++) + if (etable[i,j]) + print "\tinat_escape_tables["i"]["j"] = "etable[i,j]";" + print "" + + # print group opcode map's array + print "\t/* Print Group opcode map array */" + for (i = 0; i < ggid; i++) + for (j = 0; j < max_lprefix; j++) + if (gtable[i,j]) + print "\tinat_group_tables["i"]["j"] = "gtable[i,j]";" + print "" + # print AVX opcode map's array + print "\t/* Print AVX opcode map array */" + for (i = 0; i < gaid; i++) + for (j = 0; j < max_lprefix; j++) + if (atable[i,j]) + print "\tinat_avx_tables["i"]["j"] = "atable[i,j]";" + + print "}" + print "#endif" } -- cgit v1.3.1 From 0c5edd77a21dc863aa7639ea07c9d7a5b15f62f9 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Mon, 7 Sep 2020 18:14:28 +0200 Subject: selftests/net: replace obsolete NFT_CHAIN configuration Replace old parameters with global NFT_NAT from commit db8ab38880e0 ("netfilter: nf_tables: merge ipv4 and ipv6 nat chain types") Signed-off-by: Fabian Frederick Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/net/config | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 3b42c06b5985..5a57ea02802d 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -24,8 +24,7 @@ CONFIG_IP_NF_NAT=m CONFIG_NF_TABLES=m CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_IPV4=y -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV4=m +CONFIG_NFT_NAT=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y -- cgit v1.3.1 From c0176429b7b07893a5c1fd38baff055c919ba9e3 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 13 Aug 2020 11:34:43 +1000 Subject: selftests/powerpc: Fix TM tests when CPU 0 is offline Several of the TM tests fail spuriously if CPU 0 is offline, because they blindly try to affinitise to CPU 0. Fix them by picking any online CPU and using that instead. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200813013445.686464-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/tm/tm-poison.c | 11 +++++++---- tools/testing/selftests/powerpc/tm/tm-trap.c | 10 ++++++---- tools/testing/selftests/powerpc/tm/tm-unavailable.c | 9 ++++++--- 3 files changed, 19 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/tm/tm-poison.c b/tools/testing/selftests/powerpc/tm/tm-poison.c index 977558497c16..29e5f26af7b9 100644 --- a/tools/testing/selftests/powerpc/tm/tm-poison.c +++ b/tools/testing/selftests/powerpc/tm/tm-poison.c @@ -26,7 +26,7 @@ int tm_poison_test(void) { - int pid; + int cpu, pid; cpu_set_t cpuset; uint64_t poison = 0xdeadbeefc0dec0fe; uint64_t unknown = 0; @@ -35,10 +35,13 @@ int tm_poison_test(void) SKIP_IF(!have_htm()); - /* Attach both Child and Parent to CPU 0 */ + cpu = pick_online_cpu(); + FAIL_IF(cpu < 0); + + // Attach both Child and Parent to the same CPU CPU_ZERO(&cpuset); - CPU_SET(0, &cpuset); - sched_setaffinity(0, sizeof(cpuset), &cpuset); + CPU_SET(cpu, &cpuset); + FAIL_IF(sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0); pid = fork(); if (!pid) { diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c index 601f0c1d450d..c75960af8018 100644 --- a/tools/testing/selftests/powerpc/tm/tm-trap.c +++ b/tools/testing/selftests/powerpc/tm/tm-trap.c @@ -247,8 +247,7 @@ void *pong(void *not_used) int tm_trap_test(void) { uint16_t k = 1; - - int rc; + int cpu, rc; pthread_attr_t attr; cpu_set_t cpuset; @@ -267,9 +266,12 @@ int tm_trap_test(void) usr1_sa.sa_sigaction = usr1_signal_handler; sigaction(SIGUSR1, &usr1_sa, NULL); - /* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */ + cpu = pick_online_cpu(); + FAIL_IF(cpu < 0); + + // Set only one CPU in the mask. Both threads will be bound to that CPU. CPU_ZERO(&cpuset); - CPU_SET(0, &cpuset); + CPU_SET(cpu, &cpuset); /* Init pthread attribute */ rc = pthread_attr_init(&attr); diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c index 2ca2fccb0a3e..a1348a5f721a 100644 --- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c +++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c @@ -338,16 +338,19 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr) int tm_unavailable_test(void) { - int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */ + int cpu, rc, exception; /* FP = 0, VEC = 1, VSX = 2 */ pthread_t t1; pthread_attr_t attr; cpu_set_t cpuset; SKIP_IF(!have_htm()); - /* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */ + cpu = pick_online_cpu(); + FAIL_IF(cpu < 0); + + // Set only one CPU in the mask. Both threads will be bound to that CPU. CPU_ZERO(&cpuset); - CPU_SET(0, &cpuset); + CPU_SET(cpu, &cpuset); /* Init pthread attribute. */ rc = pthread_attr_init(&attr); -- cgit v1.3.1 From 769628710c33b18ede837bb488e1d24084b35592 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 13 Aug 2020 11:34:44 +1000 Subject: selftests/powerpc: Don't use setaffinity in tm-tmspr This test tries to set affinity to CPUs that don't exist, especially if the set of online CPUs doesn't start at 0. But there's no real reason for it to use setaffinity in the first place, it's just trying to create lots of threads to cause contention. So drop the setaffinity entirely. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200813013445.686464-2-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/tm/tm-tmspr.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c index 17becf3dcee4..2ff329e2fca9 100644 --- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c +++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c @@ -38,14 +38,8 @@ int passed = 1; void tfiar_tfhar(void *in) { - int i, cpu; unsigned long tfhar, tfhar_rd, tfiar, tfiar_rd; - cpu_set_t cpuset; - - CPU_ZERO(&cpuset); - cpu = (unsigned long)in >> 1; - CPU_SET(cpu, &cpuset); - sched_setaffinity(0, sizeof(cpuset), &cpuset); + int i; /* TFIAR: Last bit has to be high so userspace can read register */ tfiar = ((unsigned long)in) + 1; -- cgit v1.3.1 From b5a646a681f5d67ea5190a71d6e84a91efe63b7a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 13 Aug 2020 11:34:45 +1000 Subject: selftests/powerpc: Run tm-tmspr test for longer This test creates some threads, which write to TM SPRs, and then makes sure the registers maintain the correct values across context switches and contention with other threads. But currently the test finishes almost instantaneously, which reduces the chance of it hitting an interesting condition. So increase the number of loops, so it runs a bit longer, though still less than 2s on a Power8. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200813013445.686464-3-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/tm/tm-tmspr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c index 2ff329e2fca9..794d574db784 100644 --- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c +++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c @@ -33,7 +33,7 @@ #include "utils.h" #include "tm.h" -int num_loops = 10000; +int num_loops = 1000000; int passed = 1; void tfiar_tfhar(void *in) -- cgit v1.3.1 From 34c103342be3f9397e656da7c5cc86e97b91f514 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 Aug 2020 11:57:19 +1000 Subject: selftests/powerpc: Make using_hash_mmu() work on Cell & PowerMac These platforms don't show the MMU in /proc/cpuinfo, but they always use hash, so teach using_hash_mmu() that. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819015727.1977134-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/utils.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c index 18b6a773d5c7..638ffacc90aa 100644 --- a/tools/testing/selftests/powerpc/utils.c +++ b/tools/testing/selftests/powerpc/utils.c @@ -318,7 +318,9 @@ int using_hash_mmu(bool *using_hash) rc = 0; while (fgets(line, sizeof(line), f) != NULL) { - if (strcmp(line, "MMU : Hash\n") == 0) { + if (!strcmp(line, "MMU : Hash\n") || + !strcmp(line, "platform : Cell\n") || + !strcmp(line, "platform : PowerMac\n")) { *using_hash = true; goto out; } -- cgit v1.3.1 From 17c98a541dc9bb1162877af41cddbdca043f9a59 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 Aug 2020 11:57:20 +1000 Subject: selftests/powerpc: Give the bad_accesses test longer to run On older systems this test takes longer to run (duh), give it five minutes which is long enough on a G5 970FX @ 1.6GHz. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819015727.1977134-2-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/mm/bad_accesses.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c index a864ed7e2008..fd747b2ffcfc 100644 --- a/tools/testing/selftests/powerpc/mm/bad_accesses.c +++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c @@ -139,5 +139,6 @@ static int test(void) int main(void) { + test_harness_set_timeout(300); return test_harness(test, "bad_accesses"); } -- cgit v1.3.1 From d89002397cfb2b65267d6688fe671ee1cf7c5f0d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 Aug 2020 11:57:21 +1000 Subject: selftests/powerpc: Move set_dscr() into rfi_flush.c This version of set_dscr() was added for the RFI flush test, and is fairly specific to it. It also clashes with the version of set_dscr() in dscr/dscr.h. So move it into the RFI flush test where it's used. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819015727.1977134-3-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/include/utils.h | 1 - .../testing/selftests/powerpc/security/rfi_flush.c | 35 ++++++++++++++++++++++ tools/testing/selftests/powerpc/utils.c | 35 ---------------------- 3 files changed, 35 insertions(+), 36 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index 71d2924f5b8b..bba400d1bb90 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -35,7 +35,6 @@ int pick_online_cpu(void); int read_debugfs_file(char *debugfs_file, int *result); int write_debugfs_file(char *debugfs_file, int result); int read_sysfs_file(char *debugfs_file, char *result, size_t result_size); -void set_dscr(unsigned long val); int perf_event_open_counter(unsigned int type, unsigned long config, int group_fd); int perf_event_enable(int fd); diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c index 0a7d0afb26b8..fd37ff9b1c45 100644 --- a/tools/testing/selftests/powerpc/security/rfi_flush.c +++ b/tools/testing/selftests/powerpc/security/rfi_flush.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,40 @@ static void syscall_loop(char *p, unsigned long iterations, } } +static void sigill_handler(int signr, siginfo_t *info, void *unused) +{ + static int warned = 0; + ucontext_t *ctx = (ucontext_t *)unused; + unsigned long *pc = &UCONTEXT_NIA(ctx); + + /* mtspr 3,RS to check for move to DSCR below */ + if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { + if (!warned++) + printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n"); + *pc += 4; + } else { + printf("SIGILL at %p\n", pc); + abort(); + } +} + +static void set_dscr(unsigned long val) +{ + static int init = 0; + struct sigaction sa; + + if (!init) { + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = sigill_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGILL, &sa, NULL)) + perror("sigill_handler"); + init = 1; + } + + asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); +} + int rfi_flush_test(void) { char *p; diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c index 638ffacc90aa..1f36ee1a909a 100644 --- a/tools/testing/selftests/powerpc/utils.c +++ b/tools/testing/selftests/powerpc/utils.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -273,40 +272,6 @@ int perf_event_reset(int fd) return 0; } -static void sigill_handler(int signr, siginfo_t *info, void *unused) -{ - static int warned = 0; - ucontext_t *ctx = (ucontext_t *)unused; - unsigned long *pc = &UCONTEXT_NIA(ctx); - - /* mtspr 3,RS to check for move to DSCR below */ - if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { - if (!warned++) - printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n"); - *pc += 4; - } else { - printf("SIGILL at %p\n", pc); - abort(); - } -} - -void set_dscr(unsigned long val) -{ - static int init = 0; - struct sigaction sa; - - if (!init) { - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = sigill_handler; - sa.sa_flags = SA_SIGINFO; - if (sigaction(SIGILL, &sa, NULL)) - perror("sigill_handler"); - init = 1; - } - - asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); -} - int using_hash_mmu(bool *using_hash) { char line[128]; -- cgit v1.3.1 From 178282a054dced1a08a9683d41ac08cbace2b2fe Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 Aug 2020 11:57:22 +1000 Subject: selftests/powerpc: Include asm/cputable.h from utils.h utils.h provides have_hwcap() and have_hwcap2() which check for a feature bit. Those bits are defined in asm/cputable.h, so include it in utils.h so users of utils.h don't have to do it manually. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819015727.1977134-4-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/alignment/alignment_handler.c | 2 -- tools/testing/selftests/powerpc/include/utils.h | 1 + tools/testing/selftests/powerpc/pmu/count_stcx_fail.c | 1 - tools/testing/selftests/powerpc/pmu/per_event_excludes.c | 2 -- tools/testing/selftests/powerpc/stringloops/memcmp.c | 2 +- tools/testing/selftests/powerpc/tm/tm.h | 3 +-- 6 files changed, 3 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index 55ef15184057..e4063eba4a5b 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -55,8 +55,6 @@ #include #include -#include - #include "utils.h" #include "instructions.h" diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index bba400d1bb90..052b5a775dc2 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -12,6 +12,7 @@ #include #include #include +#include #include "reg.h" /* Avoid headaches with PRI?64 - just use %ll? always */ diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c index 2980abca31e0..2070a1e2b3a5 100644 --- a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c +++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c @@ -9,7 +9,6 @@ #include #include #include -#include #include "event.h" #include "utils.h" diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c index 2d37942bf72b..ad32a09a6540 100644 --- a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c +++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c @@ -12,8 +12,6 @@ #include #include -#include - #include "event.h" #include "lib.h" #include "utils.h" diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c index 979df3d98368..cb2f18855c8d 100644 --- a/tools/testing/selftests/powerpc/stringloops/memcmp.c +++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c @@ -4,7 +4,7 @@ #include #include #include -#include + #include "utils.h" #define SIZE 256 diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h index c402464b038f..c5a1e5c163fc 100644 --- a/tools/testing/selftests/powerpc/tm/tm.h +++ b/tools/testing/selftests/powerpc/tm/tm.h @@ -6,9 +6,8 @@ #ifndef _SELFTESTS_POWERPC_TM_TM_H #define _SELFTESTS_POWERPC_TM_TM_H -#include -#include #include +#include #include "utils.h" -- cgit v1.3.1 From 4c3c3c502575556c4bc1b401235e641863b1bce6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 Aug 2020 11:57:23 +1000 Subject: selftests/powerpc: Don't run DSCR tests on old systems The DSCR tests fail on systems that don't have DSCR, so check for the DSCR in hwcap and skip if it's not present. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819015727.1977134-5-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/dscr/Makefile | 2 +- tools/testing/selftests/powerpc/dscr/dscr_default_test.c | 2 ++ tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c | 2 ++ tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c | 2 ++ tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c | 2 ++ tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c | 2 ++ tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c | 2 ++ tools/testing/selftests/powerpc/dscr/dscr_user_test.c | 2 ++ 8 files changed, 15 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile index cfa6eedcb66c..845db6273a1b 100644 --- a/tools/testing/selftests/powerpc/dscr/Makefile +++ b/tools/testing/selftests/powerpc/dscr/Makefile @@ -10,4 +10,4 @@ include ../../lib.mk $(OUTPUT)/dscr_default_test: LDLIBS += -lpthread -$(TEST_GEN_PROGS): ../harness.c +$(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c index 288a4e2ad156..e76611e608af 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c @@ -63,6 +63,8 @@ int dscr_default(void) unsigned long i, *status[THREADS]; unsigned long orig_dscr_default; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + orig_dscr_default = get_default_dscr(); /* Initial DSCR default */ diff --git a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c index aefcd8d8759b..32fcf2b324b1 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c @@ -21,6 +21,8 @@ int dscr_explicit(void) { unsigned long i, dscr = 0; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + srand(getpid()); set_dscr(dscr); diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c index 7c1cb46397c6..c6a81b2d6b91 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c @@ -44,6 +44,8 @@ int dscr_inherit_exec(void) unsigned long i, dscr = 0; pid_t pid; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + for (i = 0; i < COUNT; i++) { dscr++; if (dscr > DSCR_MAX) diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c index 04297a69ab59..f9dfd3d3c2d5 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c @@ -22,6 +22,8 @@ int dscr_inherit(void) unsigned long i, dscr = 0; pid_t pid; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + srand(getpid()); set_dscr(dscr); diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c index 02f6b4efde14..fbbdffdb2e5d 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c @@ -77,6 +77,8 @@ int dscr_sysfs(void) unsigned long orig_dscr_default; int i, j; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + orig_dscr_default = get_default_dscr(); for (i = 0; i < COUNT; i++) { for (j = 0; j < DSCR_MAX; j++) { diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c index 37be2c25f277..191ed126f118 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c @@ -56,6 +56,8 @@ int dscr_sysfs_thread(void) unsigned long orig_dscr_default; int i, j; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + orig_dscr_default = get_default_dscr(); for (i = 0; i < COUNT; i++) { for (j = 0; j < DSCR_MAX; j++) { diff --git a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c index eaf785d11eed..e09072446dd3 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c @@ -36,6 +36,8 @@ int dscr_user(void) { int i; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + check_dscr(""); for (i = 0; i < COUNT; i++) { -- cgit v1.3.1 From 3a31518a242dcb262b008d3bb5d4b1cf50cf4026 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 Aug 2020 11:57:24 +1000 Subject: selftests/powerpc: Skip security tests on older CPUs Both these tests use PMU events that only work on newer CPUs, so skip them on older CPUs. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819015727.1977134-6-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/security/rfi_flush.c | 3 +++ tools/testing/selftests/powerpc/security/spectre_v2.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c index fd37ff9b1c45..93a65bd1f231 100644 --- a/tools/testing/selftests/powerpc/security/rfi_flush.c +++ b/tools/testing/selftests/powerpc/security/rfi_flush.c @@ -89,6 +89,9 @@ int rfi_flush_test(void) SKIP_IF(geteuid() != 0); + // The PMU event we use only works on Power7 or later + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); + if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) { perror("Unable to read powerpc/rfi_flush debugfs file"); SKIP_IF(1); diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c index c8d82b784102..adc2b7294e5f 100644 --- a/tools/testing/selftests/powerpc/security/spectre_v2.c +++ b/tools/testing/selftests/powerpc/security/spectre_v2.c @@ -134,6 +134,9 @@ int spectre_v2_test(void) s64 miss_percent; bool is_p9; + // The PMU events we use only work on Power8 or later + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + state = get_sysfs_state(); if (state == UNKNOWN) { printf("Error: couldn't determine spectre_v2 mitigation state?\n"); -- cgit v1.3.1 From 4871a10b7b5f6b0632bff229884dad1cb1e8dc37 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 Aug 2020 11:57:25 +1000 Subject: selftests/powerpc: Skip L3 bank test on older CPUs This is a test of specific piece of logic in isa207-common.c, which is only used on Power8 or later. So skip it on older CPUs. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819015727.1977134-7-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/pmu/l3_bank_test.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c index a96d512a18c4..a5dfa9bf3b9f 100644 --- a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c +++ b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c @@ -20,6 +20,9 @@ static int l3_bank_test(void) char *p; int i; + // The L3 bank logic is only used on Power8 or later + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + p = malloc(MALLOC_SIZE); FAIL_IF(!p); -- cgit v1.3.1 From 09275d717d1b2d7d5ed91f2140bb34246514a1b4 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 Aug 2020 11:57:26 +1000 Subject: selftests/powerpc: Don't touch VMX/VSX on older CPUs If we're running on a CPU without VMX/VSX then don't touch them. This is fragile, the compiler could spill a VMX/VSX register and break the test anyway. But in practice it seems to work, ie. the test runs to completion on a system without VSX with this change. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819015727.1977134-8-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/benchmarks/context_switch.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c index d50cc05df495..96554e2794d1 100644 --- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c +++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c @@ -481,6 +481,12 @@ int main(int argc, char *argv[]) else printf("futex"); + if (!have_hwcap(PPC_FEATURE_HAS_ALTIVEC)) + touch_altivec = 0; + + if (!have_hwcap(PPC_FEATURE_HAS_VSX)) + touch_vector = 0; + printf(" on cpus %d/%d touching FP:%s altivec:%s vector:%s vdso:%s\n", cpu1, cpu2, touch_fp ? "yes" : "no", touch_altivec ? "yes" : "no", touch_vector ? "yes" : "no", touch_vdso ? "yes" : "no"); -- cgit v1.3.1 From 003d6f5fd2cc3b529f3e6c529bc4bb0792930212 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 Aug 2020 11:57:27 +1000 Subject: selftests/powerpc: Properly handle failure in switch_endian_test On older CPUs the switch_endian() syscall doesn't work. Currently that causes the switch_endian_test to just crash. Instead detect the failure and properly exit with a failure message. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819015727.1977134-9-mpe@ellerman.id.au --- .../powerpc/switch_endian/switch_endian_test.S | 23 ++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S index cc4930467235..7887f78cf072 100644 --- a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S +++ b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S @@ -3,9 +3,13 @@ .data .balign 8 -message: +success_message: .ascii "success: switch_endian_test\n\0" + .balign 8 +failure_message: + .ascii "failure: switch_endian_test\n\0" + .section ".toc" .balign 8 pattern: @@ -64,6 +68,9 @@ FUNC_START(_start) li r0, __NR_switch_endian sc + tdi 0, 0, 0x48 // b +8 if the endian was switched + b .Lfail // exit if endian didn't switch + #include "check-reversed.S" /* Flip back, r0 already has the switch syscall number */ @@ -71,12 +78,20 @@ FUNC_START(_start) #include "check.S" + ld r4, success_message@got(%r2) + li r5, 28 // strlen(success_message) + li r14, 0 // exit status +.Lout: li r0, __NR_write li r3, 1 /* stdout */ - ld r4, message@got(%r2) - li r5, 28 /* strlen(message3) */ sc li r0, __NR_exit - li r3, 0 + mr r3, r14 sc b . + +.Lfail: + ld r4, failure_message@got(%r2) + li r5, 28 // strlen(failure_message) + li r14, 1 + b .Lout -- cgit v1.3.1 From db96221a683342fd4775fd820a4d5376cd2f2ed0 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Mon, 24 Aug 2020 23:12:31 +1000 Subject: selftests/powerpc: Fix prefixes in alignment_handler signal handler The signal handler in the alignment handler self test has the ability to jump over the instruction that triggered the signal. It does this by incrementing the PT_NIP in the user context by 4. If it were a prefixed instruction this will mean that the suffix is then executed which is incorrect. Instead check if the major opcode indicates a prefixed instruction (e.g. it is 1) and if so increment PT_NIP by 8. If ISA v3.1 is not available treat it as a word instruction even if the major opcode is 1. Fixes: 620a6473df36 ("selftests/powerpc: Add prefixed loads/stores to alignment_handler test") Signed-off-by: Jordan Niethe [mpe: Fix 32-bit build, rename haveprefixes to prefixes_enabled] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200824131231.14008-1-jniethe5@gmail.com --- tools/testing/selftests/powerpc/alignment/alignment_handler.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index e4063eba4a5b..2a0503bc7e49 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -62,6 +62,7 @@ int bufsize; int debug; int testing; volatile int gotsig; +bool prefixes_enabled; char *cipath = "/dev/fb0"; long cioffset; @@ -75,7 +76,12 @@ void sighandler(int sig, siginfo_t *info, void *ctx) } gotsig = sig; #ifdef __powerpc64__ - ucp->uc_mcontext.gp_regs[PT_NIP] += 4; + if (prefixes_enabled) { + u32 inst = *(u32 *)ucp->uc_mcontext.gp_regs[PT_NIP]; + ucp->uc_mcontext.gp_regs[PT_NIP] += ((inst >> 26 == 1) ? 8 : 4); + } else { + ucp->uc_mcontext.gp_regs[PT_NIP] += 4; + } #else ucp->uc_mcontext.uc_regs->gregs[PT_NIP] += 4; #endif @@ -646,6 +652,8 @@ int main(int argc, char *argv[]) exit(1); } + prefixes_enabled = have_hwcap2(PPC_FEATURE2_ARCH_3_1); + rc |= test_harness(test_alignment_handler_vsx_206, "test_alignment_handler_vsx_206"); rc |= test_harness(test_alignment_handler_vsx_207, -- cgit v1.3.1 From 3932fcecd96221e18100055d623b736d0ff873a4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 8 Sep 2020 12:28:27 -0700 Subject: selftests/seccomp: Add test for unknown SECCOMP_RET kill behavior While we were testing for the behavior of unknown seccomp filter return values, there was no test for how it acted in a thread group. Add a test in the thread group tests for this. Reviewed-by: Shuah Khan Acked-by: Christian Brauner Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 43 +++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 7a6d40286a42..bfb382580493 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -774,8 +774,15 @@ void *kill_thread(void *data) return (void *)SIBLING_EXIT_UNKILLED; } +enum kill_t { + KILL_THREAD, + KILL_PROCESS, + RET_UNKNOWN +}; + /* Prepare a thread that will kill itself or both of us. */ -void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process) +void kill_thread_or_group(struct __test_metadata *_metadata, + enum kill_t kill_how) { pthread_t thread; void *status; @@ -791,11 +798,12 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process) .len = (unsigned short)ARRAY_SIZE(filter_thread), .filter = filter_thread, }; + int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAAA; struct sock_filter filter_process[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS), + BPF_STMT(BPF_RET|BPF_K, kill), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; struct sock_fprog prog_process = { @@ -808,13 +816,15 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process) } ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, - kill_process ? &prog_process : &prog_thread)); + kill_how == KILL_THREAD ? &prog_thread + : &prog_process)); /* * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS * flag cannot be downgraded by a new filter. */ - ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); + if (kill_how == KILL_PROCESS) + ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); /* Start a thread that will exit immediately. */ ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false)); @@ -842,7 +852,7 @@ TEST(KILL_thread) child_pid = fork(); ASSERT_LE(0, child_pid); if (child_pid == 0) { - kill_thread_or_group(_metadata, false); + kill_thread_or_group(_metadata, KILL_THREAD); _exit(38); } @@ -861,7 +871,7 @@ TEST(KILL_process) child_pid = fork(); ASSERT_LE(0, child_pid); if (child_pid == 0) { - kill_thread_or_group(_metadata, true); + kill_thread_or_group(_metadata, KILL_PROCESS); _exit(38); } @@ -872,6 +882,27 @@ TEST(KILL_process) ASSERT_EQ(SIGSYS, WTERMSIG(status)); } +TEST(KILL_unknown) +{ + int status; + pid_t child_pid; + + child_pid = fork(); + ASSERT_LE(0, child_pid); + if (child_pid == 0) { + kill_thread_or_group(_metadata, RET_UNKNOWN); + _exit(38); + } + + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + + /* If the entire process was killed, we'll see SIGSYS. */ + EXPECT_TRUE(WIFSIGNALED(status)) { + TH_LOG("Unknown SECCOMP_RET is only killing the thread?"); + } + ASSERT_EQ(SIGSYS, WTERMSIG(status)); +} + /* TODO(wad) add 64-bit versus 32-bit arg tests. */ TEST(arg_out_of_range) { -- cgit v1.3.1 From a23042882ff29ae2df04f46ac0d4c50f66e7e497 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Wed, 15 Apr 2020 12:15:01 +0800 Subject: selftests/seccomp: Use bitwise instead of arithmetic operator for flags This silences the following coccinelle warning: "WARNING: sum of probable bitmasks, consider |" tools/testing/selftests/seccomp/seccomp_bpf.c:3131:17-18: WARNING: sum of probable bitmasks, consider | tools/testing/selftests/seccomp/seccomp_bpf.c:3133:18-19: WARNING: sum of probable bitmasks, consider | tools/testing/selftests/seccomp/seccomp_bpf.c:3134:18-19: WARNING: sum of probable bitmasks, consider | tools/testing/selftests/seccomp/seccomp_bpf.c:3135:18-19: WARNING: sum of probable bitmasks, consider | Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace") Reported-by: Hulk Robot Signed-off-by: Zou Wei Link: https://lore.kernel.org/r/1586924101-65940-1-git-send-email-zou_wei@huawei.com Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index bfb382580493..c5002fc25b00 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -3173,11 +3173,11 @@ skip: static int user_notif_syscall(int nr, unsigned int flags) { struct sock_filter filter[] = { - BPF_STMT(BPF_LD+BPF_W+BPF_ABS, + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1), - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF), - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; struct sock_fprog prog = { -- cgit v1.3.1 From e6054fc1f865ac46936a6b5517ae2d36554c94f8 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 8 Sep 2020 10:57:03 -0700 Subject: selftests/bpf: Add test for map_ptr arithmetic Change selftest map_ptr_kern.c with disabling inlining for one of subtests, which will fail the test without previous verifier change. Also added to verifier test for both "map_ptr += scalar" and "scalar += map_ptr" arithmetic. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200908175703.2463721-1-yhs@fb.com --- tools/testing/selftests/bpf/progs/map_ptr_kern.c | 10 +++++++- tools/testing/selftests/bpf/verifier/map_ptr.c | 32 ++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c index 982a2d8aa844..c325405751e2 100644 --- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -82,6 +82,14 @@ static inline int check_default(struct bpf_map *indirect, return 1; } +static __noinline int +check_default_noinline(struct bpf_map *indirect, struct bpf_map *direct) +{ + VERIFY(check(indirect, direct, sizeof(__u32), sizeof(__u32), + MAX_ENTRIES)); + return 1; +} + typedef struct { int counter; } atomic_t; @@ -107,7 +115,7 @@ static inline int check_hash(void) struct bpf_map *map = (struct bpf_map *)&m_hash; int i; - VERIFY(check_default(&hash->map, map)); + VERIFY(check_default_noinline(&hash->map, map)); VERIFY(hash->n_buckets == MAX_ENTRIES); VERIFY(hash->elem_size == 64); diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c index b52209db8250..637f9293bda8 100644 --- a/tools/testing/selftests/bpf/verifier/map_ptr.c +++ b/tools/testing/selftests/bpf/verifier/map_ptr.c @@ -60,3 +60,35 @@ .result = ACCEPT, .retval = 1, }, +{ + "bpf_map_ptr: r = 0, map_ptr = map_ptr + r", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 4 }, + .result = ACCEPT, +}, +{ + "bpf_map_ptr: r = 0, r = r + map_ptr", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_LD_MAP_FD(BPF_REG_0, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 4 }, + .result = ACCEPT, +}, -- cgit v1.3.1 From c6f7c753f71cccb21d1a7f0c1127ce7804374c5e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Sep 2020 16:22:37 +0200 Subject: lkdtm: remove set_fs-based tests Once we can't manipulate the address limit, we also can't test what happens when the manipulation is abused. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- drivers/misc/lkdtm/bugs.c | 10 ---------- drivers/misc/lkdtm/core.c | 2 -- drivers/misc/lkdtm/lkdtm.h | 2 -- drivers/misc/lkdtm/usercopy.c | 15 --------------- tools/testing/selftests/lkdtm/tests.txt | 2 -- 5 files changed, 31 deletions(-) (limited to 'tools') diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c index 4dfbfd51bdf7..a0675d4154d2 100644 --- a/drivers/misc/lkdtm/bugs.c +++ b/drivers/misc/lkdtm/bugs.c @@ -312,16 +312,6 @@ void lkdtm_CORRUPT_LIST_DEL(void) pr_err("list_del() corruption not detected!\n"); } -/* Test if unbalanced set_fs(KERNEL_DS)/set_fs(USER_DS) check exists. */ -void lkdtm_CORRUPT_USER_DS(void) -{ - pr_info("setting bad task size limit\n"); - set_fs(KERNEL_DS); - - /* Make sure we do not keep running with a KERNEL_DS! */ - force_sig(SIGKILL); -} - /* Test that VMAP_STACK is actually allocating with a leading guard page */ void lkdtm_STACK_GUARD_PAGE_LEADING(void) { diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c index a5e344df9166..97803f213d9d 100644 --- a/drivers/misc/lkdtm/core.c +++ b/drivers/misc/lkdtm/core.c @@ -112,7 +112,6 @@ static const struct crashtype crashtypes[] = { CRASHTYPE(CORRUPT_STACK_STRONG), CRASHTYPE(CORRUPT_LIST_ADD), CRASHTYPE(CORRUPT_LIST_DEL), - CRASHTYPE(CORRUPT_USER_DS), CRASHTYPE(STACK_GUARD_PAGE_LEADING), CRASHTYPE(STACK_GUARD_PAGE_TRAILING), CRASHTYPE(UNSET_SMEP), @@ -172,7 +171,6 @@ static const struct crashtype crashtypes[] = { CRASHTYPE(USERCOPY_STACK_FRAME_FROM), CRASHTYPE(USERCOPY_STACK_BEYOND), CRASHTYPE(USERCOPY_KERNEL), - CRASHTYPE(USERCOPY_KERNEL_DS), CRASHTYPE(STACKLEAK_ERASING), CRASHTYPE(CFI_FORWARD_PROTO), #ifdef CONFIG_X86_32 diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h index 8878538b2c13..6dec4c9b442f 100644 --- a/drivers/misc/lkdtm/lkdtm.h +++ b/drivers/misc/lkdtm/lkdtm.h @@ -27,7 +27,6 @@ void lkdtm_OVERFLOW_UNSIGNED(void); void lkdtm_ARRAY_BOUNDS(void); void lkdtm_CORRUPT_LIST_ADD(void); void lkdtm_CORRUPT_LIST_DEL(void); -void lkdtm_CORRUPT_USER_DS(void); void lkdtm_STACK_GUARD_PAGE_LEADING(void); void lkdtm_STACK_GUARD_PAGE_TRAILING(void); void lkdtm_UNSET_SMEP(void); @@ -96,7 +95,6 @@ void lkdtm_USERCOPY_STACK_FRAME_TO(void); void lkdtm_USERCOPY_STACK_FRAME_FROM(void); void lkdtm_USERCOPY_STACK_BEYOND(void); void lkdtm_USERCOPY_KERNEL(void); -void lkdtm_USERCOPY_KERNEL_DS(void); /* lkdtm_stackleak.c */ void lkdtm_STACKLEAK_ERASING(void); diff --git a/drivers/misc/lkdtm/usercopy.c b/drivers/misc/lkdtm/usercopy.c index b833367a45d0..109e8d4302c1 100644 --- a/drivers/misc/lkdtm/usercopy.c +++ b/drivers/misc/lkdtm/usercopy.c @@ -325,21 +325,6 @@ free_user: vm_munmap(user_addr, PAGE_SIZE); } -void lkdtm_USERCOPY_KERNEL_DS(void) -{ - char __user *user_ptr = - (char __user *)(0xFUL << (sizeof(unsigned long) * 8 - 4)); - mm_segment_t old_fs = get_fs(); - char buf[10] = {0}; - - pr_info("attempting copy_to_user() to noncanonical address: %px\n", - user_ptr); - set_fs(KERNEL_DS); - if (copy_to_user(user_ptr, buf, sizeof(buf)) == 0) - pr_err("copy_to_user() to noncanonical address succeeded!?\n"); - set_fs(old_fs); -} - void __init lkdtm_usercopy_init(void) { /* Prepare cache that lacks SLAB_USERCOPY flag. */ diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt index 9d266e79c6a2..74a8d329a72c 100644 --- a/tools/testing/selftests/lkdtm/tests.txt +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -9,7 +9,6 @@ EXCEPTION #CORRUPT_STACK_STRONG Crashes entire system on success CORRUPT_LIST_ADD list_add corruption CORRUPT_LIST_DEL list_del corruption -CORRUPT_USER_DS Invalid address limit on user-mode return STACK_GUARD_PAGE_LEADING STACK_GUARD_PAGE_TRAILING UNSET_SMEP CR4 bits went missing @@ -67,6 +66,5 @@ USERCOPY_STACK_FRAME_TO USERCOPY_STACK_FRAME_FROM USERCOPY_STACK_BEYOND USERCOPY_KERNEL -USERCOPY_KERNEL_DS STACKLEAK_ERASING OK: the rest of the thread stack is properly erased CFI_FORWARD_PROTO -- cgit v1.3.1 From 6c014694b1d2702cdc736d17b60746e7b95ba664 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Sep 2020 17:20:02 -0300 Subject: tools feature: Add missing -lzstd to the fast path feature detection We were failing that due to GTK2+ and then for the ZSTD test, which made test-all.c, the fast path feature detection file to fail and thus trigger building all of the feature tests, slowing down the test. Eventually the ZSTD test would be built and would succeed, since it had the needed -lzstd, avoiding: $ cat /tmp/build/perf/feature/test-all.make.output /usr/bin/ld: /tmp/ccRRJQ4u.o: in function `main_test_libzstd': /home/acme/git/perf/tools/build/feature/test-libzstd.c:8: undefined reference to `ZSTD_createCStream' /usr/bin/ld: /home/acme/git/perf/tools/build/feature/test-libzstd.c:9: undefined reference to `ZSTD_freeCStream' collect2: error: ld returned 1 exit status $ Fix it by adding -lzstd to the test-all target. Now I need an entry to 'perf test' to make sure that /tmp/build/perf/feature/test-all.make.output is empty... Fixes: 3b1c5d9659718263 ("tools build: Implement libzstd feature check, LIBZSTD_DIR and NO_LIBZSTD defines") Reviewed-by: Alexei Budankov Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20200904202611.GJ3753976@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 977067e34dff..ec815ffc7777 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -91,7 +91,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$( ############################### $(OUTPUT)test-all.bin: - $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma + $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd $(OUTPUT)test-hello.bin: $(BUILD) -- cgit v1.3.1 From 20719c82f4f80d712a47bedc2e9aa2a7305076db Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Sep 2020 17:32:01 -0300 Subject: perf tools: Add build test with GTK+ So that when we use: make -C tools/perf build-test One of the entries will ask for building with GTK+ 2. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/make | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 9b651dfe0a6b..a90fa043c066 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -91,6 +91,7 @@ make_no_sdt := NO_SDT=1 make_no_syscall_tbl := NO_SYSCALL_TABLE=1 make_with_clangllvm := LIBCLANGLLVM=1 make_with_libpfm4 := LIBPFM4=1 +make_with_gtk2 := GTK2=1 make_tags := tags make_cscope := cscope make_help := help @@ -154,6 +155,7 @@ run += make_no_syscall_tbl run += make_with_babeltrace run += make_with_clangllvm run += make_with_libpfm4 +run += make_with_gtk2 run += make_help run += make_doc run += make_perf_o -- cgit v1.3.1 From 9f86d641ba2052f78a90016f000e691357d17947 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Sep 2020 14:58:47 +0900 Subject: perf list: Remove dead code in argument check The sep is already checked being not NULL. The code seems to be a leftover from some refactoring. Signed-off-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200909055849.469612-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-list.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 0a7fe4cb5555..10ab5e40a34f 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -92,13 +92,6 @@ int cmd_list(int argc, const char **argv) else if ((sep = strchr(argv[i], ':')) != NULL) { int sep_idx; - if (sep == NULL) { - print_events(argv[i], raw_dump, !desc_flag, - long_desc_flag, - details_flag, - deprecated); - continue; - } sep_idx = sep - argv[i]; s = strdup(argv[i]); if (s == NULL) -- cgit v1.3.1 From fac49a3bc4030524c155d9d59f3d7d14e771fe6f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Sep 2020 14:58:48 +0900 Subject: perf list: Do not print 'Metric Groups:' unnecessarily It was printed unconditionally even if nothing is printed. Check if the output list empty when filter is given. Before: $ ./perf list duration List of pre-defined events (to be used in -e): duration_time [Tool event] Metric Groups: After: $ ./perf list duration List of pre-defined events (to be used in -e): duration_time [Tool event] Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200909055849.469612-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 8831b964288f..38464d7d2d63 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -535,10 +535,12 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, } } - if (metricgroups && !raw) - printf("\nMetric Groups:\n\n"); - else if (metrics && !raw) - printf("\nMetrics:\n\n"); + if (!filter || !rblist__empty(&groups)) { + if (metricgroups && !raw) + printf("\nMetric Groups:\n\n"); + else if (metrics && !raw) + printf("\nMetrics:\n\n"); + } for (node = rb_first_cached(&groups.entries); node; node = next) { struct mep *me = container_of(node, struct mep, nd); -- cgit v1.3.1 From 7fb5eefd76394cfefb380724a87ca40b47d44405 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 9 Sep 2020 10:15:42 -0700 Subject: selftests/bpf: Fix test_sysctl_loop{1, 2} failure due to clang change Andrii reported that with latest clang, when building selftests, we have error likes: error: progs/test_sysctl_loop1.c:23:16: in function sysctl_tcp_mem i32 (%struct.bpf_sysctl*): Looks like the BPF stack limit of 512 bytes is exceeded. Please move large on stack variables into BPF per-cpu array map. The error is triggered by the following LLVM patch: https://reviews.llvm.org/D87134 For example, the following code is from test_sysctl_loop1.c: static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx) { volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string"; ... } Without the above LLVM patch, the compiler did optimization to load the string (59 bytes long) with 7 64bit loads, 1 8bit load and 1 16bit load, occupying 64 byte stack size. With the above LLVM patch, the compiler only uses 8bit loads, but subregister is 32bit. So stack requirements become 4 * 59 = 236 bytes. Together with other stuff on the stack, total stack size exceeds 512 bytes, hence compiler complains and quits. To fix the issue, removing "volatile" key word or changing "volatile" to "const"/"static const" does not work, the string is put in .rodata.str1.1 section, which libbpf did not process it and errors out with libbpf: elf: skipping unrecognized data section(6) .rodata.str1.1 libbpf: prog 'sysctl_tcp_mem': bad map relo against '.L__const.is_tcp_mem.tcp_mem_name' in section '.rodata.str1.1' Defining the string const as global variable can fix the issue as it puts the string constant in '.rodata' section which is recognized by libbpf. In the future, when libbpf can process '.rodata.str*.*' properly, the global definition can be changed back to local definition. Defining tcp_mem_name as a global, however, triggered a verifier failure. ./test_progs -n 7/21 libbpf: load bpf program failed: Permission denied libbpf: -- BEGIN DUMP LOG --- libbpf: invalid stack off=0 size=1 verification time 6975 usec stack depth 160+64 processed 889 insns (limit 1000000) max_states_per_insn 4 total_states 14 peak_states 14 mark_read 10 libbpf: -- END LOG -- libbpf: failed to load program 'sysctl_tcp_mem' libbpf: failed to load object 'test_sysctl_loop2.o' test_bpf_verif_scale:FAIL:114 #7/21 test_sysctl_loop2.o:FAIL This actually exposed a bpf program bug. In test_sysctl_loop{1,2}, we have code like const char tcp_mem_name[] = "<...long string...>"; ... char name[64]; ... for (i = 0; i < sizeof(tcp_mem_name); ++i) if (name[i] != tcp_mem_name[i]) return 0; In the above code, if sizeof(tcp_mem_name) > 64, name[i] access may be out of bound. The sizeof(tcp_mem_name) is 59 for test_sysctl_loop1.c and 79 for test_sysctl_loop2.c. Without promotion-to-global change, old compiler generates code where the overflowed stack access is actually filled with valid value, so hiding the bpf program bug. With promotion-to-global change, the code is different, more specifically, the previous loading constants to stack is gone, and "name" occupies stack[-64:0] and overflow access triggers a verifier error. To fix the issue, adjust "name" buffer size properly. Reported-by: Andrii Nakryiko Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200909171542.3673449-1-yhs@fb.com --- tools/testing/selftests/bpf/progs/test_sysctl_loop1.c | 4 ++-- tools/testing/selftests/bpf/progs/test_sysctl_loop2.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c index 458b0d69133e..553a282d816a 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c @@ -18,11 +18,11 @@ #define MAX_ULONG_STR_LEN 7 #define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN) +const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string"; static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx) { - volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string"; unsigned char i; - char name[64]; + char name[sizeof(tcp_mem_name)]; int ret; memset(name, 0, sizeof(name)); diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c index b2e6f9b0894d..2b64bc563a12 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c @@ -18,11 +18,11 @@ #define MAX_ULONG_STR_LEN 7 #define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN) +const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop"; static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx) { - volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop"; unsigned char i; - char name[64]; + char name[sizeof(tcp_mem_name)]; int ret; memset(name, 0, sizeof(name)); -- cgit v1.3.1 From 8081ede1f73139687a75147d529cebcc5e4149c5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 8 Sep 2020 11:01:27 -0700 Subject: perf: Stop using deprecated bpf_program__title() Switch from deprecated bpf_program__title() API to bpf_program__section_name(). Also drop unnecessary error checks because neither bpf_program__title() nor bpf_program__section_name() can fail or return NULL. Fixes: 521095842027 ("libbpf: Deprecate notion of BPF program "title" in favor of "section name"") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Reviewed-by: Tobias Klauser Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20200908180127.1249-1-andriin@fb.com --- tools/perf/util/bpf-loader.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 2feb751516ab..0374adcb223c 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -328,12 +328,6 @@ config_bpf_program(struct bpf_program *prog) probe_conf.no_inlines = false; probe_conf.force_add = false; - config_str = bpf_program__title(prog, false); - if (IS_ERR(config_str)) { - pr_debug("bpf: unable to get title for program\n"); - return PTR_ERR(config_str); - } - priv = calloc(sizeof(*priv), 1); if (!priv) { pr_debug("bpf: failed to alloc priv\n"); @@ -341,6 +335,7 @@ config_bpf_program(struct bpf_program *prog) } pev = &priv->pev; + config_str = bpf_program__section_name(prog); pr_debug("bpf: config program '%s'\n", config_str); err = parse_prog_config(config_str, &main_str, &is_tp, pev); if (err) @@ -454,10 +449,7 @@ preproc_gen_prologue(struct bpf_program *prog, int n, if (err) { const char *title; - title = bpf_program__title(prog, false); - if (!title) - title = "[unknown]"; - + title = bpf_program__section_name(prog); pr_debug("Failed to generate prologue for program %s\n", title); return err; -- cgit v1.3.1 From 7a16183316ddb9b55b12c2341a4947790a119556 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Sep 2020 20:26:30 -0700 Subject: perf stat: Remove dead code: no need to set os.evsel twice No need to set os.evsel twice. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jin Yao Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Richter Link: http://lore.kernel.org/lkml/20200910032632.511566-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 493ec372fdec..4b57c0c07632 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -946,7 +946,6 @@ static void print_metric_headers(struct perf_stat_config *config, out.print_metric = print_metric_header; out.new_line = new_line_metric; out.force_header = true; - os.evsel = counter; perf_stat__print_shadow_stats(config, counter, 0, 0, &out, -- cgit v1.3.1 From 9e34c1c87e56dc4a53e95fa2a68191c75e10b46d Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Sep 2020 20:26:31 -0700 Subject: perf metricgroup: Fix typo in comment. Add missing character. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jin Yao Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Richter Link: http://lore.kernel.org/lkml/20200910032632.511566-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 38464d7d2d63..5476e0767e8c 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -150,7 +150,7 @@ static void expr_ids__exit(struct expr_ids *ids) } /** - * Find a group of events in perf_evlist that correpond to those from a parsed + * Find a group of events in perf_evlist that correspond to those from a parsed * metric expression. Note, as find_evsel_group is called in the same order as * perf_evlist was constructed, metric_no_merge doesn't need to test for * underfilling a group. -- cgit v1.3.1 From fe0aed19b266fdcbb5dd7abc9ac9b4a64ae82f91 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 7 Sep 2020 21:01:54 +0800 Subject: perf test: Introduce script for Arm CoreSight testing We need a simple method to test Perf with ARM CoreSight drivers, this could be used for smoke testing when new patch is coming for perf or CoreSight drivers, and we also can use the test to confirm if the CoreSight has been enabled successfully on new platforms. This patch introduces the shell script test_arm_coresight.sh which is under the 'pert test' framework. This script provides three testing scenarios: Test scenario 1: traverse all possible paths between source and sink For traversing possible paths, simply to say, the testing rationale is source oriented testing, it traverses every source (now only refers to ETM device) and test its all possible sinks. To search the complete paths from one specific source to its sinks, this patch relies on the sysfs '/sys/bus/coresight/devices/devX/out:Y' for depth-first search (DFS) for iteration connected device nodes, if the output device is detected as a sink device (the script will exclude TPIU device which can not be supported for perf PMU), then it will test trace data recording and decoding for it. The script runs three output testings for every trace data: - Test branch samples dumping with 'perf script' command; - Test branch samples reporting with 'perf report' command; - Use option '--itrace=i1000i' to insert synthesized instructions events and the script will check if perf can output the percentage value successfully based on the instruction samples. Test scenario 2: system-wide test For system-wide testing, it passes option '-a' to perf tool to enable tracing on all CPUs, so it's hard to say which program will be traced. But perf tool itself contributes much overload in this case, so it will parse trace data and check if process 'perf' can be detected or not. Test scenario 3: snapshot mode test. For snapshot mode testing, it uses 'dd' command to launch a long running program, so this can give chance to send signal -USR2; it will check the captured trace data contains 'dd' related thread info or not. If any test fails, it will report failure and directly exit with error. This test will be only applied on a platform with PMU event 'cs_etm//', otherwise will skip the testing. Below is detailed usage for it: # cd $linux/tools/perf -> This is important so can use shell script # perf test list [...] 70: probe libc's inet_pton & backtrace it with ping 71: Check Arm CoreSight trace data recording and synthesized samples 72: Check open filename arg using perf trace + vfs_getname 73: Zstd perf.data compression/decompression 74: Add vfs_getname probe to get syscall args filenames 75: Use vfs_getname probe to get syscall args filenames # perf test 71 71: Check Arm CoreSight trace data recording and branch samples: Ok Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Reviewed-by: Suzuki Poulouse Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200907130154.9601-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_arm_coresight.sh | 183 +++++++++++++++++++++++++++ 1 file changed, 183 insertions(+) create mode 100755 tools/perf/tests/shell/test_arm_coresight.sh (limited to 'tools') diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh new file mode 100755 index 000000000000..8d84fdbed6a6 --- /dev/null +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -0,0 +1,183 @@ +#!/bin/sh +# Check Arm CoreSight trace data recording and synthesized samples + +# Uses the 'perf record' to record trace data with Arm CoreSight sinks; +# then verify if there have any branch samples and instruction samples +# are generated by CoreSight with 'perf script' and 'perf report' +# commands. + +# SPDX-License-Identifier: GPL-2.0 +# Leo Yan , 2020 + +perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +file=$(mktemp /tmp/temporary_file.XXXXX) + +skip_if_no_cs_etm_event() { + perf list | grep -q 'cs_etm//' && return 0 + + # cs_etm event doesn't exist + return 2 +} + +skip_if_no_cs_etm_event || exit 2 + +cleanup_files() +{ + rm -f ${perfdata} + rm -f ${file} +} + +trap cleanup_files exit + +record_touch_file() { + echo "Recording trace (only user mode) with path: CPU$2 => $1" + rm -f $file + perf record -o ${perfdata} -e cs_etm/@$1/u --per-thread \ + -- taskset -c $2 touch $file +} + +perf_script_branch_samples() { + echo "Looking at perf.data file for dumping branch samples:" + + # Below is an example of the branch samples dumping: + # touch 6512 1 branches:u: ffffb220824c strcmp+0xc (/lib/aarch64-linux-gnu/ld-2.27.so) + # touch 6512 1 branches:u: ffffb22082e0 strcmp+0xa0 (/lib/aarch64-linux-gnu/ld-2.27.so) + # touch 6512 1 branches:u: ffffb2208320 strcmp+0xe0 (/lib/aarch64-linux-gnu/ld-2.27.so) + perf script -F,-time -i ${perfdata} | \ + egrep " +$1 +[0-9]+ .* +branches:([u|k]:)? +" +} + +perf_report_branch_samples() { + echo "Looking at perf.data file for reporting branch samples:" + + # Below is an example of the branch samples reporting: + # 73.04% 73.04% touch libc-2.27.so [.] _dl_addr + # 7.71% 7.71% touch libc-2.27.so [.] getenv + # 2.59% 2.59% touch ld-2.27.so [.] strcmp + perf report --stdio -i ${perfdata} | \ + egrep " +[0-9]+\.[0-9]+% +[0-9]+\.[0-9]+% +$1 " +} + +perf_report_instruction_samples() { + echo "Looking at perf.data file for instruction samples:" + + # Below is an example of the instruction samples reporting: + # 68.12% touch libc-2.27.so [.] _dl_addr + # 5.80% touch libc-2.27.so [.] getenv + # 4.35% touch ld-2.27.so [.] _dl_fixup + perf report --itrace=i1000i --stdio -i ${perfdata} | \ + egrep " +[0-9]+\.[0-9]+% +$1" +} + +is_device_sink() { + # If the node of "enable_sink" is existed under the device path, this + # means the device is a sink device. Need to exclude 'tpiu' since it + # cannot support perf PMU. + echo "$1" | egrep -q -v "tpiu" + + if [ $? -eq 0 -a -e "$1/enable_sink" ]; then + + pmu_dev="/sys/bus/event_source/devices/cs_etm/sinks/$2" + + # Warn if the device is not supported by PMU + if ! [ -f $pmu_dev ]; then + echo "PMU doesn't support $pmu_dev" + fi + + return 0 + fi + + # Otherwise, it's not a sink device + return 1 +} + +arm_cs_iterate_devices() { + for dev in $1/connections/out\:*; do + + # Skip testing if it's not a directory + ! [ -d $dev ] && continue; + + # Read out its symbol link file name + path=`readlink -f $dev` + + # Extract device name from path, e.g. + # path = '/sys/devices/platform/20010000.etf/tmc_etf0' + # `> device_name = 'tmc_etf0' + device_name=$(basename $path) + + if is_device_sink $path $devce_name; then + + record_touch_file $device_name $2 && + perf_script_branch_samples touch && + perf_report_branch_samples touch && + perf_report_instruction_samples touch + + err=$? + + # Exit when find failure + [ $err != 0 ] && exit $err + fi + + arm_cs_iterate_devices $dev $2 + done +} + +arm_cs_etm_traverse_path_test() { + # Iterate for every ETM device + for dev in /sys/bus/coresight/devices/etm*; do + + # Find the ETM device belonging to which CPU + cpu=`cat $dev/cpu` + + echo $dev + echo $cpu + + # Use depth-first search (DFS) to iterate outputs + arm_cs_iterate_devices $dev $cpu + done +} + +arm_cs_etm_system_wide_test() { + echo "Recording trace with system wide mode" + perf record -o ${perfdata} -e cs_etm// -a -- ls + + perf_script_branch_samples perf && + perf_report_branch_samples perf && + perf_report_instruction_samples perf + + err=$? + + # Exit when find failure + [ $err != 0 ] && exit $err +} + +arm_cs_etm_snapshot_test() { + echo "Recording trace with snapshot mode" + perf record -o ${perfdata} -e cs_etm// -S \ + -- dd if=/dev/zero of=/dev/null & + PERFPID=$! + + # Wait for perf program + sleep 1 + + # Send signal to snapshot trace data + kill -USR2 $PERFPID + + # Stop perf program + kill $PERFPID + wait $PERFPID + + perf_script_branch_samples dd && + perf_report_branch_samples dd && + perf_report_instruction_samples dd + + err=$? + + # Exit when find failure + [ $err != 0 ] && exit $err +} + +arm_cs_etm_traverse_path_test +arm_cs_etm_system_wide_test +arm_cs_etm_snapshot_test +exit 0 -- cgit v1.3.1 From 0d52b7889be17076e0c9c535d74db9c4c8b506ce Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Mon, 7 Sep 2020 12:11:29 +0530 Subject: perf jevents: Make json_events() static and ditch jevents.h file This patch removes jevents.h and makes json_events function static. Signed-off-by: Kajol Jain Reviewed-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jin Yao Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20200907064133.75090-2-kjain@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 3 +-- tools/perf/pmu-events/jevents.h | 23 ----------------------- 2 files changed, 1 insertion(+), 25 deletions(-) delete mode 100644 tools/perf/pmu-events/jevents.h (limited to 'tools') diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index e37d17830c5c..d5558df6e836 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -48,7 +48,6 @@ #include #include "jsmn.h" #include "json.h" -#include "jevents.h" int verbose; char *prog; @@ -513,7 +512,7 @@ try_fixup(const char *fn, char *arch_std, char **event, char **desc, } /* Call func with each event in the json file */ -int json_events(const char *fn, +static int json_events(const char *fn, int (*func)(void *data, char *name, char *event, char *desc, char *long_desc, char *pmu, char *unit, char *perpkg, diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h deleted file mode 100644 index 2afc8304529e..000000000000 --- a/tools/perf/pmu-events/jevents.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef JEVENTS_H -#define JEVENTS_H 1 - -int json_events(const char *fn, - int (*func)(void *data, char *name, char *event, char *desc, - char *long_desc, - char *pmu, - char *unit, char *perpkg, char *metric_expr, - char *metric_name, char *metric_group, - char *deprecated, char *metric_constraint), - void *data); -char *get_cpu_str(void); - -#ifndef min -#define min(x, y) ({ \ - typeof(x) _min1 = (x); \ - typeof(y) _min2 = (y); \ - (void) (&_min1 == &_min2); \ - _min1 < _min2 ? _min1 : _min2; }) -#endif - -#endif -- cgit v1.3.1 From 71a374bb1847824b60a3bfdd3af8a13b6ec3a22d Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Mon, 7 Sep 2020 12:11:30 +0530 Subject: perf jevents: Add new structure to pass json fields. This patch adds new structure called 'json_event' inside jevents.c file to improve the callback prototype inside jevent files. Initially, whenever user want to add new field, they need to update in all function callback which make it more and more complex with increased number of parmeters. With this change, we just need to add it in new structure 'json_event'. Signed-off-by: Kajol Jain Reviewed-by: Andi Kleen Reviewed-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jin Yao Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20200907064133.75090-3-kjain@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 212 ++++++++++++++++++---------------------- 1 file changed, 97 insertions(+), 115 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index d5558df6e836..94c54f5616ab 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -52,6 +52,23 @@ int verbose; char *prog; +struct json_event { + char *name; + char *event; + char *desc; + char *long_desc; + char *pmu; + char *unit; + char *perpkg; + char *metric_expr; + char *metric_name; + char *metric_group; + char *deprecated; + char *metric_constraint; +}; + +typedef int (*func)(void *data, struct json_event *je); + int eprintf(int level, int var, const char *fmt, ...) { @@ -70,11 +87,6 @@ int eprintf(int level, int var, const char *fmt, ...) return ret; } -__attribute__((weak)) char *get_cpu_str(void) -{ - return NULL; -} - static void addfield(char *map, char **dst, const char *sep, const char *a, jsmntok_t *bt) { @@ -318,12 +330,7 @@ static void print_events_table_prefix(FILE *fp, const char *tblname) close_table = 1; } -static int print_events_table_entry(void *data, char *name, char *event, - char *desc, char *long_desc, - char *pmu, char *unit, char *perpkg, - char *metric_expr, - char *metric_name, char *metric_group, - char *deprecated, char *metric_constraint) +static int print_events_table_entry(void *data, struct json_event *je) { struct perf_entry_data *pd = data; FILE *outfp = pd->outfp; @@ -335,30 +342,30 @@ static int print_events_table_entry(void *data, char *name, char *event, */ fprintf(outfp, "{\n"); - if (name) - fprintf(outfp, "\t.name = \"%s\",\n", name); - if (event) - fprintf(outfp, "\t.event = \"%s\",\n", event); - fprintf(outfp, "\t.desc = \"%s\",\n", desc); + if (je->name) + fprintf(outfp, "\t.name = \"%s\",\n", je->name); + if (je->event) + fprintf(outfp, "\t.event = \"%s\",\n", je->event); + fprintf(outfp, "\t.desc = \"%s\",\n", je->desc); fprintf(outfp, "\t.topic = \"%s\",\n", topic); - if (long_desc && long_desc[0]) - fprintf(outfp, "\t.long_desc = \"%s\",\n", long_desc); - if (pmu) - fprintf(outfp, "\t.pmu = \"%s\",\n", pmu); - if (unit) - fprintf(outfp, "\t.unit = \"%s\",\n", unit); - if (perpkg) - fprintf(outfp, "\t.perpkg = \"%s\",\n", perpkg); - if (metric_expr) - fprintf(outfp, "\t.metric_expr = \"%s\",\n", metric_expr); - if (metric_name) - fprintf(outfp, "\t.metric_name = \"%s\",\n", metric_name); - if (metric_group) - fprintf(outfp, "\t.metric_group = \"%s\",\n", metric_group); - if (deprecated) - fprintf(outfp, "\t.deprecated = \"%s\",\n", deprecated); - if (metric_constraint) - fprintf(outfp, "\t.metric_constraint = \"%s\",\n", metric_constraint); + if (je->long_desc && je->long_desc[0]) + fprintf(outfp, "\t.long_desc = \"%s\",\n", je->long_desc); + if (je->pmu) + fprintf(outfp, "\t.pmu = \"%s\",\n", je->pmu); + if (je->unit) + fprintf(outfp, "\t.unit = \"%s\",\n", je->unit); + if (je->perpkg) + fprintf(outfp, "\t.perpkg = \"%s\",\n", je->perpkg); + if (je->metric_expr) + fprintf(outfp, "\t.metric_expr = \"%s\",\n", je->metric_expr); + if (je->metric_name) + fprintf(outfp, "\t.metric_name = \"%s\",\n", je->metric_name); + if (je->metric_group) + fprintf(outfp, "\t.metric_group = \"%s\",\n", je->metric_group); + if (je->deprecated) + fprintf(outfp, "\t.deprecated = \"%s\",\n", je->deprecated); + if (je->metric_constraint) + fprintf(outfp, "\t.metric_constraint = \"%s\",\n", je->metric_constraint); fprintf(outfp, "},\n"); return 0; @@ -380,17 +387,17 @@ struct event_struct { char *metric_constraint; }; -#define ADD_EVENT_FIELD(field) do { if (field) { \ - es->field = strdup(field); \ +#define ADD_EVENT_FIELD(field) do { if (je->field) { \ + es->field = strdup(je->field); \ if (!es->field) \ goto out_free; \ } } while (0) #define FREE_EVENT_FIELD(field) free(es->field) -#define TRY_FIXUP_FIELD(field) do { if (es->field && !*field) {\ - *field = strdup(es->field); \ - if (!*field) \ +#define TRY_FIXUP_FIELD(field) do { if (es->field && !je->field) {\ + je->field = strdup(es->field); \ + if (!je->field) \ return -ENOMEM; \ } } while (0) @@ -421,11 +428,7 @@ static void free_arch_std_events(void) } } -static int save_arch_std_events(void *data, char *name, char *event, - char *desc, char *long_desc, char *pmu, - char *unit, char *perpkg, char *metric_expr, - char *metric_name, char *metric_group, - char *deprecated, char *metric_constraint) +static int save_arch_std_events(void *data, struct json_event *je) { struct event_struct *es; @@ -485,11 +488,8 @@ static char *real_event(const char *name, char *event) } static int -try_fixup(const char *fn, char *arch_std, char **event, char **desc, - char **name, char **long_desc, char **pmu, char **filter, - char **perpkg, char **unit, char **metric_expr, char **metric_name, - char **metric_group, unsigned long long eventcode, - char **deprecated, char **metric_constraint) +try_fixup(const char *fn, char *arch_std, unsigned long long eventcode, + struct json_event *je) { /* try to find matching event from arch standard values */ struct event_struct *es; @@ -498,8 +498,8 @@ try_fixup(const char *fn, char *arch_std, char **event, char **desc, if (!strcmp(arch_std, es->name)) { if (!eventcode && es->event) { /* allow EventCode to be overridden */ - free(*event); - *event = NULL; + free(je->event); + je->event = NULL; } FOR_ALL_EVENT_STRUCT_FIELDS(TRY_FIXUP_FIELD); return 0; @@ -513,13 +513,8 @@ try_fixup(const char *fn, char *arch_std, char **event, char **desc, /* Call func with each event in the json file */ static int json_events(const char *fn, - int (*func)(void *data, char *name, char *event, char *desc, - char *long_desc, - char *pmu, char *unit, char *perpkg, - char *metric_expr, - char *metric_name, char *metric_group, - char *deprecated, char *metric_constraint), - void *data) + int (*func)(void *data, struct json_event *je), + void *data) { int err; size_t size; @@ -537,18 +532,10 @@ static int json_events(const char *fn, EXPECT(tokens->type == JSMN_ARRAY, tokens, "expected top level array"); tok = tokens + 1; for (i = 0; i < tokens->size; i++) { - char *event = NULL, *desc = NULL, *name = NULL; - char *long_desc = NULL; + char *event = NULL; char *extra_desc = NULL; - char *pmu = NULL; char *filter = NULL; - char *perpkg = NULL; - char *unit = NULL; - char *metric_expr = NULL; - char *metric_name = NULL; - char *metric_group = NULL; - char *deprecated = NULL; - char *metric_constraint = NULL; + struct json_event je = {}; char *arch_std = NULL; unsigned long long eventcode = 0; struct msrmap *msr = NULL; @@ -583,14 +570,14 @@ static int json_events(const char *fn, eventcode |= strtoul(code, NULL, 0) << 21; free(code); } else if (json_streq(map, field, "EventName")) { - addfield(map, &name, "", "", val); + addfield(map, &je.name, "", "", val); } else if (json_streq(map, field, "BriefDescription")) { - addfield(map, &desc, "", "", val); - fixdesc(desc); + addfield(map, &je.desc, "", "", val); + fixdesc(je.desc); } else if (json_streq(map, field, "PublicDescription")) { - addfield(map, &long_desc, "", "", val); - fixdesc(long_desc); + addfield(map, &je.long_desc, "", "", val); + fixdesc(je.long_desc); } else if (json_streq(map, field, "PEBS") && nz) { precise = val; } else if (json_streq(map, field, "MSRIndex") && nz) { @@ -610,34 +597,34 @@ static int json_events(const char *fn, ppmu = field_to_perf(unit_to_pmu, map, val); if (ppmu) { - pmu = strdup(ppmu); + je.pmu = strdup(ppmu); } else { - if (!pmu) - pmu = strdup("uncore_"); - addfield(map, &pmu, "", "", val); - for (s = pmu; *s; s++) + if (!je.pmu) + je.pmu = strdup("uncore_"); + addfield(map, &je.pmu, "", "", val); + for (s = je.pmu; *s; s++) *s = tolower(*s); } - addfield(map, &desc, ". ", "Unit: ", NULL); - addfield(map, &desc, "", pmu, NULL); - addfield(map, &desc, "", " ", NULL); + addfield(map, &je.desc, ". ", "Unit: ", NULL); + addfield(map, &je.desc, "", je.pmu, NULL); + addfield(map, &je.desc, "", " ", NULL); } else if (json_streq(map, field, "Filter")) { addfield(map, &filter, "", "", val); } else if (json_streq(map, field, "ScaleUnit")) { - addfield(map, &unit, "", "", val); + addfield(map, &je.unit, "", "", val); } else if (json_streq(map, field, "PerPkg")) { - addfield(map, &perpkg, "", "", val); + addfield(map, &je.perpkg, "", "", val); } else if (json_streq(map, field, "Deprecated")) { - addfield(map, &deprecated, "", "", val); + addfield(map, &je.deprecated, "", "", val); } else if (json_streq(map, field, "MetricName")) { - addfield(map, &metric_name, "", "", val); + addfield(map, &je.metric_name, "", "", val); } else if (json_streq(map, field, "MetricGroup")) { - addfield(map, &metric_group, "", "", val); + addfield(map, &je.metric_group, "", "", val); } else if (json_streq(map, field, "MetricConstraint")) { - addfield(map, &metric_constraint, "", "", val); + addfield(map, &je.metric_constraint, "", "", val); } else if (json_streq(map, field, "MetricExpr")) { - addfield(map, &metric_expr, "", "", val); - for (s = metric_expr; *s; s++) + addfield(map, &je.metric_expr, "", "", val); + for (s = je.metric_expr; *s; s++) *s = tolower(*s); } else if (json_streq(map, field, "ArchStdEvent")) { addfield(map, &arch_std, "", "", val); @@ -646,7 +633,7 @@ static int json_events(const char *fn, } /* ignore unknown fields */ } - if (precise && desc && !strstr(desc, "(Precise Event)")) { + if (precise && je.desc && !strstr(je.desc, "(Precise Event)")) { if (json_streq(map, precise, "2")) addfield(map, &extra_desc, " ", "(Must be precise)", NULL); @@ -656,48 +643,43 @@ static int json_events(const char *fn, } snprintf(buf, sizeof buf, "event=%#llx", eventcode); addfield(map, &event, ",", buf, NULL); - if (desc && extra_desc) - addfield(map, &desc, " ", extra_desc, NULL); - if (long_desc && extra_desc) - addfield(map, &long_desc, " ", extra_desc, NULL); + if (je.desc && extra_desc) + addfield(map, &je.desc, " ", extra_desc, NULL); + if (je.long_desc && extra_desc) + addfield(map, &je.long_desc, " ", extra_desc, NULL); if (filter) addfield(map, &event, ",", filter, NULL); if (msr != NULL) addfield(map, &event, ",", msr->pname, msrval); - if (name) - fixname(name); + if (je.name) + fixname(je.name); if (arch_std) { /* * An arch standard event is referenced, so try to * fixup any unassigned values. */ - err = try_fixup(fn, arch_std, &event, &desc, &name, - &long_desc, &pmu, &filter, &perpkg, - &unit, &metric_expr, &metric_name, - &metric_group, eventcode, - &deprecated, &metric_constraint); + err = try_fixup(fn, arch_std, eventcode, &je); if (err) goto free_strings; } - err = func(data, name, real_event(name, event), desc, long_desc, - pmu, unit, perpkg, metric_expr, metric_name, - metric_group, deprecated, metric_constraint); + je.event = real_event(je.name, event); + err = func(data, &je); free_strings: free(event); - free(desc); - free(name); - free(long_desc); + free(je.desc); + free(je.name); + free(je.long_desc); free(extra_desc); - free(pmu); + free(je.pmu); free(filter); - free(perpkg); - free(deprecated); - free(unit); - free(metric_expr); - free(metric_name); - free(metric_group); - free(metric_constraint); + free(je.perpkg); + free(je.deprecated); + free(je.unit); + free(je.metric_expr); + free(je.metric_name); + free(je.metric_group); + free(je.metric_constraint); free(arch_std); if (err) -- cgit v1.3.1 From 560ccbc4a52c567096023e0c6a9b920048e59017 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Mon, 7 Sep 2020 12:11:31 +0530 Subject: perf jevents: Add support for parsing perchip/percore events Initially, every time we want to add new terms like chip, core thread etc, we need to create corrsponding fields in pmu_events and event struct. This patch adds an enum called 'aggr_mode_class' which store all these aggregation like perchip/percore. It also adds new field 'aggr_mode' to capture these terms. Now, if user wants to add any new term, they just need to add it in the enum defined. Signed-off-by: Kajol Jain Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jin Yao Cc: John Garry Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20200907064133.75090-4-kjain@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 20 ++++++++++++++++++++ tools/perf/pmu-events/pmu-events.h | 6 ++++++ 2 files changed, 26 insertions(+) (limited to 'tools') diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 94c54f5616ab..99df41a9543d 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -48,6 +48,7 @@ #include #include "jsmn.h" #include "json.h" +#include "pmu-events.h" int verbose; char *prog; @@ -60,6 +61,7 @@ struct json_event { char *pmu; char *unit; char *perpkg; + char *aggr_mode; char *metric_expr; char *metric_name; char *metric_group; @@ -67,6 +69,17 @@ struct json_event { char *metric_constraint; }; +enum aggr_mode_class convert(const char *aggr_mode) +{ + if (!strcmp(aggr_mode, "PerCore")) + return PerCore; + else if (!strcmp(aggr_mode, "PerChip")) + return PerChip; + + pr_err("%s: Wrong AggregationMode value '%s'\n", prog, aggr_mode); + return -1; +} + typedef int (*func)(void *data, struct json_event *je); int eprintf(int level, int var, const char *fmt, ...) @@ -356,6 +369,8 @@ static int print_events_table_entry(void *data, struct json_event *je) fprintf(outfp, "\t.unit = \"%s\",\n", je->unit); if (je->perpkg) fprintf(outfp, "\t.perpkg = \"%s\",\n", je->perpkg); + if (je->aggr_mode) + fprintf(outfp, "\t.aggr_mode = \"%d\",\n", convert(je->aggr_mode)); if (je->metric_expr) fprintf(outfp, "\t.metric_expr = \"%s\",\n", je->metric_expr); if (je->metric_name) @@ -380,6 +395,7 @@ struct event_struct { char *pmu; char *unit; char *perpkg; + char *aggr_mode; char *metric_expr; char *metric_name; char *metric_group; @@ -409,6 +425,7 @@ struct event_struct { op(pmu); \ op(unit); \ op(perpkg); \ + op(aggr_mode); \ op(metric_expr); \ op(metric_name); \ op(metric_group); \ @@ -614,6 +631,8 @@ static int json_events(const char *fn, addfield(map, &je.unit, "", "", val); } else if (json_streq(map, field, "PerPkg")) { addfield(map, &je.perpkg, "", "", val); + } else if (json_streq(map, field, "AggregationMode")) { + addfield(map, &je.aggr_mode, "", "", val); } else if (json_streq(map, field, "Deprecated")) { addfield(map, &je.deprecated, "", "", val); } else if (json_streq(map, field, "MetricName")) { @@ -674,6 +693,7 @@ free_strings: free(je.pmu); free(filter); free(je.perpkg); + free(je.aggr_mode); free(je.deprecated); free(je.unit); free(je.metric_expr); diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index c8f306b572f4..7da1a3743b77 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -2,6 +2,11 @@ #ifndef PMU_EVENTS_H #define PMU_EVENTS_H +enum aggr_mode_class { + PerChip = 1, + PerCore +}; + /* * Describe each PMU event. Each CPU has a table of PMU events. */ @@ -14,6 +19,7 @@ struct pmu_event { const char *pmu; const char *unit; const char *perpkg; + const char *aggr_mode; const char *metric_expr; const char *metric_name; const char *metric_group; -- cgit v1.3.1 From f5a489dc8189bd6a6a418e408bada1ed5b9d02b1 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Mon, 7 Sep 2020 12:11:32 +0530 Subject: perf metricgroup: Pass pmu_event structure as a parameter for arch_get_runtimeparam() This patch adds passing of pmu_event as a parameter in function 'arch_get_runtimeparam' which can be used to get details like if the event is percore/perchip. Signed-off-by: Kajol Jain Acked-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: John Garry Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20200907064133.75090-5-kjain@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/header.c | 7 +++++-- tools/perf/util/metricgroup.c | 5 ++--- tools/perf/util/metricgroup.h | 3 ++- 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index 1a950171a66f..58b2d610aadb 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -40,8 +40,11 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused) return bufp; } -int arch_get_runtimeparam(void) +int arch_get_runtimeparam(struct pmu_event *pe) { int count; - return sysfs__read_int("/devices/hv_24x7/interface/sockets", &count) < 0 ? 1 : count; + char path[PATH_MAX] = "/devices/hv_24x7/interface/"; + + atoi(pe->aggr_mode) == PerChip ? strcat(path, "sockets") : strcat(path, "coresperchip"); + return sysfs__read_int(path, &count) < 0 ? 1 : count; } diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 5476e0767e8c..d03bac65a3c2 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -15,7 +15,6 @@ #include "rblist.h" #include #include -#include "pmu-events/pmu-events.h" #include "strlist.h" #include #include @@ -636,7 +635,7 @@ static bool metricgroup__has_constraint(struct pmu_event *pe) return false; } -int __weak arch_get_runtimeparam(void) +int __weak arch_get_runtimeparam(struct pmu_event *pe __maybe_unused) { return 1; } @@ -904,7 +903,7 @@ static int add_metric(struct list_head *metric_list, } else { int j, count; - count = arch_get_runtimeparam(); + count = arch_get_runtimeparam(pe); /* This loop is added to create multiple * events depend on count value and add diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 62623a39cbec..491a5d78252d 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -5,6 +5,7 @@ #include #include #include +#include "pmu-events/pmu-events.h" struct evsel; struct evlist; @@ -52,6 +53,6 @@ int metricgroup__parse_groups_test(struct evlist *evlist, void metricgroup__print(bool metrics, bool groups, char *filter, bool raw, bool details); bool metricgroup__has_metric(const char *metric); -int arch_get_runtimeparam(void); +int arch_get_runtimeparam(struct pmu_event *pe __maybe_unused); void metricgroup__rblist_exit(struct rblist *metric_events); #endif -- cgit v1.3.1 From b1f815c479c1c7422f4b6552c967be9d39a9f127 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Mon, 7 Sep 2020 12:11:33 +0530 Subject: perf vendor events power9: Add hv_24x7 core level metric events This patch adds hv_24x7 core level events in nest_metric.json file and also add PerChip/PerCore field in metric events. Result: power9 platform: command:# ./perf stat --metric-only -M PowerBUS_Frequency -C 0 -I 1000 1.000070601 1.9 2.0 2.000253881 2.0 1.9 3.000364810 2.0 2.0 Signed-off-by: Kajol Jain Acked-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: John Garry Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20200907064133.75090-6-kjain@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/powerpc/power9/nest_metrics.json | 35 ++++++++++++++-------- 1 file changed, 22 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json index 8383a37647ad..7a5d1bf543f8 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json @@ -1,37 +1,46 @@ [ { - "MetricExpr": "(hv_24x7@PM_MCS01_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_RD_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT23\\,chip\\=?@)", - "MetricName": "Memory_RD_BW_Chip", - "MetricGroup": "Memory_BW", - "ScaleUnit": "1.6e-2MB" + "MetricExpr": "(hv_24x7@PM_MCS01_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_RD_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT23\\,chip\\=?@)", + "MetricName": "Memory_RD_BW_Chip", + "MetricGroup": "Memory_BW", + "ScaleUnit": "1.6e-2MB", + "AggregationMode": "PerChip" }, { "MetricExpr": "(hv_24x7@PM_MCS01_128B_WR_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_WR_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_WR_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_WR_DISP_PORT23\\,chip\\=?@ )", - "MetricName": "Memory_WR_BW_Chip", - "MetricGroup": "Memory_BW", - "ScaleUnit": "1.6e-2MB" + "MetricName": "Memory_WR_BW_Chip", + "MetricGroup": "Memory_BW", + "ScaleUnit": "1.6e-2MB", + "AggregationMode": "PerChip" }, { "MetricExpr": "(hv_24x7@PM_PB_CYC\\,chip\\=?@ )", - "MetricName": "PowerBUS_Frequency", - "ScaleUnit": "2.5e-7GHz" + "MetricName": "PowerBUS_Frequency", + "ScaleUnit": "2.5e-7GHz", + "AggregationMode": "PerChip" + }, + { + "MetricExpr": "(hv_24x7@CPM_CS_32MHZ_CYC\\,domain\\=3\\,core\\=?@ )", + "MetricName": "CPM_CS_32MHZ_CYC", + "ScaleUnit": "1MHz", + "AggregationMode": "PerCore" }, { "MetricExpr" : "nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT23@", "MetricName" : "mcs01-read", - "MetricGroup" : "memory_bw", + "MetricGroup" : "memory-bandwidth", "ScaleUnit": "6.1e-5MB" }, { "MetricExpr" : "nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT23@", "MetricName" : "mcs23-read", - "MetricGroup" : "memory_bw", + "MetricGroup" : "memory-bandwidth", "ScaleUnit": "6.1e-5MB" }, { "MetricExpr" : "nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT23@", "MetricName" : "mcs01-write", - "MetricGroup" : "memory_bw", + "MetricGroup" : "memory-bandwidth", "ScaleUnit": "6.1e-5MB" }, { @@ -48,7 +57,7 @@ { "MetricExpr" : "(nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT23@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT23@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT23@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT23@)", "MetricName" : "Memory-bandwidth-MCS", - "MetricGroup" : "memory_bw", + "MetricGroup" : "memory-bandwidth", "ScaleUnit": "6.1e-5MB" } ] -- cgit v1.3.1 From 8366f0d268c20929d82d4b1407bd4c6f9232bdec Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 27 Aug 2020 21:32:01 +0200 Subject: perf tests: Call test_attr__open() directly There's no longer need to call test_attr__open() from sys_perf_event_open(), because both 'perf record' and 'perf stat' call evsel__open_cpu(), so we can call it directly from there and not polute the perf-sys.h header. Committer testing: Before and after: # perf test attr 17: Setup struct perf_event_attr : Ok 49: Synthesize attr update : Ok # perf test -v attr 17: Setup struct perf_event_attr : --- start --- test child forked, pid 2170868 running '/home/acme/libexec/perf-core/tests/attr/test-record-branch-filter-any_ret' unsupp '/home/acme/libexec/perf-core/tests/attr/test-record-branch-filter-any_ret' running '/home/acme/libexec/perf-core/tests/attr/test-record-C0' running '/home/acme/libexec/perf-core/tests/attr/test-record-graph-fp' running '/home/acme/libexec/perf-core/tests/attr/test-record-period' running '/home/acme/libexec/perf-core/tests/attr/test-record-group-sampling' running '/home/acme/libexec/perf-core/tests/attr/test-record-freq' running '/home/acme/libexec/perf-core/tests/attr/test-stat-detailed-3' running '/home/acme/libexec/perf-core/tests/attr/test-record-branch-filter-k' unsupp '/home/acme/libexec/perf-core/tests/attr/test-record-branch-filter-k' running '/home/acme/libexec/perf-core/tests/attr/test-stat-group1' running '/home/acme/libexec/perf-core/tests/attr/test-record-branch-filter-u' unsupp '/home/acme/libexec/perf-core/tests/attr/test-record-branch-filter-u' running '/home/acme/libexec/perf-core/tests/attr/test-stat-basic' running '/home/acme/libexec/perf-core/tests/attr/test-record-branch-filter-any_call' unsupp '/home/acme/libexec/perf-core/tests/attr/test-record-branch-filter-any_call' running '/home/acme/libexec/perf-core/tests/attr/test-stat-default' running '/home/acme/libexec/perf-core/tests/attr/test-record-graph-dwarf' running '/home/acme/libexec/perf-core/tests/attr/test-record-no-buffering' running '/home/acme/libexec/perf-core/tests/attr/test-record-raw' running '/home/acme/libexec/perf-core/tests/attr/test-stat-detailed-2' running '/home/acme/libexec/perf-core/tests/attr/test-record-count' running '/home/acme/libexec/perf-core/tests/attr/test-record-data' running '/home/acme/libexec/perf-core/tests/attr/test-record-branch-filter-any' unsupp '/home/acme/libexec/perf-core/tests/attr/test-record-branch-filter-any' running '/home/acme/libexec/perf-core/tests/attr/test-stat-group' running '/home/acme/libexec/perf-core/tests/attr/test-record-branch-any' unsupp '/home/acme/libexec/perf-core/tests/attr/test-record-branch-any' running '/home/acme/libexec/perf-core/tests/attr/test-record-graph-default' running '/home/acme/libexec/perf-core/tests/attr/test-record-no-samples' running '/home/acme/libexec/perf-core/tests/attr/test-stat-C0' running '/home/acme/libexec/perf-core/tests/attr/test-record-no-inherit' running '/home/acme/libexec/perf-core/tests/attr/test-record-branch-filter-ind_call' unsupp '/home/acme/libexec/perf-core/tests/attr/test-record-branch-filter-ind_call' running '/home/acme/libexec/perf-core/tests/attr/test-record-basic' running '/home/acme/libexec/perf-core/tests/attr/test-record-group1' running '/home/acme/libexec/perf-core/tests/attr/test-record-pfm-period' unsupp '/home/acme/libexec/perf-core/tests/attr/test-record-pfm-period' running '/home/acme/libexec/perf-core/tests/attr/test-stat-detailed-1' running '/home/acme/libexec/perf-core/tests/attr/test-stat-no-inherit' running '/home/acme/libexec/perf-core/tests/attr/test-record-branch-filter-hv' unsupp '/home/acme/libexec/perf-core/tests/attr/test-record-branch-filter-hv' running '/home/acme/libexec/perf-core/tests/attr/test-record-group' test child finished with 0 ---- end ---- Setup struct perf_event_attr: Ok 49: Synthesize attr update : --- start --- test child forked, pid 2171004 test child finished with 0 ---- end ---- Synthesize attr update: Ok # Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200827193201.GB127372@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-sys.h | 22 ++-------------------- tools/perf/tests/attr.c | 2 +- tools/perf/util/evsel.c | 5 +++++ tools/perf/util/python.c | 2 +- tools/perf/util/util.h | 6 ++++++ 5 files changed, 15 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h index 15e458e150bd..7a2264e1e4e1 100644 --- a/tools/perf/perf-sys.h +++ b/tools/perf/perf-sys.h @@ -9,31 +9,13 @@ struct perf_event_attr; -extern bool test_attr__enabled; -void test_attr__ready(void); -void test_attr__init(void); -void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, - int fd, int group_fd, unsigned long flags); - -#ifndef HAVE_ATTR_TEST -#define HAVE_ATTR_TEST 1 -#endif - static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags) { - int fd; - - fd = syscall(__NR_perf_event_open, attr, pid, cpu, - group_fd, flags); - -#if HAVE_ATTR_TEST - if (unlikely(test_attr__enabled)) - test_attr__open(attr, pid, cpu, fd, group_fd, flags); -#endif - return fd; + return syscall(__NR_perf_event_open, attr, pid, cpu, + group_fd, flags); } #endif /* _PERF_SYS_H */ diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index a9599ab8c471..ec972e0892ab 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -30,9 +30,9 @@ #include #include #include -#include "../perf-sys.h" #include #include "event.h" +#include "util.h" #include "tests.h" #define ENV "PERF_TEST_ATTR" diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index fd865002cbbd..6f0e23105cf8 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1680,6 +1680,11 @@ retry_open: FD(evsel, cpu, thread) = fd; + if (unlikely(test_attr__enabled)) { + test_attr__open(&evsel->core.attr, pid, cpus->map[cpu], + fd, group_fd, flags); + } + if (fd < 0) { err = -errno; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 75a9b1d62bba..74f85948d101 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -17,7 +17,7 @@ #include "mmap.h" #include "util/env.h" #include -#include "../perf-sys.h" +#include "util.h" #if PY_MAJOR_VERSION < 3 #define _PyUnicode_FromString(arg) \ diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index f486fdd3a538..ad737052e597 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -62,4 +62,10 @@ char *perf_exe(char *buf, int len); #endif #endif +extern bool test_attr__enabled; +void test_attr__ready(void); +void test_attr__init(void); +struct perf_event_attr; +void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, + int fd, int group_fd, unsigned long flags); #endif /* GIT_COMPAT_UTIL_H */ -- cgit v1.3.1 From 3890b8d92710af75baedf291832cf40193b33454 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Fri, 4 Sep 2020 16:30:19 +0100 Subject: objtool: Group headers to check in a single list In order to support multiple architectures and potentially different sets of headers to compare against their kernel equivalent, it is simpler to have all headers to check in a single list. Reviewed-by: Miroslav Benes Signed-off-by: Julien Thierry Signed-off-by: Josh Poimboeuf --- tools/objtool/sync-check.sh | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh index aa099b21dffa..b5f526638f0a 100755 --- a/tools/objtool/sync-check.sh +++ b/tools/objtool/sync-check.sh @@ -1,14 +1,18 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -FILES=' +FILES=" arch/x86/include/asm/inat_types.h arch/x86/include/asm/orc_types.h arch/x86/include/asm/emulate_prefix.h arch/x86/lib/x86-opcode-map.txt arch/x86/tools/gen-insn-attr-x86.awk include/linux/static_call_types.h -' +arch/x86/include/asm/inat.h -I '^#include [\"<]\(asm/\)*inat_types.h[\">]' +arch/x86/include/asm/insn.h -I '^#include [\"<]\(asm/\)*inat.h[\">]' +arch/x86/lib/inat.c -I '^#include [\"<]\(../include/\)*asm/insn.h[\">]' +arch/x86/lib/insn.c -I '^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]' -I '^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]' +" check_2 () { file1=$1 @@ -41,11 +45,12 @@ fi cd ../.. -for i in $FILES; do - check $i -done +while read -r file_entry; do + if [ -z "$file_entry" ]; then + continue + fi -check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"' -check arch/x86/include/asm/insn.h '-I "^#include [\"<]\(asm/\)*inat.h[\">]"' -check arch/x86/lib/inat.c '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"' -check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"' + check $file_entry +done < Date: Fri, 4 Sep 2020 16:30:20 +0100 Subject: objtool: Make sync-check consider the target architecture Do not take into account outdated headers unrelated to the build of the current architecture. [ jpoimboe: use $SRCARCH directly ] Reviewed-by: Miroslav Benes Signed-off-by: Julien Thierry Signed-off-by: Josh Poimboeuf --- tools/objtool/sync-check.sh | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh index b5f526638f0a..cea1c12607b9 100755 --- a/tools/objtool/sync-check.sh +++ b/tools/objtool/sync-check.sh @@ -1,6 +1,12 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +if [ -z "$SRCARCH" ]; then + echo 'sync-check.sh: error: missing $SRCARCH environment variable' >&2 + exit 1 +fi + +if [ "$SRCARCH" = "x86" ]; then FILES=" arch/x86/include/asm/inat_types.h arch/x86/include/asm/orc_types.h @@ -13,6 +19,7 @@ arch/x86/include/asm/insn.h -I '^#include [\"<]\(asm/\)*inat.h[\">]' arch/x86/lib/inat.c -I '^#include [\"<]\(../include/\)*asm/insn.h[\">]' arch/x86/lib/insn.c -I '^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]' -I '^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]' " +fi check_2 () { file1=$1 -- cgit v1.3.1 From c8ea0d672521ef663f0f9a77faa94d0d47102d77 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Fri, 4 Sep 2020 16:30:21 +0100 Subject: objtool: Move macros describing structures to arch-dependent code Some macros are defined to describe the size and layout of structures exception_table_entry, jump_entry and alt_instr. These values can vary from one architecture to another. Have the values be defined by arch specific code. Suggested-by: Raphael Gault Reviewed-by: Miroslav Benes Signed-off-by: Julien Thierry Signed-off-by: Josh Poimboeuf --- tools/objtool/arch/x86/include/arch_special.h | 20 ++++++++++++++++++++ tools/objtool/special.c | 16 +--------------- 2 files changed, 21 insertions(+), 15 deletions(-) create mode 100644 tools/objtool/arch/x86/include/arch_special.h (limited to 'tools') diff --git a/tools/objtool/arch/x86/include/arch_special.h b/tools/objtool/arch/x86/include/arch_special.h new file mode 100644 index 000000000000..d818b2bffa02 --- /dev/null +++ b/tools/objtool/arch/x86/include/arch_special.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _X86_ARCH_SPECIAL_H +#define _X86_ARCH_SPECIAL_H + +#define EX_ENTRY_SIZE 12 +#define EX_ORIG_OFFSET 0 +#define EX_NEW_OFFSET 4 + +#define JUMP_ENTRY_SIZE 16 +#define JUMP_ORIG_OFFSET 0 +#define JUMP_NEW_OFFSET 4 + +#define ALT_ENTRY_SIZE 13 +#define ALT_ORIG_OFFSET 0 +#define ALT_NEW_OFFSET 4 +#define ALT_FEATURE_OFFSET 8 +#define ALT_ORIG_LEN_OFFSET 10 +#define ALT_NEW_LEN_OFFSET 11 + +#endif /* _X86_ARCH_SPECIAL_H */ diff --git a/tools/objtool/special.c b/tools/objtool/special.c index e893f1e48e44..b04f395de5de 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -14,21 +14,7 @@ #include "builtin.h" #include "special.h" #include "warn.h" - -#define EX_ENTRY_SIZE 12 -#define EX_ORIG_OFFSET 0 -#define EX_NEW_OFFSET 4 - -#define JUMP_ENTRY_SIZE 16 -#define JUMP_ORIG_OFFSET 0 -#define JUMP_NEW_OFFSET 4 - -#define ALT_ENTRY_SIZE 13 -#define ALT_ORIG_OFFSET 0 -#define ALT_NEW_OFFSET 4 -#define ALT_FEATURE_OFFSET 8 -#define ALT_ORIG_LEN_OFFSET 10 -#define ALT_NEW_LEN_OFFSET 11 +#include "arch_special.h" #define X86_FEATURE_POPCNT (4*32+23) #define X86_FEATURE_SMAP (9*32+20) -- cgit v1.3.1 From eda3dc905834dc9c99132f987f77b68cf53a8682 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Fri, 4 Sep 2020 16:30:22 +0100 Subject: objtool: Abstract alternative special case handling Some alternatives associated with a specific feature need to be treated in a special way. Since the features and how to treat them vary from one architecture to another, move the special case handling to arch specific code. Reviewed-by: Miroslav Benes Signed-off-by: Julien Thierry Signed-off-by: Josh Poimboeuf --- tools/objtool/arch/x86/Build | 1 + tools/objtool/arch/x86/special.c | 37 +++++++++++++++++++++++++++++++++++++ tools/objtool/objtool.h | 2 ++ tools/objtool/special.c | 32 +++++--------------------------- tools/objtool/special.h | 2 ++ tools/objtool/weak.c | 2 -- 6 files changed, 47 insertions(+), 29 deletions(-) create mode 100644 tools/objtool/arch/x86/special.c (limited to 'tools') diff --git a/tools/objtool/arch/x86/Build b/tools/objtool/arch/x86/Build index 7c5004008e97..9f7869b5c5e0 100644 --- a/tools/objtool/arch/x86/Build +++ b/tools/objtool/arch/x86/Build @@ -1,3 +1,4 @@ +objtool-y += special.o objtool-y += decode.o inat_tables_script = ../arch/x86/tools/gen-insn-attr-x86.awk diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c new file mode 100644 index 000000000000..823561e4015c --- /dev/null +++ b/tools/objtool/arch/x86/special.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include "../../special.h" +#include "../../builtin.h" + +#define X86_FEATURE_POPCNT (4 * 32 + 23) +#define X86_FEATURE_SMAP (9 * 32 + 20) + +void arch_handle_alternative(unsigned short feature, struct special_alt *alt) +{ + switch (feature) { + case X86_FEATURE_SMAP: + /* + * If UACCESS validation is enabled; force that alternative; + * otherwise force it the other way. + * + * What we want to avoid is having both the original and the + * alternative code flow at the same time, in that case we can + * find paths that see the STAC but take the NOP instead of + * CLAC and the other way around. + */ + if (uaccess) + alt->skip_orig = true; + else + alt->skip_alt = true; + break; + case X86_FEATURE_POPCNT: + /* + * It has been requested that we don't validate the !POPCNT + * feature path which is a "very very small percentage of + * machines". + */ + alt->skip_orig = true; + break; + default: + break; + } +} diff --git a/tools/objtool/objtool.h b/tools/objtool/objtool.h index a635f68a9b09..4125d4578b23 100644 --- a/tools/objtool/objtool.h +++ b/tools/objtool/objtool.h @@ -12,6 +12,8 @@ #include "elf.h" +#define __weak __attribute__((weak)) + struct objtool_file { struct elf *elf; struct list_head insn_list; diff --git a/tools/objtool/special.c b/tools/objtool/special.c index b04f395de5de..1a2420febd08 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -16,9 +16,6 @@ #include "warn.h" #include "arch_special.h" -#define X86_FEATURE_POPCNT (4*32+23) -#define X86_FEATURE_SMAP (9*32+20) - struct special_entry { const char *sec; bool group, jump_or_nop; @@ -54,6 +51,10 @@ struct special_entry entries[] = { {}, }; +void __weak arch_handle_alternative(unsigned short feature, struct special_alt *alt) +{ +} + static int get_alt_entry(struct elf *elf, struct special_entry *entry, struct section *sec, int idx, struct special_alt *alt) @@ -78,30 +79,7 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, feature = *(unsigned short *)(sec->data->d_buf + offset + entry->feature); - - /* - * It has been requested that we don't validate the !POPCNT - * feature path which is a "very very small percentage of - * machines". - */ - if (feature == X86_FEATURE_POPCNT) - alt->skip_orig = true; - - /* - * If UACCESS validation is enabled; force that alternative; - * otherwise force it the other way. - * - * What we want to avoid is having both the original and the - * alternative code flow at the same time, in that case we can - * find paths that see the STAC but take the NOP instead of - * CLAC and the other way around. - */ - if (feature == X86_FEATURE_SMAP) { - if (uaccess) - alt->skip_orig = true; - else - alt->skip_alt = true; - } + arch_handle_alternative(feature, alt); } orig_reloc = find_reloc_by_dest(elf, sec, offset + entry->orig); diff --git a/tools/objtool/special.h b/tools/objtool/special.h index 35061530e46e..44da89afeda2 100644 --- a/tools/objtool/special.h +++ b/tools/objtool/special.h @@ -28,4 +28,6 @@ struct special_alt { int special_get_alts(struct elf *elf, struct list_head *alts); +void arch_handle_alternative(unsigned short feature, struct special_alt *alt); + #endif /* _SPECIAL_H */ diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c index 29180d599b08..7843e9a7a72f 100644 --- a/tools/objtool/weak.c +++ b/tools/objtool/weak.c @@ -9,8 +9,6 @@ #include #include "objtool.h" -#define __weak __attribute__((weak)) - #define UNSUPPORTED(name) \ ({ \ fprintf(stderr, "error: objtool: " name " not implemented\n"); \ -- cgit v1.3.1 From 45245f51f9a4b9a883a8c94468473c1de9b88153 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Fri, 4 Sep 2020 16:30:23 +0100 Subject: objtool: Make relocation in alternative handling arch dependent As pointed out by the comment in handle_group_alt(), support of relocation for instructions in an alternative group depends on whether arch specific kernel code handles it. So, let objtool arch specific code decide whether a relocation for the alternative section should be accepted. Reviewed-by: Miroslav Benes Signed-off-by: Julien Thierry Signed-off-by: Josh Poimboeuf --- tools/objtool/arch/x86/special.c | 13 +++++++++++++ tools/objtool/check.c | 19 ++++++------------- tools/objtool/check.h | 6 ++++++ tools/objtool/special.h | 4 ++++ 4 files changed, 29 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c index 823561e4015c..34e0e162e6fd 100644 --- a/tools/objtool/arch/x86/special.c +++ b/tools/objtool/arch/x86/special.c @@ -35,3 +35,16 @@ void arch_handle_alternative(unsigned short feature, struct special_alt *alt) break; } } + +bool arch_support_alt_relocation(struct special_alt *special_alt, + struct instruction *insn, + struct reloc *reloc) +{ + /* + * The x86 alternatives code adjusts the offsets only when it + * encounters a branch instruction at the very beginning of the + * replacement group. + */ + return insn->offset == special_alt->new_off && + (insn->type == INSN_CALL || is_static_jump(insn)); +} diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4afc2d5465b9..1796a7c464eb 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -110,12 +110,6 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file, for (insn = next_insn_same_sec(file, insn); insn; \ insn = next_insn_same_sec(file, insn)) -static bool is_static_jump(struct instruction *insn) -{ - return insn->type == INSN_JUMP_CONDITIONAL || - insn->type == INSN_JUMP_UNCONDITIONAL; -} - static bool is_sibling_call(struct instruction *insn) { /* An indirect jump is either a sibling call or a jump to a table. */ @@ -972,6 +966,8 @@ static int handle_group_alt(struct objtool_file *file, alt_group = alt_group_next_index++; insn = *new_insn; sec_for_each_insn_from(file, insn) { + struct reloc *alt_reloc; + if (insn->offset >= special_alt->new_off + special_alt->new_len) break; @@ -988,14 +984,11 @@ static int handle_group_alt(struct objtool_file *file, * .altinstr_replacement section, unless the arch's * alternatives code can adjust the relative offsets * accordingly. - * - * The x86 alternatives code adjusts the offsets only when it - * encounters a branch instruction at the very beginning of the - * replacement group. */ - if ((insn->offset != special_alt->new_off || - (insn->type != INSN_CALL && !is_static_jump(insn))) && - find_reloc_by_dest_range(file->elf, insn->sec, insn->offset, insn->len)) { + alt_reloc = find_reloc_by_dest_range(file->elf, insn->sec, + insn->offset, insn->len); + if (alt_reloc && + !arch_support_alt_relocation(special_alt, insn, alt_reloc)) { WARN_FUNC("unsupported relocation in alternatives section", insn->sec, insn->offset); diff --git a/tools/objtool/check.h b/tools/objtool/check.h index 6cac34542122..1de1188b23cd 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -48,6 +48,12 @@ struct instruction { #endif }; +static inline bool is_static_jump(struct instruction *insn) +{ + return insn->type == INSN_JUMP_CONDITIONAL || + insn->type == INSN_JUMP_UNCONDITIONAL; +} + struct instruction *find_insn(struct objtool_file *file, struct section *sec, unsigned long offset); diff --git a/tools/objtool/special.h b/tools/objtool/special.h index 44da89afeda2..1dc1bb3e74c6 100644 --- a/tools/objtool/special.h +++ b/tools/objtool/special.h @@ -7,6 +7,7 @@ #define _SPECIAL_H #include +#include "check.h" #include "elf.h" struct special_alt { @@ -30,4 +31,7 @@ int special_get_alts(struct elf *elf, struct list_head *alts); void arch_handle_alternative(unsigned short feature, struct special_alt *alt); +bool arch_support_alt_relocation(struct special_alt *special_alt, + struct instruction *insn, + struct reloc *reloc); #endif /* _SPECIAL_H */ -- cgit v1.3.1 From d871f7b5a6a2a30f4eba577fd56941fa3657e394 Mon Sep 17 00:00:00 2001 From: Raphael Gault Date: Fri, 4 Sep 2020 16:30:24 +0100 Subject: objtool: Refactor jump table code to support other architectures The way to identify jump tables and retrieve all the data necessary to handle the different execution branches is not the same on all architectures. In order to be able to add other architecture support, define an arch-dependent function to process jump-tables. Reviewed-by: Miroslav Benes Signed-off-by: Raphael Gault [J.T.: Move arm64 bits out of this patch, Have only one function to find the start of the jump table, for now assume that the jump table format will be the same as x86] Signed-off-by: Julien Thierry Signed-off-by: Josh Poimboeuf --- tools/objtool/arch/x86/special.c | 95 ++++++++++++++++++++++++++++++++++++++++ tools/objtool/check.c | 90 ++----------------------------------- tools/objtool/check.h | 1 - tools/objtool/special.h | 4 ++ 4 files changed, 103 insertions(+), 87 deletions(-) (limited to 'tools') diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c index 34e0e162e6fd..fd4af88c0ea5 100644 --- a/tools/objtool/arch/x86/special.c +++ b/tools/objtool/arch/x86/special.c @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later +#include + #include "../../special.h" #include "../../builtin.h" @@ -48,3 +50,96 @@ bool arch_support_alt_relocation(struct special_alt *special_alt, return insn->offset == special_alt->new_off && (insn->type == INSN_CALL || is_static_jump(insn)); } + +/* + * There are 3 basic jump table patterns: + * + * 1. jmpq *[rodata addr](,%reg,8) + * + * This is the most common case by far. It jumps to an address in a simple + * jump table which is stored in .rodata. + * + * 2. jmpq *[rodata addr](%rip) + * + * This is caused by a rare GCC quirk, currently only seen in three driver + * functions in the kernel, only with certain obscure non-distro configs. + * + * As part of an optimization, GCC makes a copy of an existing switch jump + * table, modifies it, and then hard-codes the jump (albeit with an indirect + * jump) to use a single entry in the table. The rest of the jump table and + * some of its jump targets remain as dead code. + * + * In such a case we can just crudely ignore all unreachable instruction + * warnings for the entire object file. Ideally we would just ignore them + * for the function, but that would require redesigning the code quite a + * bit. And honestly that's just not worth doing: unreachable instruction + * warnings are of questionable value anyway, and this is such a rare issue. + * + * 3. mov [rodata addr],%reg1 + * ... some instructions ... + * jmpq *(%reg1,%reg2,8) + * + * This is a fairly uncommon pattern which is new for GCC 6. As of this + * writing, there are 11 occurrences of it in the allmodconfig kernel. + * + * As of GCC 7 there are quite a few more of these and the 'in between' code + * is significant. Esp. with KASAN enabled some of the code between the mov + * and jmpq uses .rodata itself, which can confuse things. + * + * TODO: Once we have DWARF CFI and smarter instruction decoding logic, + * ensure the same register is used in the mov and jump instructions. + * + * NOTE: RETPOLINE made it harder still to decode dynamic jumps. + */ +struct reloc *arch_find_switch_table(struct objtool_file *file, + struct instruction *insn) +{ + struct reloc *text_reloc, *rodata_reloc; + struct section *table_sec; + unsigned long table_offset; + + /* look for a relocation which references .rodata */ + text_reloc = find_reloc_by_dest_range(file->elf, insn->sec, + insn->offset, insn->len); + if (!text_reloc || text_reloc->sym->type != STT_SECTION || + !text_reloc->sym->sec->rodata) + return NULL; + + table_offset = text_reloc->addend; + table_sec = text_reloc->sym->sec; + + if (text_reloc->type == R_X86_64_PC32) + table_offset += 4; + + /* + * Make sure the .rodata address isn't associated with a + * symbol. GCC jump tables are anonymous data. + * + * Also support C jump tables which are in the same format as + * switch jump tables. For objtool to recognize them, they + * need to be placed in the C_JUMP_TABLE_SECTION section. They + * have symbols associated with them. + */ + if (find_symbol_containing(table_sec, table_offset) && + strcmp(table_sec->name, C_JUMP_TABLE_SECTION)) + return NULL; + + /* + * Each table entry has a rela associated with it. The rela + * should reference text in the same function as the original + * instruction. + */ + rodata_reloc = find_reloc_by_dest(file->elf, table_sec, table_offset); + if (!rodata_reloc) + return NULL; + + /* + * Use of RIP-relative switch jumps is quite rare, and + * indicates a rare GCC quirk/bug which can leave dead + * code behind. + */ + if (text_reloc->type == R_X86_64_PC32) + file->ignore_unreachables = true; + + return rodata_reloc; +} diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 1796a7c464eb..a94ad88d036c 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -20,8 +20,6 @@ #define FAKE_JUMP_OFFSET -1 -#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table" - struct alternative { struct list_head list; struct instruction *insn; @@ -1190,56 +1188,15 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, } /* - * find_jump_table() - Given a dynamic jump, find the switch jump table in - * .rodata associated with it. - * - * There are 3 basic patterns: - * - * 1. jmpq *[rodata addr](,%reg,8) - * - * This is the most common case by far. It jumps to an address in a simple - * jump table which is stored in .rodata. - * - * 2. jmpq *[rodata addr](%rip) - * - * This is caused by a rare GCC quirk, currently only seen in three driver - * functions in the kernel, only with certain obscure non-distro configs. - * - * As part of an optimization, GCC makes a copy of an existing switch jump - * table, modifies it, and then hard-codes the jump (albeit with an indirect - * jump) to use a single entry in the table. The rest of the jump table and - * some of its jump targets remain as dead code. - * - * In such a case we can just crudely ignore all unreachable instruction - * warnings for the entire object file. Ideally we would just ignore them - * for the function, but that would require redesigning the code quite a - * bit. And honestly that's just not worth doing: unreachable instruction - * warnings are of questionable value anyway, and this is such a rare issue. - * - * 3. mov [rodata addr],%reg1 - * ... some instructions ... - * jmpq *(%reg1,%reg2,8) - * - * This is a fairly uncommon pattern which is new for GCC 6. As of this - * writing, there are 11 occurrences of it in the allmodconfig kernel. - * - * As of GCC 7 there are quite a few more of these and the 'in between' code - * is significant. Esp. with KASAN enabled some of the code between the mov - * and jmpq uses .rodata itself, which can confuse things. - * - * TODO: Once we have DWARF CFI and smarter instruction decoding logic, - * ensure the same register is used in the mov and jump instructions. - * - * NOTE: RETPOLINE made it harder still to decode dynamic jumps. + * find_jump_table() - Given a dynamic jump, find the switch jump table + * associated with it. */ static struct reloc *find_jump_table(struct objtool_file *file, struct symbol *func, struct instruction *insn) { - struct reloc *text_reloc, *table_reloc; + struct reloc *table_reloc; struct instruction *dest_insn, *orig_insn = insn; - struct section *table_sec; - unsigned long table_offset; /* * Backward search using the @first_jump_src links, these help avoid @@ -1260,52 +1217,13 @@ static struct reloc *find_jump_table(struct objtool_file *file, insn->jump_dest->offset > orig_insn->offset)) break; - /* look for a relocation which references .rodata */ - text_reloc = find_reloc_by_dest_range(file->elf, insn->sec, - insn->offset, insn->len); - if (!text_reloc || text_reloc->sym->type != STT_SECTION || - !text_reloc->sym->sec->rodata) - continue; - - table_offset = text_reloc->addend; - table_sec = text_reloc->sym->sec; - - if (text_reloc->type == R_X86_64_PC32) - table_offset += 4; - - /* - * Make sure the .rodata address isn't associated with a - * symbol. GCC jump tables are anonymous data. - * - * Also support C jump tables which are in the same format as - * switch jump tables. For objtool to recognize them, they - * need to be placed in the C_JUMP_TABLE_SECTION section. They - * have symbols associated with them. - */ - if (find_symbol_containing(table_sec, table_offset) && - strcmp(table_sec->name, C_JUMP_TABLE_SECTION)) - continue; - - /* - * Each table entry has a reloc associated with it. The reloc - * should reference text in the same function as the original - * instruction. - */ - table_reloc = find_reloc_by_dest(file->elf, table_sec, table_offset); + table_reloc = arch_find_switch_table(file, insn); if (!table_reloc) continue; dest_insn = find_insn(file, table_reloc->sym->sec, table_reloc->addend); if (!dest_insn || !dest_insn->func || dest_insn->func->pfunc != func) continue; - /* - * Use of RIP-relative switch jumps is quite rare, and - * indicates a rare GCC quirk/bug which can leave dead code - * behind. - */ - if (text_reloc->type == R_X86_64_PC32) - file->ignore_unreachables = true; - return table_reloc; } diff --git a/tools/objtool/check.h b/tools/objtool/check.h index 1de1188b23cd..5ec00a4b891b 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -66,5 +66,4 @@ struct instruction *find_insn(struct objtool_file *file, insn->sec == sec; \ insn = list_next_entry(insn, list)) - #endif /* _CHECK_H */ diff --git a/tools/objtool/special.h b/tools/objtool/special.h index 1dc1bb3e74c6..abddf38ef334 100644 --- a/tools/objtool/special.h +++ b/tools/objtool/special.h @@ -10,6 +10,8 @@ #include "check.h" #include "elf.h" +#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table" + struct special_alt { struct list_head list; @@ -34,4 +36,6 @@ void arch_handle_alternative(unsigned short feature, struct special_alt *alt); bool arch_support_alt_relocation(struct special_alt *special_alt, struct instruction *insn, struct reloc *reloc); +struct reloc *arch_find_switch_table(struct objtool_file *file, + struct instruction *insn); #endif /* _SPECIAL_H */ -- cgit v1.3.1 From ee819aedf34a8f35cd54ee3967c7beb4d1d4a635 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Fri, 4 Sep 2020 16:30:27 +0100 Subject: objtool: Make unwind hint definitions available to other architectures Unwind hints are useful to provide objtool with information about stack states in non-standard functions/code. While the type of information being provided might be very arch specific, the mechanism to provide the information can be useful for other architectures. Move the relevant unwint hint definitions for all architectures to see. [ jpoimboe: REGS_IRET -> REGS_PARTIAL ] Signed-off-by: Julien Thierry Signed-off-by: Josh Poimboeuf --- arch/x86/include/asm/orc_types.h | 34 --------- arch/x86/include/asm/unwind_hints.h | 56 +++----------- arch/x86/kernel/unwind_orc.c | 11 +-- include/linux/objtool.h | 88 ++++++++++++++++++++++ tools/arch/x86/include/asm/orc_types.h | 34 --------- tools/include/linux/objtool.h | 129 +++++++++++++++++++++++++++++++++ tools/objtool/check.c | 4 +- tools/objtool/orc_dump.c | 9 ++- tools/objtool/orc_gen.c | 5 +- tools/objtool/sync-check.sh | 4 +- 10 files changed, 249 insertions(+), 125 deletions(-) create mode 100644 tools/include/linux/objtool.h (limited to 'tools') diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h index d25534940bde..fdbffec4cfde 100644 --- a/arch/x86/include/asm/orc_types.h +++ b/arch/x86/include/asm/orc_types.h @@ -39,27 +39,6 @@ #define ORC_REG_SP_INDIRECT 9 #define ORC_REG_MAX 15 -/* - * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the - * caller's SP right before it made the call). Used for all callable - * functions, i.e. all C code and all callable asm functions. - * - * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points - * to a fully populated pt_regs from a syscall, interrupt, or exception. - * - * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset - * points to the iret return frame. - * - * The UNWIND_HINT macros are used only for the unwind_hint struct. They - * aren't used in struct orc_entry due to size and complexity constraints. - * Objtool converts them to real types when it converts the hints to orc - * entries. - */ -#define ORC_TYPE_CALL 0 -#define ORC_TYPE_REGS 1 -#define ORC_TYPE_REGS_IRET 2 -#define UNWIND_HINT_TYPE_RET_OFFSET 3 - #ifndef __ASSEMBLY__ /* * This struct is more or less a vastly simplified version of the DWARF Call @@ -78,19 +57,6 @@ struct orc_entry { unsigned end:1; } __packed; -/* - * This struct is used by asm and inline asm code to manually annotate the - * location of registers on the stack for the ORC unwinder. - * - * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*. - */ -struct unwind_hint { - u32 ip; - s16 sp_offset; - u8 sp_reg; - u8 type; - u8 end; -}; #endif /* __ASSEMBLY__ */ #endif /* _ORC_TYPES_H */ diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 7d903fdb3f43..664d4610d700 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -1,51 +1,17 @@ #ifndef _ASM_X86_UNWIND_HINTS_H #define _ASM_X86_UNWIND_HINTS_H +#include + #include "orc_types.h" #ifdef __ASSEMBLY__ -/* - * In asm, there are two kinds of code: normal C-type callable functions and - * the rest. The normal callable functions can be called by other code, and - * don't do anything unusual with the stack. Such normal callable functions - * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this - * category. In this case, no special debugging annotations are needed because - * objtool can automatically generate the ORC data for the ORC unwinder to read - * at runtime. - * - * Anything which doesn't fall into the above category, such as syscall and - * interrupt handlers, tends to not be called directly by other functions, and - * often does unusual non-C-function-type things with the stack pointer. Such - * code needs to be annotated such that objtool can understand it. The - * following CFI hint macros are for this type of code. - * - * These macros provide hints to objtool about the state of the stack at each - * instruction. Objtool starts from the hints and follows the code flow, - * making automatic CFI adjustments when it sees pushes and pops, filling out - * the debuginfo as necessary. It will also warn if it sees any - * inconsistencies. - */ -.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL end=0 -#ifdef CONFIG_STACK_VALIDATION -.Lunwind_hint_ip_\@: - .pushsection .discard.unwind_hints - /* struct unwind_hint */ - .long .Lunwind_hint_ip_\@ - . - .short \sp_offset - .byte \sp_reg - .byte \type - .byte \end - .balign 4 - .popsection -#endif -.endm - .macro UNWIND_HINT_EMPTY - UNWIND_HINT sp_reg=ORC_REG_UNDEFINED end=1 + UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1 .endm -.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0 +.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 .if \base == %rsp .if \indirect .set sp_reg, ORC_REG_SP_INDIRECT @@ -66,24 +32,24 @@ .set sp_offset, \offset - .if \iret - .set type, ORC_TYPE_REGS_IRET + .if \partial + .set type, UNWIND_HINT_TYPE_REGS_PARTIAL .elseif \extra == 0 - .set type, ORC_TYPE_REGS_IRET + .set type, UNWIND_HINT_TYPE_REGS_PARTIAL .set sp_offset, \offset + (16*8) .else - .set type, ORC_TYPE_REGS + .set type, UNWIND_HINT_TYPE_REGS .endif UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type .endm .macro UNWIND_HINT_IRET_REGS base=%rsp offset=0 - UNWIND_HINT_REGS base=\base offset=\offset iret=1 + UNWIND_HINT_REGS base=\base offset=\offset partial=1 .endm .macro UNWIND_HINT_FUNC sp_offset=8 - UNWIND_HINT sp_offset=\sp_offset + UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=\sp_offset type=UNWIND_HINT_TYPE_CALL .endm /* @@ -92,7 +58,7 @@ * initial_func_cfi. */ .macro UNWIND_HINT_RET_OFFSET sp_offset=8 - UNWIND_HINT type=UNWIND_HINT_TYPE_RET_OFFSET sp_offset=\sp_offset + UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_RET_OFFSET sp_offset=\sp_offset .endm #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index ec88bbe08a32..6a339ce328e0 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only +#include #include #include #include @@ -127,12 +128,12 @@ static struct orc_entry null_orc_entry = { .sp_offset = sizeof(long), .sp_reg = ORC_REG_SP, .bp_reg = ORC_REG_UNDEFINED, - .type = ORC_TYPE_CALL + .type = UNWIND_HINT_TYPE_CALL }; /* Fake frame pointer entry -- used as a fallback for generated code */ static struct orc_entry orc_fp_entry = { - .type = ORC_TYPE_CALL, + .type = UNWIND_HINT_TYPE_CALL, .sp_reg = ORC_REG_BP, .sp_offset = 16, .bp_reg = ORC_REG_PREV_SP, @@ -531,7 +532,7 @@ bool unwind_next_frame(struct unwind_state *state) /* Find IP, SP and possibly regs: */ switch (orc->type) { - case ORC_TYPE_CALL: + case UNWIND_HINT_TYPE_CALL: ip_p = sp - sizeof(long); if (!deref_stack_reg(state, ip_p, &state->ip)) @@ -546,7 +547,7 @@ bool unwind_next_frame(struct unwind_state *state) state->signal = false; break; - case ORC_TYPE_REGS: + case UNWIND_HINT_TYPE_REGS: if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) { orc_warn_current("can't access registers at %pB\n", (void *)orig_ip); @@ -559,7 +560,7 @@ bool unwind_next_frame(struct unwind_state *state) state->signal = true; break; - case ORC_TYPE_REGS_IRET: + case UNWIND_HINT_TYPE_REGS_PARTIAL: if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) { orc_warn_current("can't access iret registers at %pB\n", (void *)orig_ip); diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 15e9997a9fb4..ab82c793c897 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -2,9 +2,55 @@ #ifndef _LINUX_OBJTOOL_H #define _LINUX_OBJTOOL_H +#ifndef __ASSEMBLY__ + +#include + +/* + * This struct is used by asm and inline asm code to manually annotate the + * location of registers on the stack. + */ +struct unwind_hint { + u32 ip; + s16 sp_offset; + u8 sp_reg; + u8 type; + u8 end; +}; +#endif + +/* + * UNWIND_HINT_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP + * (the caller's SP right before it made the call). Used for all callable + * functions, i.e. all C code and all callable asm functions. + * + * UNWIND_HINT_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset + * points to a fully populated pt_regs from a syscall, interrupt, or exception. + * + * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that + * sp_reg+sp_offset points to the iret return frame. + */ +#define UNWIND_HINT_TYPE_CALL 0 +#define UNWIND_HINT_TYPE_REGS 1 +#define UNWIND_HINT_TYPE_REGS_PARTIAL 2 +#define UNWIND_HINT_TYPE_RET_OFFSET 3 + #ifdef CONFIG_STACK_VALIDATION #ifndef __ASSEMBLY__ + +#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ + "987: \n\t" \ + ".pushsection .discard.unwind_hints\n\t" \ + /* struct unwind_hint */ \ + ".long 987b - .\n\t" \ + ".short " __stringify(sp_offset) "\n\t" \ + ".byte " __stringify(sp_reg) "\n\t" \ + ".byte " __stringify(type) "\n\t" \ + ".byte " __stringify(end) "\n\t" \ + ".balign 4 \n\t" \ + ".popsection\n\t" + /* * This macro marks the given function's stack frame as "non-standard", which * tells objtool to ignore the function when doing stack metadata validation. @@ -29,12 +75,54 @@ .long 999b; \ .popsection; +/* + * In asm, there are two kinds of code: normal C-type callable functions and + * the rest. The normal callable functions can be called by other code, and + * don't do anything unusual with the stack. Such normal callable functions + * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this + * category. In this case, no special debugging annotations are needed because + * objtool can automatically generate the ORC data for the ORC unwinder to read + * at runtime. + * + * Anything which doesn't fall into the above category, such as syscall and + * interrupt handlers, tends to not be called directly by other functions, and + * often does unusual non-C-function-type things with the stack pointer. Such + * code needs to be annotated such that objtool can understand it. The + * following CFI hint macros are for this type of code. + * + * These macros provide hints to objtool about the state of the stack at each + * instruction. Objtool starts from the hints and follows the code flow, + * making automatic CFI adjustments when it sees pushes and pops, filling out + * the debuginfo as necessary. It will also warn if it sees any + * inconsistencies. + */ +.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.Lunwind_hint_ip_\@: + .pushsection .discard.unwind_hints + /* struct unwind_hint */ + .long .Lunwind_hint_ip_\@ - . + .short \sp_offset + .byte \sp_reg + .byte \type + .byte \end + .balign 4 + .popsection +.endm + #endif /* __ASSEMBLY__ */ #else /* !CONFIG_STACK_VALIDATION */ +#ifndef __ASSEMBLY__ + +#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ + "\n\t" #define STACK_FRAME_NON_STANDARD(func) +#else #define ANNOTATE_INTRA_FUNCTION_CALL +.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.endm +#endif #endif /* CONFIG_STACK_VALIDATION */ diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h index d25534940bde..fdbffec4cfde 100644 --- a/tools/arch/x86/include/asm/orc_types.h +++ b/tools/arch/x86/include/asm/orc_types.h @@ -39,27 +39,6 @@ #define ORC_REG_SP_INDIRECT 9 #define ORC_REG_MAX 15 -/* - * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the - * caller's SP right before it made the call). Used for all callable - * functions, i.e. all C code and all callable asm functions. - * - * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points - * to a fully populated pt_regs from a syscall, interrupt, or exception. - * - * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset - * points to the iret return frame. - * - * The UNWIND_HINT macros are used only for the unwind_hint struct. They - * aren't used in struct orc_entry due to size and complexity constraints. - * Objtool converts them to real types when it converts the hints to orc - * entries. - */ -#define ORC_TYPE_CALL 0 -#define ORC_TYPE_REGS 1 -#define ORC_TYPE_REGS_IRET 2 -#define UNWIND_HINT_TYPE_RET_OFFSET 3 - #ifndef __ASSEMBLY__ /* * This struct is more or less a vastly simplified version of the DWARF Call @@ -78,19 +57,6 @@ struct orc_entry { unsigned end:1; } __packed; -/* - * This struct is used by asm and inline asm code to manually annotate the - * location of registers on the stack for the ORC unwinder. - * - * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*. - */ -struct unwind_hint { - u32 ip; - s16 sp_offset; - u8 sp_reg; - u8 type; - u8 end; -}; #endif /* __ASSEMBLY__ */ #endif /* _ORC_TYPES_H */ diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h new file mode 100644 index 000000000000..ab82c793c897 --- /dev/null +++ b/tools/include/linux/objtool.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_OBJTOOL_H +#define _LINUX_OBJTOOL_H + +#ifndef __ASSEMBLY__ + +#include + +/* + * This struct is used by asm and inline asm code to manually annotate the + * location of registers on the stack. + */ +struct unwind_hint { + u32 ip; + s16 sp_offset; + u8 sp_reg; + u8 type; + u8 end; +}; +#endif + +/* + * UNWIND_HINT_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP + * (the caller's SP right before it made the call). Used for all callable + * functions, i.e. all C code and all callable asm functions. + * + * UNWIND_HINT_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset + * points to a fully populated pt_regs from a syscall, interrupt, or exception. + * + * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that + * sp_reg+sp_offset points to the iret return frame. + */ +#define UNWIND_HINT_TYPE_CALL 0 +#define UNWIND_HINT_TYPE_REGS 1 +#define UNWIND_HINT_TYPE_REGS_PARTIAL 2 +#define UNWIND_HINT_TYPE_RET_OFFSET 3 + +#ifdef CONFIG_STACK_VALIDATION + +#ifndef __ASSEMBLY__ + +#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ + "987: \n\t" \ + ".pushsection .discard.unwind_hints\n\t" \ + /* struct unwind_hint */ \ + ".long 987b - .\n\t" \ + ".short " __stringify(sp_offset) "\n\t" \ + ".byte " __stringify(sp_reg) "\n\t" \ + ".byte " __stringify(type) "\n\t" \ + ".byte " __stringify(end) "\n\t" \ + ".balign 4 \n\t" \ + ".popsection\n\t" + +/* + * This macro marks the given function's stack frame as "non-standard", which + * tells objtool to ignore the function when doing stack metadata validation. + * It should only be used in special cases where you're 100% sure it won't + * affect the reliability of frame pointers and kernel stack traces. + * + * For more information, see tools/objtool/Documentation/stack-validation.txt. + */ +#define STACK_FRAME_NON_STANDARD(func) \ + static void __used __section(.discard.func_stack_frame_non_standard) \ + *__func_stack_frame_non_standard_##func = func + +#else /* __ASSEMBLY__ */ + +/* + * This macro indicates that the following intra-function call is valid. + * Any non-annotated intra-function call will cause objtool to issue a warning. + */ +#define ANNOTATE_INTRA_FUNCTION_CALL \ + 999: \ + .pushsection .discard.intra_function_calls; \ + .long 999b; \ + .popsection; + +/* + * In asm, there are two kinds of code: normal C-type callable functions and + * the rest. The normal callable functions can be called by other code, and + * don't do anything unusual with the stack. Such normal callable functions + * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this + * category. In this case, no special debugging annotations are needed because + * objtool can automatically generate the ORC data for the ORC unwinder to read + * at runtime. + * + * Anything which doesn't fall into the above category, such as syscall and + * interrupt handlers, tends to not be called directly by other functions, and + * often does unusual non-C-function-type things with the stack pointer. Such + * code needs to be annotated such that objtool can understand it. The + * following CFI hint macros are for this type of code. + * + * These macros provide hints to objtool about the state of the stack at each + * instruction. Objtool starts from the hints and follows the code flow, + * making automatic CFI adjustments when it sees pushes and pops, filling out + * the debuginfo as necessary. It will also warn if it sees any + * inconsistencies. + */ +.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.Lunwind_hint_ip_\@: + .pushsection .discard.unwind_hints + /* struct unwind_hint */ + .long .Lunwind_hint_ip_\@ - . + .short \sp_offset + .byte \sp_reg + .byte \type + .byte \end + .balign 4 + .popsection +.endm + +#endif /* __ASSEMBLY__ */ + +#else /* !CONFIG_STACK_VALIDATION */ + +#ifndef __ASSEMBLY__ + +#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ + "\n\t" +#define STACK_FRAME_NON_STANDARD(func) +#else +#define ANNOTATE_INTRA_FUNCTION_CALL +.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.endm +#endif + +#endif /* CONFIG_STACK_VALIDATION */ + +#endif /* _LINUX_OBJTOOL_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index a94ad88d036c..95c6e0d31c0a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -14,6 +14,7 @@ #include "warn.h" #include "arch_elf.h" +#include #include #include #include @@ -1805,7 +1806,8 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, return 0; } - if (cfi->type == ORC_TYPE_REGS || cfi->type == ORC_TYPE_REGS_IRET) + if (cfi->type == UNWIND_HINT_TYPE_REGS || + cfi->type == UNWIND_HINT_TYPE_REGS_PARTIAL) return update_cfi_state_regs(insn, cfi, op); switch (op->dest.type) { diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c index fca46e006fc2..5e6a95368d35 100644 --- a/tools/objtool/orc_dump.c +++ b/tools/objtool/orc_dump.c @@ -4,6 +4,7 @@ */ #include +#include #include #include "objtool.h" #include "warn.h" @@ -37,12 +38,12 @@ static const char *reg_name(unsigned int reg) static const char *orc_type_name(unsigned int type) { switch (type) { - case ORC_TYPE_CALL: + case UNWIND_HINT_TYPE_CALL: return "call"; - case ORC_TYPE_REGS: + case UNWIND_HINT_TYPE_REGS: return "regs"; - case ORC_TYPE_REGS_IRET: - return "iret"; + case UNWIND_HINT_TYPE_REGS_PARTIAL: + return "regs (partial)"; default: return "?"; } diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 22fe4398197f..235663b96adc 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -6,6 +6,9 @@ #include #include +#include +#include + #include "check.h" #include "warn.h" @@ -146,7 +149,7 @@ int create_orc_sections(struct objtool_file *file) struct orc_entry empty = { .sp_reg = ORC_REG_UNDEFINED, .bp_reg = ORC_REG_UNDEFINED, - .type = ORC_TYPE_CALL, + .type = UNWIND_HINT_TYPE_CALL, }; sec = find_section_by_name(file->elf, ".orc_unwind"); diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh index cea1c12607b9..606a4b5e929f 100755 --- a/tools/objtool/sync-check.sh +++ b/tools/objtool/sync-check.sh @@ -6,8 +6,10 @@ if [ -z "$SRCARCH" ]; then exit 1 fi +FILES="include/linux/objtool.h" + if [ "$SRCARCH" = "x86" ]; then -FILES=" +FILES="$FILES arch/x86/include/asm/inat_types.h arch/x86/include/asm/orc_types.h arch/x86/include/asm/emulate_prefix.h -- cgit v1.3.1 From edea9e6bcbeaa41718b022a8b99ffddef2330bbc Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Fri, 4 Sep 2020 16:30:28 +0100 Subject: objtool: Decode unwind hint register depending on architecture The set of registers that can be included in an unwind hint and their encoding will depend on the architecture. Have arch specific code to decode that register. Signed-off-by: Julien Thierry Signed-off-by: Josh Poimboeuf --- tools/objtool/arch.h | 2 ++ tools/objtool/arch/x86/decode.c | 37 +++++++++++++++++++++++++++++++++++++ tools/objtool/check.c | 27 +-------------------------- 3 files changed, 40 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h index b18c5f61d42d..4a84c3081b8e 100644 --- a/tools/objtool/arch.h +++ b/tools/objtool/arch.h @@ -88,4 +88,6 @@ unsigned long arch_dest_reloc_offset(int addend); const char *arch_nop_insn(int len); +int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg); + #endif /* _ARCH_H */ diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 1967370440b3..cde9c36e40ae 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -15,6 +15,7 @@ #include "../../elf.h" #include "../../arch.h" #include "../../warn.h" +#include static unsigned char op_to_cfi_reg[][2] = { {CFI_AX, CFI_R8}, @@ -583,3 +584,39 @@ const char *arch_nop_insn(int len) return nops[len-1]; } + +int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg) +{ + struct cfi_reg *cfa = &insn->cfi.cfa; + + switch (sp_reg) { + case ORC_REG_UNDEFINED: + cfa->base = CFI_UNDEFINED; + break; + case ORC_REG_SP: + cfa->base = CFI_SP; + break; + case ORC_REG_BP: + cfa->base = CFI_BP; + break; + case ORC_REG_SP_INDIRECT: + cfa->base = CFI_SP_INDIRECT; + break; + case ORC_REG_R10: + cfa->base = CFI_R10; + break; + case ORC_REG_R13: + cfa->base = CFI_R13; + break; + case ORC_REG_DI: + cfa->base = CFI_DI; + break; + case ORC_REG_DX: + cfa->base = CFI_DX; + break; + default: + return -1; + } + + return 0; +} diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 95c6e0d31c0a..4e2f703b6a25 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1367,32 +1367,7 @@ static int read_unwind_hints(struct objtool_file *file) insn->hint = true; - switch (hint->sp_reg) { - case ORC_REG_UNDEFINED: - cfa->base = CFI_UNDEFINED; - break; - case ORC_REG_SP: - cfa->base = CFI_SP; - break; - case ORC_REG_BP: - cfa->base = CFI_BP; - break; - case ORC_REG_SP_INDIRECT: - cfa->base = CFI_SP_INDIRECT; - break; - case ORC_REG_R10: - cfa->base = CFI_R10; - break; - case ORC_REG_R13: - cfa->base = CFI_R13; - break; - case ORC_REG_DI: - cfa->base = CFI_DI; - break; - case ORC_REG_DX: - cfa->base = CFI_DX; - break; - default: + if (arch_decode_hint_reg(insn, hint->sp_reg)) { WARN_FUNC("unsupported unwind_hint sp base reg %d", insn->sec, insn->offset, hint->sp_reg); return -1; -- cgit v1.3.1 From d00451c8118f8f7ab8e057bc6ee2f8b7d70b6a1c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 9 Sep 2020 14:17:00 -0700 Subject: selftests/lkdtm: Use "comm" instead of "diff" for dmesg Instead of full GNU diff (which smaller boot environments may not have), use "comm" which is more available. Reported-by: Naresh Kamboju Cc: linux-kselftest@vger.kernel.org Fixes: f131d9edc29d ("selftests/lkdtm: Don't clear dmesg when running tests") Signed-off-by: Kees Cook Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20200909211700.2399399-1-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/lkdtm/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh index 8383eb89d88a..bb7a1775307b 100755 --- a/tools/testing/selftests/lkdtm/run.sh +++ b/tools/testing/selftests/lkdtm/run.sh @@ -82,7 +82,7 @@ dmesg > "$DMESG" ($SHELL -c 'cat <(echo '"$test"') >'"$TRIGGER" 2>/dev/null) || true # Record and dump the results -dmesg | diff --changed-group-format='%>' --unchanged-group-format='' "$DMESG" - > "$LOG" || true +dmesg | comm --nocheck-order -13 "$DMESG" - > "$LOG" || true cat "$LOG" # Check for expected output -- cgit v1.3.1 From 16f3ddfbad52fddd7dafdf4b540ca6c506c9d815 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 9 Sep 2020 17:22:50 +0100 Subject: tools: bpftool: Log info-level messages when building bpftool man pages To build man pages for bpftool (and for eBPF helper functions), rst2man can log different levels of information. Let's make it log all levels to keep the RST files clean. Doing so, rst2man complains about double colons, used for literal blocks, that look like underlines for section titles. Let's add the necessary blank lines. v2: - Use "--verbose" instead of "-r 1" (same behaviour but more readable). - Pass it through a RST2MAN_OPTS variable so we can easily pass other options too. Signed-off-by: Quentin Monnet Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200909162251.15498-2-quentin@isovalent.com --- tools/bpf/bpftool/Documentation/Makefile | 3 ++- tools/bpf/bpftool/Documentation/bpftool-btf.rst | 3 +++ tools/bpf/bpftool/Documentation/bpftool-gen.rst | 4 ++++ tools/bpf/bpftool/Documentation/bpftool-map.rst | 3 +++ 4 files changed, 12 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index 815ac9804aee..a45b51d98468 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -28,12 +28,13 @@ man: man8 helpers man8: $(DOC_MAN8) RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null) +RST2MAN_OPTS += --verbose $(OUTPUT)%.8: %.rst ifndef RST2MAN_DEP $(error "rst2man not found, but required to generate man pages") endif - $(QUIET_GEN)rst2man $< > $@ + $(QUIET_GEN)rst2man $(RST2MAN_OPTS) $< > $@ clean: helpers-clean $(call QUIET_CLEAN, Documentation) diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst index 896f4c6c2870..864553e62af4 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -91,6 +91,7 @@ OPTIONS EXAMPLES ======== **# bpftool btf dump id 1226** + :: [1] PTR '(anon)' type_id=2 @@ -104,6 +105,7 @@ EXAMPLES This gives an example of default output for all supported BTF kinds. **$ cat prog.c** + :: struct fwd_struct; @@ -144,6 +146,7 @@ This gives an example of default output for all supported BTF kinds. } **$ bpftool btf dump file prog.o** + :: [1] PTR '(anon)' type_id=2 diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst index df85dbd962c0..d52b03a352d7 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst @@ -146,6 +146,7 @@ OPTIONS EXAMPLES ======== **$ cat example.c** + :: #include @@ -187,6 +188,7 @@ This is example BPF application with two BPF programs and a mix of BPF maps and global variables. **$ bpftool gen skeleton example.o** + :: /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ @@ -241,6 +243,7 @@ and global variables. #endif /* __EXAMPLE_SKEL_H__ */ **$ cat example_user.c** + :: #include "example.skel.h" @@ -283,6 +286,7 @@ and global variables. } **# ./example_user** + :: my_map name: my_map diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 083db6c2fc67..8f187c6416cd 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -182,6 +182,7 @@ OPTIONS EXAMPLES ======== **# bpftool map show** + :: 10: hash name some_map flags 0x0 @@ -203,6 +204,7 @@ The following three commands are equivalent: **# bpftool map dump id 10** + :: key: 00 01 02 03 value: 00 01 02 03 04 05 06 07 @@ -210,6 +212,7 @@ The following three commands are equivalent: Found 2 elements **# bpftool map getnext id 10 key 0 1 2 3** + :: key: -- cgit v1.3.1 From 41d5c37b74084b44ccc688dbab508acb5fdddc48 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 9 Sep 2020 17:22:51 +0100 Subject: selftests, bpftool: Add bpftool (and eBPF helpers) documentation build eBPF selftests include a script to check that bpftool builds correctly with different command lines. Let's add one build for bpftool's documentation so as to detect errors or warning reported by rst2man when compiling the man pages. Also add a build to the selftests Makefile to make sure we build bpftool documentation along with bpftool when building the selftests. This also builds and checks warnings for the man page for eBPF helpers, which is built along bpftool's documentation. This change adds rst2man as a dependency for selftests (it comes with Python's "docutils"). v2: - Use "--exit-status=1" option for rst2man instead of counting lines from stderr. - Also build bpftool as part as the selftests build (and not only when the tests are actually run). Signed-off-by: Quentin Monnet Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200909162251.15498-3-quentin@isovalent.com --- tools/testing/selftests/bpf/Makefile | 5 +++++ tools/testing/selftests/bpf/test_bpftool_build.sh | 21 +++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 65d3d9aaeb31..05798c2b5c67 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -176,6 +176,11 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ OUTPUT=$(BUILD_DIR)/bpftool/ \ prefix= DESTDIR=$(SCRATCH_DIR)/ install + $(Q)mkdir -p $(BUILD_DIR)/bpftool/Documentation + $(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \ + -C $(BPFTOOLDIR)/Documentation \ + OUTPUT=$(BUILD_DIR)/bpftool/Documentation/ \ + prefix= DESTDIR=$(SCRATCH_DIR)/ install $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ ../../../include/uapi/linux/bpf.h \ diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh index ac349a5cea7e..2db3c60e1e61 100755 --- a/tools/testing/selftests/bpf/test_bpftool_build.sh +++ b/tools/testing/selftests/bpf/test_bpftool_build.sh @@ -85,6 +85,23 @@ make_with_tmpdir() { echo } +make_doc_and_clean() { + echo -e "\$PWD: $PWD" + echo -e "command: make -s $* doc >/dev/null" + RST2MAN_OPTS="--exit-status=1" make $J -s $* doc + if [ $? -ne 0 ] ; then + ERROR=1 + printf "FAILURE: Errors or warnings when building documentation\n" + fi + ( + if [ $# -ge 1 ] ; then + cd ${@: -1} + fi + make -s doc-clean + ) + echo +} + echo "Trying to build bpftool" echo -e "... through kbuild\n" @@ -145,3 +162,7 @@ make_and_clean make_with_tmpdir OUTPUT make_with_tmpdir O + +echo -e "Checking documentation build\n" +# From tools/bpf/bpftool +make_doc_and_clean -- cgit v1.3.1 From 82b8cf0acc7bfe7b247e5ddc4417f6146d74c0b8 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 9 Sep 2020 17:24:58 +0100 Subject: tools: bpftool: Print optional built-in features along with version Bpftool has a number of features that can be included or left aside during compilation. This includes: - Support for libbfd, providing the disassembler for JIT-compiled programs. - Support for BPF skeletons, used for profiling programs or iterating on the PIDs of processes associated with BPF objects. In order to make it easy for users to understand what features were compiled for a given bpftool binary, print the status of the two features above when showing the version number for bpftool ("bpftool -V" or "bpftool version"). Document this in the main manual page. Example invocations: $ bpftool version ./bpftool v5.9.0-rc1 features: libbfd, skeletons $ bpftool -p version { "version": "5.9.0-rc1", "features": { "libbfd": true, "skeletons": true } } Some other parameters are optional at compilation ("DISASM_FOUR_ARGS_SIGNATURE", LIBCAP support) but they do not impact significantly bpftool's behaviour from a user's point of view, so their status is not reported. Available commands and supported program types depend on the version number, and are therefore not reported either. Note that they are already available, albeit without JSON, via bpftool's help messages. v3: - Use a simple list instead of boolean values for plain output. v2: - Fix JSON (object instead or array for the features). Signed-off-by: Quentin Monnet Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200909162500.17010-2-quentin@isovalent.com --- tools/bpf/bpftool/Documentation/bpftool.rst | 8 ++++++- tools/bpf/bpftool/main.c | 33 +++++++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 420d4d5df8b6..a3629a3f1175 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -50,7 +50,13 @@ OPTIONS Print short help message (similar to **bpftool help**). -V, --version - Print version number (similar to **bpftool version**). + Print version number (similar to **bpftool version**), and + optional features that were included when bpftool was + compiled. Optional features include linking against libbfd to + provide the disassembler for JIT-ted programs (**bpftool prog + dump jited**) and usage of BPF skeletons (some features like + **bpftool prog profile** or showing pids associated to BPF + objects may rely on it). -j, --json Generate JSON output. For commands that cannot produce JSON, this diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 4a191fcbeb82..682daaa49e6a 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -70,13 +70,42 @@ static int do_help(int argc, char **argv) static int do_version(int argc, char **argv) { +#ifdef HAVE_LIBBFD_SUPPORT + const bool has_libbfd = true; +#else + const bool has_libbfd = false; +#endif +#ifdef BPFTOOL_WITHOUT_SKELETONS + const bool has_skeletons = false; +#else + const bool has_skeletons = true; +#endif + if (json_output) { - jsonw_start_object(json_wtr); + jsonw_start_object(json_wtr); /* root object */ + jsonw_name(json_wtr, "version"); jsonw_printf(json_wtr, "\"%s\"", BPFTOOL_VERSION); - jsonw_end_object(json_wtr); + + jsonw_name(json_wtr, "features"); + jsonw_start_object(json_wtr); /* features */ + jsonw_bool_field(json_wtr, "libbfd", has_libbfd); + jsonw_bool_field(json_wtr, "skeletons", has_skeletons); + jsonw_end_object(json_wtr); /* features */ + + jsonw_end_object(json_wtr); /* root object */ } else { + unsigned int nb_features = 0; + printf("%s v%s\n", bin_name, BPFTOOL_VERSION); + printf("features:"); + if (has_libbfd) { + printf(" libbfd"); + nb_features++; + } + if (has_skeletons) + printf("%s skeletons", nb_features++ ? "," : ""); + printf("\n"); } return 0; } -- cgit v1.3.1 From f28ef96d7b04a76575a020a9da4f92358abe68c6 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 9 Sep 2020 17:24:59 +0100 Subject: tools: bpftool: Include common options from separate file Nearly all man pages for bpftool have the same common set of option flags (--help, --version, --json, --pretty, --debug). The description is duplicated across all the pages, which is more difficult to maintain if the description of an option changes. It may also be confusing to sort out what options are not "common" and should not be copied when creating new manual pages. Let's move the description for those common options to a separate file, which is included with a RST directive when generating the man pages. Signed-off-by: Quentin Monnet Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200909162500.17010-3-quentin@isovalent.com --- tools/bpf/bpftool/Documentation/Makefile | 2 +- tools/bpf/bpftool/Documentation/bpftool-btf.rst | 17 +-------------- tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 17 +-------------- .../bpf/bpftool/Documentation/bpftool-feature.rst | 17 +-------------- tools/bpf/bpftool/Documentation/bpftool-gen.rst | 17 +-------------- tools/bpf/bpftool/Documentation/bpftool-iter.rst | 11 +--------- tools/bpf/bpftool/Documentation/bpftool-link.rst | 17 +-------------- tools/bpf/bpftool/Documentation/bpftool-map.rst | 17 +-------------- tools/bpf/bpftool/Documentation/bpftool-net.rst | 17 +-------------- tools/bpf/bpftool/Documentation/bpftool-perf.rst | 17 +-------------- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 18 +--------------- .../bpftool/Documentation/bpftool-struct_ops.rst | 18 +--------------- tools/bpf/bpftool/Documentation/bpftool.rst | 24 +--------------------- tools/bpf/bpftool/Documentation/common_options.rst | 22 ++++++++++++++++++++ 14 files changed, 35 insertions(+), 196 deletions(-) create mode 100644 tools/bpf/bpftool/Documentation/common_options.rst (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index a45b51d98468..5e3815320dab 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -19,7 +19,7 @@ man8dir = $(mandir)/man8 # Load targets for building eBPF helpers man page. include ../../Makefile.helpers -MAN8_RST = $(filter-out $(HELPERS_RST),$(wildcard *.rst)) +MAN8_RST = $(wildcard bpftool*.rst) _DOC_MAN8 = $(patsubst %.rst,%.8,$(MAN8_RST)) DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8)) diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst index 864553e62af4..52a7b2f6c9cb 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -71,22 +71,7 @@ DESCRIPTION OPTIONS ======= - -h, --help - Print short generic help message (similar to **bpftool help**). - - -V, --version - Print version number (similar to **bpftool version**). - - -j, --json - Generate JSON output. For commands that cannot produce JSON, this - option has no effect. - - -p, --pretty - Generate human-readable JSON output. Implies **-j**. - - -d, --debug - Print all logs available from libbpf, including debug-level - information. + .. include:: common_options.rst EXAMPLES ======== diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index a226aee3574f..3dba89db000e 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -116,26 +116,11 @@ DESCRIPTION OPTIONS ======= - -h, --help - Print short generic help message (similar to **bpftool help**). - - -V, --version - Print version number (similar to **bpftool version**). - - -j, --json - Generate JSON output. For commands that cannot produce JSON, this - option has no effect. - - -p, --pretty - Generate human-readable JSON output. Implies **-j**. + .. include:: common_options.rst -f, --bpffs Show file names of pinned programs. - -d, --debug - Print all logs available from libbpf, including debug-level - information. - EXAMPLES ======== | diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst index 8609f06e71de..f1aae5690e3c 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst @@ -71,22 +71,7 @@ DESCRIPTION OPTIONS ======= - -h, --help - Print short generic help message (similar to **bpftool help**). - - -V, --version - Print version number (similar to **bpftool version**). - - -j, --json - Generate JSON output. For commands that cannot produce JSON, this - option has no effect. - - -p, --pretty - Generate human-readable JSON output. Implies **-j**. - - -d, --debug - Print all logs available from libbpf, including debug-level - information. + .. include:: common_options.rst SEE ALSO ======== diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst index d52b03a352d7..29d4cf4c3422 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst @@ -126,22 +126,7 @@ DESCRIPTION OPTIONS ======= - -h, --help - Print short generic help message (similar to **bpftool help**). - - -V, --version - Print version number (similar to **bpftool version**). - - -j, --json - Generate JSON output. For commands that cannot produce JSON, - this option has no effect. - - -p, --pretty - Generate human-readable JSON output. Implies **-j**. - - -d, --debug - Print all logs available from libbpf, including debug-level - information. + .. include:: common_options.rst EXAMPLES ======== diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst index 070ffacb42b5..b688cf11805c 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst @@ -51,16 +51,7 @@ DESCRIPTION OPTIONS ======= - -h, --help - Print short generic help message (similar to **bpftool help**). - - -V, --version - Print version number (similar to **bpftool version**). - - -d, --debug - Print all logs available, even debug-level information. This - includes logs from libbpf as well as from the verifier, when - attempting to load programs. + .. include:: common_options.rst EXAMPLES ======== diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst index dc7693b5e606..ce122be58bae 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-link.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst @@ -62,18 +62,7 @@ DESCRIPTION OPTIONS ======= - -h, --help - Print short generic help message (similar to **bpftool help**). - - -V, --version - Print version number (similar to **bpftool version**). - - -j, --json - Generate JSON output. For commands that cannot produce JSON, this - option has no effect. - - -p, --pretty - Generate human-readable JSON output. Implies **-j**. + .. include:: common_options.rst -f, --bpffs When showing BPF links, show file names of pinned @@ -83,10 +72,6 @@ OPTIONS Do not automatically attempt to mount any virtual file system (such as tracefs or BPF virtual file system) when necessary. - -d, --debug - Print all logs available, even debug-level information. This - includes logs from libbpf. - EXAMPLES ======== **# bpftool link show** diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 8f187c6416cd..4b42629ade3e 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -155,18 +155,7 @@ DESCRIPTION OPTIONS ======= - -h, --help - Print short generic help message (similar to **bpftool help**). - - -V, --version - Print version number (similar to **bpftool version**). - - -j, --json - Generate JSON output. For commands that cannot produce JSON, this - option has no effect. - - -p, --pretty - Generate human-readable JSON output. Implies **-j**. + .. include:: common_options.rst -f, --bpffs Show file names of pinned maps. @@ -175,10 +164,6 @@ OPTIONS Do not automatically attempt to mount any virtual file system (such as tracefs or BPF virtual file system) when necessary. - -d, --debug - Print all logs available from libbpf, including debug-level - information. - EXAMPLES ======== **# bpftool map show** diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst index aa7450736179..56439c32934d 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-net.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst @@ -75,22 +75,7 @@ DESCRIPTION OPTIONS ======= - -h, --help - Print short generic help message (similar to **bpftool help**). - - -V, --version - Print version number (similar to **bpftool version**). - - -j, --json - Generate JSON output. For commands that cannot produce JSON, this - option has no effect. - - -p, --pretty - Generate human-readable JSON output. Implies **-j**. - - -d, --debug - Print all logs available from libbpf, including debug-level - information. + .. include:: common_options.rst EXAMPLES ======== diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst index 9c592b7c6775..36d257a36e9b 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst @@ -40,22 +40,7 @@ DESCRIPTION OPTIONS ======= - -h, --help - Print short generic help message (similar to **bpftool help**). - - -V, --version - Print version number (similar to **bpftool version**). - - -j, --json - Generate JSON output. For commands that cannot produce JSON, this - option has no effect. - - -p, --pretty - Generate human-readable JSON output. Implies **-j**. - - -d, --debug - Print all logs available from libbpf, including debug-level - information. + .. include:: common_options.rst EXAMPLES ======== diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 82e356b664e8..9b2b18e2a3ac 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -210,18 +210,7 @@ DESCRIPTION OPTIONS ======= - -h, --help - Print short generic help message (similar to **bpftool help**). - - -V, --version - Print version number (similar to **bpftool version**). - - -j, --json - Generate JSON output. For commands that cannot produce JSON, this - option has no effect. - - -p, --pretty - Generate human-readable JSON output. Implies **-j**. + .. include:: common_options.rst -f, --bpffs When showing BPF programs, show file names of pinned @@ -234,11 +223,6 @@ OPTIONS Do not automatically attempt to mount any virtual file system (such as tracefs or BPF virtual file system) when necessary. - -d, --debug - Print all logs available, even debug-level information. This - includes logs from libbpf as well as from the verifier, when - attempting to load programs. - EXAMPLES ======== **# bpftool prog show** diff --git a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst index d93cd1cb8b0f..315f1f21f2ba 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst @@ -60,23 +60,7 @@ DESCRIPTION OPTIONS ======= - -h, --help - Print short generic help message (similar to **bpftool help**). - - -V, --version - Print version number (similar to **bpftool version**). - - -j, --json - Generate JSON output. For commands that cannot produce JSON, this - option has no effect. - - -p, --pretty - Generate human-readable JSON output. Implies **-j**. - - -d, --debug - Print all logs available, even debug-level information. This - includes logs from libbpf as well as from the verifier, when - attempting to load programs. + .. include:: common_options.rst EXAMPLES ======== diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index a3629a3f1175..b87f8c2df49d 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -46,24 +46,7 @@ DESCRIPTION OPTIONS ======= - -h, --help - Print short help message (similar to **bpftool help**). - - -V, --version - Print version number (similar to **bpftool version**), and - optional features that were included when bpftool was - compiled. Optional features include linking against libbfd to - provide the disassembler for JIT-ted programs (**bpftool prog - dump jited**) and usage of BPF skeletons (some features like - **bpftool prog profile** or showing pids associated to BPF - objects may rely on it). - - -j, --json - Generate JSON output. For commands that cannot produce JSON, this - option has no effect. - - -p, --pretty - Generate human-readable JSON output. Implies **-j**. + .. include:: common_options.rst -m, --mapcompat Allow loading maps with unknown map definitions. @@ -72,11 +55,6 @@ OPTIONS Do not automatically attempt to mount any virtual file system (such as tracefs or BPF virtual file system) when necessary. - -d, --debug - Print all logs available, even debug-level information. This - includes logs from libbpf as well as from the verifier, when - attempting to load programs. - SEE ALSO ======== **bpf**\ (2), diff --git a/tools/bpf/bpftool/Documentation/common_options.rst b/tools/bpf/bpftool/Documentation/common_options.rst new file mode 100644 index 000000000000..05d06c74dcaa --- /dev/null +++ b/tools/bpf/bpftool/Documentation/common_options.rst @@ -0,0 +1,22 @@ +-h, --help + Print short help message (similar to **bpftool help**). + +-V, --version + Print version number (similar to **bpftool version**), and optional + features that were included when bpftool was compiled. Optional + features include linking against libbfd to provide the disassembler + for JIT-ted programs (**bpftool prog dump jited**) and usage of BPF + skeletons (some features like **bpftool prog profile** or showing + pids associated to BPF objects may rely on it). + +-j, --json + Generate JSON output. For commands that cannot produce JSON, this + option has no effect. + +-p, --pretty + Generate human-readable JSON output. Implies **-j**. + +-d, --debug + Print all logs available, even debug-level information. This includes + logs from libbpf as well as from the verifier, when attempting to + load programs. -- cgit v1.3.1 From 2f7de9865ba3cbfcf8b504f07154fdb6124176a4 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Wed, 9 Sep 2020 17:27:12 +0100 Subject: selftests: bpf: Test iterating a sockmap Add a test that exercises a basic sockmap / sockhash iteration. For now we simply count the number of elements seen. Once sockmap update from iterators works we can extend this to perform a full copy. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200909162712.221874-4-lmb@cloudflare.com --- .../selftests/bpf/prog_tests/sockmap_basic.c | 89 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/bpf_iter.h | 9 +++ .../testing/selftests/bpf/progs/bpf_iter_sockmap.c | 43 +++++++++++ .../testing/selftests/bpf/progs/bpf_iter_sockmap.h | 3 + 4 files changed, 144 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_sockmap.h (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 0b79d78b98db..4b7a527e7e82 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -6,6 +6,9 @@ #include "test_skmsg_load_helpers.skel.h" #include "test_sockmap_update.skel.h" #include "test_sockmap_invalid_update.skel.h" +#include "bpf_iter_sockmap.skel.h" + +#include "progs/bpf_iter_sockmap.h" #define TCP_REPAIR 19 /* TCP sock is under repair right now */ @@ -171,6 +174,88 @@ static void test_sockmap_invalid_update(void) test_sockmap_invalid_update__destroy(skel); } +static void test_sockmap_iter(enum bpf_map_type map_type) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + int err, len, src_fd, iter_fd, duration = 0; + union bpf_iter_link_info linfo = {0}; + __s64 sock_fd[SOCKMAP_MAX_ENTRIES]; + __u32 i, num_sockets, max_elems; + struct bpf_iter_sockmap *skel; + struct bpf_link *link; + struct bpf_map *src; + char buf[64]; + + skel = bpf_iter_sockmap__open_and_load(); + if (CHECK(!skel, "bpf_iter_sockmap__open_and_load", "skeleton open_and_load failed\n")) + return; + + for (i = 0; i < ARRAY_SIZE(sock_fd); i++) + sock_fd[i] = -1; + + /* Make sure we have at least one "empty" entry to test iteration of + * an empty slot. + */ + num_sockets = ARRAY_SIZE(sock_fd) - 1; + + if (map_type == BPF_MAP_TYPE_SOCKMAP) { + src = skel->maps.sockmap; + max_elems = bpf_map__max_entries(src); + } else { + src = skel->maps.sockhash; + max_elems = num_sockets; + } + + src_fd = bpf_map__fd(src); + + for (i = 0; i < num_sockets; i++) { + sock_fd[i] = connected_socket_v4(); + if (CHECK(sock_fd[i] == -1, "connected_socket_v4", "cannot connect\n")) + goto out; + + err = bpf_map_update_elem(src_fd, &i, &sock_fd[i], BPF_NOEXIST); + if (CHECK(err, "map_update", "failed: %s\n", strerror(errno))) + goto out; + } + + linfo.map.map_fd = src_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + link = bpf_program__attach_iter(skel->progs.count_elems, &opts); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* do some tests */ + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + if (CHECK(len < 0, "read", "failed: %s\n", strerror(errno))) + goto close_iter; + + /* test results */ + if (CHECK(skel->bss->elems != max_elems, "elems", "got %u expected %u\n", + skel->bss->elems, max_elems)) + goto close_iter; + + if (CHECK(skel->bss->socks != num_sockets, "socks", "got %u expected %u\n", + skel->bss->socks, num_sockets)) + goto close_iter; + +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +out: + for (i = 0; i < num_sockets; i++) { + if (sock_fd[i] >= 0) + close(sock_fd[i]); + } + bpf_iter_sockmap__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -187,4 +272,8 @@ void test_sockmap_basic(void) test_sockmap_update(BPF_MAP_TYPE_SOCKHASH); if (test__start_subtest("sockmap update in unsafe context")) test_sockmap_invalid_update(); + if (test__start_subtest("sockmap iter")) + test_sockmap_iter(BPF_MAP_TYPE_SOCKMAP); + if (test__start_subtest("sockhash iter")) + test_sockmap_iter(BPF_MAP_TYPE_SOCKHASH); } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h index c196280df90d..df682af75510 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h @@ -13,6 +13,7 @@ #define udp6_sock udp6_sock___not_used #define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used #define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used +#define bpf_iter__sockmap bpf_iter__sockmap___not_used #include "vmlinux.h" #undef bpf_iter_meta #undef bpf_iter__bpf_map @@ -26,6 +27,7 @@ #undef udp6_sock #undef bpf_iter__bpf_map_elem #undef bpf_iter__bpf_sk_storage_map +#undef bpf_iter__sockmap struct bpf_iter_meta { struct seq_file *seq; @@ -96,3 +98,10 @@ struct bpf_iter__bpf_sk_storage_map { struct sock *sk; void *value; }; + +struct bpf_iter__sockmap { + struct bpf_iter_meta *meta; + struct bpf_map *map; + void *key; + struct sock *sk; +}; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c new file mode 100644 index 000000000000..0e27f73dd803 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Cloudflare */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include "bpf_iter_sockmap.h" +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, SOCKMAP_MAX_ENTRIES); + __type(key, __u32); + __type(value, __u64); +} sockmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, SOCKMAP_MAX_ENTRIES); + __type(key, __u32); + __type(value, __u64); +} sockhash SEC(".maps"); + +__u32 elems = 0; +__u32 socks = 0; + +SEC("iter/sockmap") +int count_elems(struct bpf_iter__sockmap *ctx) +{ + struct sock *sk = ctx->sk; + __u32 tmp, *key = ctx->key; + int ret; + + if (key) + elems++; + + if (sk) + socks++; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.h b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.h new file mode 100644 index 000000000000..35a675d13c0f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.h @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define SOCKMAP_MAX_ENTRIES (64) -- cgit v1.3.1 From e54846581891b8cf1c8c903a4feae65a5aa27ed5 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Wed, 9 Sep 2020 22:07:36 -0700 Subject: selftests/mptcp: Better delay & reordering configuration The delay was intended to be configured to "simulate" a high(er) BDP link. As such, it needs to be set as part of the loss-configuration and not as part of the netem reordering configuration. The reordering-config also requires a delay but that delay is the reordering-extend. So, a good approach is to set the reordering-extend as a function of the configured latency. E.g., 25% of the overall latency. To speed up the selftests, we limit the delay to 50ms maximum to avoid having the selftests run for too long. Finally, the intention of tc_reorder was that when it is unset, the test picks a random configuration. However, currently it is always initialized and thus the random config won't be picked up. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/6 Reported-and-reviewed-by: Matthieu Baerts Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 57d75b7f6220..e4df9ba64824 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -14,9 +14,8 @@ capture=false timeout=30 ipv6=true ethtool_random_on=true -tc_delay="$((RANDOM%400))" +tc_delay="$((RANDOM%50))" tc_loss=$((RANDOM%101)) -tc_reorder="" testmode="" sndbuf=0 rcvbuf=0 @@ -628,30 +627,32 @@ for sender in "$ns1" "$ns2" "$ns3" "$ns4";do do_ping "$ns4" $sender dead:beef:3::1 done -[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss +[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms echo -n "INFO: Using loss of $tc_loss " test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms " +reorder_delay=$(($tc_delay / 4)) + if [ -z "${tc_reorder}" ]; then reorder1=$((RANDOM%10)) reorder1=$((100 - reorder1)) reorder2=$((RANDOM%100)) - if [ $tc_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then + if [ $reorder_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then tc_reorder="reorder ${reorder1}% ${reorder2}%" - echo -n "$tc_reorder " + echo -n "$tc_reorder with delay ${reorder_delay}ms " fi elif [ "$tc_reorder" = "0" ];then tc_reorder="" -elif [ "$tc_delay" -gt 0 ];then +elif [ "$reorder_delay" -gt 0 ];then # reordering requires some delay tc_reorder="reorder $tc_reorder" - echo -n "$tc_reorder " + echo -n "$tc_reorder with delay ${reorder_delay}ms " fi echo "on ns3eth4" -tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${tc_delay}ms $tc_reorder +tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder for sender in $ns1 $ns2 $ns3 $ns4;do run_tests_lo "$ns1" "$sender" 10.0.1.1 1 -- cgit v1.3.1 From a20693b6e72e59fb9b896193df0f8007693447d0 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 10 Sep 2020 11:26:50 +0100 Subject: tools: bpftool: Clean up function to dump map entry The function used to dump a map entry in bpftool is a bit difficult to follow, as a consequence to earlier refactorings. There is a variable ("num_elems") which does not appear to be necessary, and the error handling would look cleaner if moved to its own function. Let's clean it up. No functional change. v2: - v1 was erroneously removing the check on fd maps in an attempt to get support for outer map dumps. This is already working. Instead, v2 focuses on cleaning up the dump_map_elem() function, to avoid similar confusion in the future. Signed-off-by: Quentin Monnet Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200910102652.10509-2-quentin@isovalent.com --- tools/bpf/bpftool/map.c | 101 +++++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 49 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index bc0071228f88..c8159cb4fb1e 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -213,8 +213,9 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key, jsonw_end_object(json_wtr); } -static void print_entry_error(struct bpf_map_info *info, unsigned char *key, - const char *error_msg) +static void +print_entry_error_msg(struct bpf_map_info *info, unsigned char *key, + const char *error_msg) { int msg_size = strlen(error_msg); bool single_line, break_names; @@ -232,6 +233,40 @@ static void print_entry_error(struct bpf_map_info *info, unsigned char *key, printf("\n"); } +static void +print_entry_error(struct bpf_map_info *map_info, void *key, int lookup_errno) +{ + /* For prog_array maps or arrays of maps, failure to lookup the value + * means there is no entry for that key. Do not print an error message + * in that case. + */ + if (map_is_map_of_maps(map_info->type) || + map_is_map_of_progs(map_info->type)) + return; + + if (json_output) { + jsonw_start_object(json_wtr); /* entry */ + jsonw_name(json_wtr, "key"); + print_hex_data_json(key, map_info->key_size); + jsonw_name(json_wtr, "value"); + jsonw_start_object(json_wtr); /* error */ + jsonw_string_field(json_wtr, "error", strerror(lookup_errno)); + jsonw_end_object(json_wtr); /* error */ + jsonw_end_object(json_wtr); /* entry */ + } else { + const char *msg = NULL; + + if (lookup_errno == ENOENT) + msg = ""; + else if (lookup_errno == ENOSPC && + map_info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) + msg = ""; + + print_entry_error_msg(map_info, key, + msg ? : strerror(lookup_errno)); + } +} + static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, unsigned char *value) { @@ -713,56 +748,23 @@ static int dump_map_elem(int fd, void *key, void *value, struct bpf_map_info *map_info, struct btf *btf, json_writer_t *btf_wtr) { - int num_elems = 0; - int lookup_errno; - - if (!bpf_map_lookup_elem(fd, key, value)) { - if (json_output) { - print_entry_json(map_info, key, value, btf); - } else { - if (btf) { - struct btf_dumper d = { - .btf = btf, - .jw = btf_wtr, - .is_plain_text = true, - }; - - do_dump_btf(&d, map_info, key, value); - } else { - print_entry_plain(map_info, key, value); - } - num_elems++; - } - return num_elems; + if (bpf_map_lookup_elem(fd, key, value)) { + print_entry_error(map_info, key, errno); + return -1; } - /* lookup error handling */ - lookup_errno = errno; - - if (map_is_map_of_maps(map_info->type) || - map_is_map_of_progs(map_info->type)) - return 0; - if (json_output) { - jsonw_start_object(json_wtr); - jsonw_name(json_wtr, "key"); - print_hex_data_json(key, map_info->key_size); - jsonw_name(json_wtr, "value"); - jsonw_start_object(json_wtr); - jsonw_string_field(json_wtr, "error", strerror(lookup_errno)); - jsonw_end_object(json_wtr); - jsonw_end_object(json_wtr); - } else { - const char *msg = NULL; - - if (lookup_errno == ENOENT) - msg = ""; - else if (lookup_errno == ENOSPC && - map_info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) - msg = ""; + print_entry_json(map_info, key, value, btf); + } else if (btf) { + struct btf_dumper d = { + .btf = btf, + .jw = btf_wtr, + .is_plain_text = true, + }; - print_entry_error(map_info, key, - msg ? : strerror(lookup_errno)); + do_dump_btf(&d, map_info, key, value); + } else { + print_entry_plain(map_info, key, value); } return 0; @@ -873,7 +875,8 @@ map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr, err = 0; break; } - num_elems += dump_map_elem(fd, key, value, info, btf, wtr); + if (!dump_map_elem(fd, key, value, info, btf, wtr)) + num_elems++; prev_key = key; } -- cgit v1.3.1 From 86233ce35e4b886dc5998c72ec6158ed72150782 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 10 Sep 2020 11:26:51 +0100 Subject: tools: bpftool: Keep errors for map-of-map dumps if distinct from ENOENT When dumping outer maps or prog_array maps, and on lookup failure, bpftool simply skips the entry with no error message. This is because the kernel returns non-zero when no value is found for the provided key, which frequently happen for those maps if they have not been filled. When such a case occurs, errno is set to ENOENT. It seems unlikely we could receive other error codes at this stage (we successfully retrieved map info just before), but to be on the safe side, let's skip the entry only if errno was ENOENT, and not for the other errors. v3: New patch Signed-off-by: Quentin Monnet Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200910102652.10509-3-quentin@isovalent.com --- tools/bpf/bpftool/map.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index c8159cb4fb1e..d8581d5e98a1 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -240,8 +240,8 @@ print_entry_error(struct bpf_map_info *map_info, void *key, int lookup_errno) * means there is no entry for that key. Do not print an error message * in that case. */ - if (map_is_map_of_maps(map_info->type) || - map_is_map_of_progs(map_info->type)) + if ((map_is_map_of_maps(map_info->type) || + map_is_map_of_progs(map_info->type)) && lookup_errno == ENOENT) return; if (json_output) { -- cgit v1.3.1 From e3b9626f09d429788d929c9b9000a069fcfc056e Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 10 Sep 2020 11:26:52 +0100 Subject: tools: bpftool: Add "inner_map" to "bpftool map create" outer maps There is no support for creating maps of types array-of-map or hash-of-map in bpftool. This is because the kernel needs an inner_map_fd to collect metadata on the inner maps to be supported by the new map, but bpftool does not provide a way to pass this file descriptor. Add a new optional "inner_map" keyword that can be used to pass a reference to a map, retrieve a fd to that map, and pass it as the inner_map_fd. Add related documentation and bash completion. Note that we can reference the inner map by its name, meaning we can have several times the keyword "name" with different meanings (mandatory outer map name, and possibly a name to use to find the inner_map_fd). The bash completion will offer it just once, and will not suggest "name" on the following command: # bpftool map create /sys/fs/bpf/my_outer_map type hash_of_maps \ inner_map name my_inner_map [TAB] Fixing that specific case seems too convoluted. Completion will work as expected, however, if the outer map name comes first and the "inner_map name ..." is passed second. Signed-off-by: Quentin Monnet Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200910102652.10509-4-quentin@isovalent.com --- tools/bpf/bpftool/Documentation/bpftool-map.rst | 10 ++++-- tools/bpf/bpftool/bash-completion/bpftool | 22 +++++++++++- tools/bpf/bpftool/map.c | 48 +++++++++++++++++-------- 3 files changed, 62 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 4b42629ade3e..8eac254ade48 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -23,7 +23,8 @@ MAP COMMANDS | **bpftool** **map** { **show** | **list** } [*MAP*] | **bpftool** **map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* \ -| **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*] +| **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] \ +| [**dev** *NAME*] | **bpftool** **map dump** *MAP* | **bpftool** **map update** *MAP* [**key** *DATA*] [**value** *VALUE*] [*UPDATE_FLAGS*] | **bpftool** **map lookup** *MAP* [**key** *DATA*] @@ -67,7 +68,7 @@ DESCRIPTION maps. On such kernels bpftool will automatically emit this information as well. - **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*] + **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] [**dev** *NAME*] Create a new map with given parameters and pin it to *bpffs* as *FILE*. @@ -75,6 +76,11 @@ DESCRIPTION desired flags, e.g. 1024 for **BPF_F_MMAPABLE** (see bpf.h UAPI header for existing flags). + To create maps of type array-of-maps or hash-of-maps, the + **inner_map** keyword must be used to pass an inner map. The + kernel needs it to collect metadata related to the inner maps + that the new map will work with. + Keyword **dev** expects a network interface name, and is used to request hardware offload for the map. diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 7b68e3c0a5fb..3f1da30c4da6 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -709,9 +709,26 @@ _bpftool() "$cur" ) ) return 0 ;; - key|value|flags|name|entries) + key|value|flags|entries) return 0 ;; + inner_map) + COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) + return 0 + ;; + id) + _bpftool_get_map_ids + ;; + name) + case $pprev in + inner_map) + _bpftool_get_map_names + ;; + *) + return 0 + ;; + esac + ;; *) _bpftool_once_attr 'type' _bpftool_once_attr 'key' @@ -719,6 +736,9 @@ _bpftool() _bpftool_once_attr 'entries' _bpftool_once_attr 'name' _bpftool_once_attr 'flags' + if _bpftool_search_list 'array_of_maps' 'hash_of_maps'; then + _bpftool_once_attr 'inner_map' + fi _bpftool_once_attr 'dev' return 0 ;; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index d8581d5e98a1..a7efbd84fbcc 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1250,7 +1250,7 @@ static int do_create(int argc, char **argv) { struct bpf_create_map_attr attr = { NULL, }; const char *pinfile; - int err, fd; + int err = -1, fd; if (!REQ_ARGS(7)) return -1; @@ -1265,13 +1265,13 @@ static int do_create(int argc, char **argv) if (attr.map_type) { p_err("map type already specified"); - return -1; + goto exit; } attr.map_type = map_type_from_str(*argv); if ((int)attr.map_type < 0) { p_err("unrecognized map type: %s", *argv); - return -1; + goto exit; } NEXT_ARG(); } else if (is_prefix(*argv, "name")) { @@ -1280,43 +1280,56 @@ static int do_create(int argc, char **argv) } else if (is_prefix(*argv, "key")) { if (parse_u32_arg(&argc, &argv, &attr.key_size, "key size")) - return -1; + goto exit; } else if (is_prefix(*argv, "value")) { if (parse_u32_arg(&argc, &argv, &attr.value_size, "value size")) - return -1; + goto exit; } else if (is_prefix(*argv, "entries")) { if (parse_u32_arg(&argc, &argv, &attr.max_entries, "max entries")) - return -1; + goto exit; } else if (is_prefix(*argv, "flags")) { if (parse_u32_arg(&argc, &argv, &attr.map_flags, "flags")) - return -1; + goto exit; } else if (is_prefix(*argv, "dev")) { NEXT_ARG(); if (attr.map_ifindex) { p_err("offload device already specified"); - return -1; + goto exit; } attr.map_ifindex = if_nametoindex(*argv); if (!attr.map_ifindex) { p_err("unrecognized netdevice '%s': %s", *argv, strerror(errno)); - return -1; + goto exit; } NEXT_ARG(); + } else if (is_prefix(*argv, "inner_map")) { + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + int inner_map_fd; + + NEXT_ARG(); + if (!REQ_ARGS(2)) + usage(); + inner_map_fd = map_parse_fd_and_info(&argc, &argv, + &info, &len); + if (inner_map_fd < 0) + return -1; + attr.inner_map_fd = inner_map_fd; } else { p_err("unknown arg %s", *argv); - return -1; + goto exit; } } if (!attr.name) { p_err("map name not specified"); - return -1; + goto exit; } set_max_rlimit(); @@ -1324,17 +1337,22 @@ static int do_create(int argc, char **argv) fd = bpf_create_map_xattr(&attr); if (fd < 0) { p_err("map create failed: %s", strerror(errno)); - return -1; + goto exit; } err = do_pin_fd(fd, pinfile); close(fd); if (err) - return err; + goto exit; if (json_output) jsonw_null(json_wtr); - return 0; + +exit: + if (attr.inner_map_fd > 0) + close(attr.inner_map_fd); + + return err; } static int do_pop_dequeue(int argc, char **argv) @@ -1420,7 +1438,7 @@ static int do_help(int argc, char **argv) "Usage: %1$s %2$s { show | list } [MAP]\n" " %1$s %2$s create FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n" " entries MAX_ENTRIES name NAME [flags FLAGS] \\\n" - " [dev NAME]\n" + " [inner_map MAP] [dev NAME]\n" " %1$s %2$s dump MAP\n" " %1$s %2$s update MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n" " %1$s %2$s lookup MAP [key DATA]\n" -- cgit v1.3.1 From 90a1deda75c6b749fb89310e4744554a4c36ea33 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 10 Sep 2020 19:13:36 +0200 Subject: selftests/bpf: Fix test_ksyms on non-SMP kernels On non-SMP kernels __per_cpu_start is not 0, so look it up in kallsyms. Signed-off-by: Ilya Leoshkevich Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200910171336.3161995-1-iii@linux.ibm.com --- tools/testing/selftests/bpf/prog_tests/ksyms.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c index e3d6777226a8..b771804b2342 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c @@ -32,6 +32,7 @@ out: void test_ksyms(void) { + __u64 per_cpu_start_addr = kallsyms_find("__per_cpu_start"); __u64 link_fops_addr = kallsyms_find("bpf_link_fops"); const char *btf_path = "/sys/kernel/btf/vmlinux"; struct test_ksyms *skel; @@ -63,8 +64,9 @@ void test_ksyms(void) "got %llu, exp %llu\n", data->out__bpf_link_fops1, (__u64)0); CHECK(data->out__btf_size != btf_size, "btf_size", "got %llu, exp %llu\n", data->out__btf_size, btf_size); - CHECK(data->out__per_cpu_start != 0, "__per_cpu_start", - "got %llu, exp %llu\n", data->out__per_cpu_start, (__u64)0); + CHECK(data->out__per_cpu_start != per_cpu_start_addr, "__per_cpu_start", + "got %llu, exp %llu\n", data->out__per_cpu_start, + per_cpu_start_addr); cleanup: test_ksyms__destroy(skel); -- cgit v1.3.1 From 6e057fc15a2da4ee03eb1fa6889cf687e690106e Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 10 Sep 2020 13:27:18 -0700 Subject: selftests/bpf: Define string const as global for test_sysctl_prog.c When tweaking llvm optimizations, I found that selftest build failed with the following error: libbpf: elf: skipping unrecognized data section(6) .rodata.str1.1 libbpf: prog 'sysctl_tcp_mem': bad map relo against '.L__const.is_tcp_mem.tcp_mem_name' in section '.rodata.str1.1' Error: failed to open BPF object file: Relocation failed make: *** [/work/net-next/tools/testing/selftests/bpf/test_sysctl_prog.skel.h] Error 255 make: *** Deleting file `/work/net-next/tools/testing/selftests/bpf/test_sysctl_prog.skel.h' The local string constant "tcp_mem_name" is put into '.rodata.str1.1' section which libbpf cannot handle. Using untweaked upstream llvm, "tcp_mem_name" is completely inlined after loop unrolling. Commit 7fb5eefd7639 ("selftests/bpf: Fix test_sysctl_loop{1, 2} failure due to clang change") solved a similar problem by defining the string const as a global. Let us do the same here for test_sysctl_prog.c so it can weather future potential llvm changes. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200910202718.956042-1-yhs@fb.com --- tools/testing/selftests/bpf/progs/test_sysctl_prog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c index 50525235380e..5489823c83fc 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c @@ -19,11 +19,11 @@ #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #endif +const char tcp_mem_name[] = "net/ipv4/tcp_mem"; static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx) { - char tcp_mem_name[] = "net/ipv4/tcp_mem"; unsigned char i; - char name[64]; + char name[sizeof(tcp_mem_name)]; int ret; memset(name, 0, sizeof(name)); -- cgit v1.3.1 From 1aef5b4391f0c75c0a1523706a7b0311846ee12f Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 10 Sep 2020 13:33:14 -0700 Subject: bpf: Fix comment for helper bpf_current_task_under_cgroup() This should be "current" not "skb". Fixes: c6b5fb8690fa ("bpf: add documentation for eBPF helpers (42-50)") Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Cc: Link: https://lore.kernel.org/bpf/20200910203314.70018-1-songliubraving@fb.com --- include/uapi/linux/bpf.h | 4 ++-- tools/include/uapi/linux/bpf.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 90359cab501d..7dd314176df7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1447,8 +1447,8 @@ union bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if the *skb* task belongs to the cgroup2. - * * 1, if the *skb* task does not belong to the cgroup2. + * * 0, if current task belongs to the cgroup2. + * * 1, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. * * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 90359cab501d..7dd314176df7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1447,8 +1447,8 @@ union bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if the *skb* task belongs to the cgroup2. - * * 1, if the *skb* task does not belong to the cgroup2. + * * 0, if current task belongs to the cgroup2. + * * 1, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. * * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) -- cgit v1.3.1 From 18841da98100c936990ac9013d55bf6b40e1f609 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 10 Sep 2020 21:39:35 +0100 Subject: tools: bpftool: Automate generation for "SEE ALSO" sections in man pages The "SEE ALSO" sections of bpftool's manual pages refer to bpf(2), bpf-helpers(7), then all existing bpftool man pages (save the current one). This leads to nearly-identical lists being duplicated in all manual pages. Ideally, when a new page is created, all lists should be updated accordingly, but this has led to omissions and inconsistencies multiple times in the past. Let's take it out of the RST files and generate the "SEE ALSO" sections automatically in the Makefile when generating the man pages. The lists are not really useful in the RST anyway because all other pages are available in the same directory. v3: - Fix conflict with a previous patchset that introduced RST2MAN_OPTS variable passed to rst2man. v2: - Use "echo -n" instead of "printf" in Makefile, to avoid any risk of passing a format string directly to the command. Signed-off-by: Quentin Monnet Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200910203935.25304-1-quentin@isovalent.com --- tools/bpf/bpftool/Documentation/Makefile | 12 +++++++++++- tools/bpf/bpftool/Documentation/bpftool-btf.rst | 17 ----------------- tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 16 ---------------- tools/bpf/bpftool/Documentation/bpftool-feature.rst | 16 ---------------- tools/bpf/bpftool/Documentation/bpftool-gen.rst | 16 ---------------- tools/bpf/bpftool/Documentation/bpftool-iter.rst | 16 ---------------- tools/bpf/bpftool/Documentation/bpftool-link.rst | 17 ----------------- tools/bpf/bpftool/Documentation/bpftool-map.rst | 16 ---------------- tools/bpf/bpftool/Documentation/bpftool-net.rst | 17 ----------------- tools/bpf/bpftool/Documentation/bpftool-perf.rst | 17 ----------------- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 16 ---------------- tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst | 17 ----------------- tools/bpf/bpftool/Documentation/bpftool.rst | 16 ---------------- 13 files changed, 11 insertions(+), 198 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index 5e3815320dab..4c9dd1e45244 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -30,11 +30,21 @@ man8: $(DOC_MAN8) RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null) RST2MAN_OPTS += --verbose +list_pages = $(sort $(basename $(filter-out $(1),$(MAN8_RST)))) +see_also = $(subst " ",, \ + "\n" \ + "SEE ALSO\n" \ + "========\n" \ + "\t**bpf**\ (2),\n" \ + "\t**bpf-helpers**\\ (7)" \ + $(foreach page,$(call list_pages,$(1)),",\n\t**$(page)**\\ (8)") \ + "\n") + $(OUTPUT)%.8: %.rst ifndef RST2MAN_DEP $(error "rst2man not found, but required to generate man pages") endif - $(QUIET_GEN)rst2man $(RST2MAN_OPTS) $< > $@ + $(QUIET_GEN)( cat $< ; echo -n $(call see_also,$<) ) | rst2man $(RST2MAN_OPTS) > $@ clean: helpers-clean $(call QUIET_CLEAN, Documentation) diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst index 52a7b2f6c9cb..ff4d327a582e 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -217,20 +217,3 @@ All the standard ways to specify map or program are supported: **# bpftool btf dump prog tag b88e0a09b1d9759d** **# bpftool btf dump prog pinned /sys/fs/bpf/prog_name** - -SEE ALSO -======== - **bpf**\ (2), - **bpf-helpers**\ (7), - **bpftool**\ (8), - **bpftool-btf**\ (8), - **bpftool-cgroup**\ (8), - **bpftool-feature**\ (8), - **bpftool-gen**\ (8), - **bpftool-iter**\ (8), - **bpftool-link**\ (8), - **bpftool-map**\ (8), - **bpftool-net**\ (8), - **bpftool-perf**\ (8), - **bpftool-prog**\ (8), - **bpftool-struct_ops**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index 3dba89db000e..790944c35602 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -143,19 +143,3 @@ EXAMPLES :: ID AttachType AttachFlags Name - -SEE ALSO -======== - **bpf**\ (2), - **bpf-helpers**\ (7), - **bpftool**\ (8), - **bpftool-btf**\ (8), - **bpftool-feature**\ (8), - **bpftool-gen**\ (8), - **bpftool-iter**\ (8), - **bpftool-link**\ (8), - **bpftool-map**\ (8), - **bpftool-net**\ (8), - **bpftool-perf**\ (8), - **bpftool-prog**\ (8), - **bpftool-struct_ops**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst index f1aae5690e3c..dd3771bdbc57 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst @@ -72,19 +72,3 @@ DESCRIPTION OPTIONS ======= .. include:: common_options.rst - -SEE ALSO -======== - **bpf**\ (2), - **bpf-helpers**\ (7), - **bpftool**\ (8), - **bpftool-btf**\ (8), - **bpftool-cgroup**\ (8), - **bpftool-gen**\ (8), - **bpftool-iter**\ (8), - **bpftool-link**\ (8), - **bpftool-map**\ (8), - **bpftool-net**\ (8), - **bpftool-perf**\ (8), - **bpftool-prog**\ (8), - **bpftool-struct_ops**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst index 29d4cf4c3422..84cf0639696f 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst @@ -279,19 +279,3 @@ and global variables. my_static_var: 7 This is a stripped-out version of skeleton generated for above example code. - -SEE ALSO -======== - **bpf**\ (2), - **bpf-helpers**\ (7), - **bpftool**\ (8), - **bpftool-btf**\ (8), - **bpftool-cgroup**\ (8), - **bpftool-feature**\ (8), - **bpftool-iter**\ (8), - **bpftool-link**\ (8), - **bpftool-map**\ (8), - **bpftool-net**\ (8), - **bpftool-perf**\ (8), - **bpftool-prog**\ (8), - **bpftool-struct_ops**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst index b688cf11805c..51f49bead619 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst @@ -68,19 +68,3 @@ EXAMPLES Create a file-based bpf iterator from bpf_iter_hashmap.o and map with id 20, and pin it to /sys/fs/bpf/my_hashmap - -SEE ALSO -======== - **bpf**\ (2), - **bpf-helpers**\ (7), - **bpftool**\ (8), - **bpftool-btf**\ (8), - **bpftool-cgroup**\ (8), - **bpftool-feature**\ (8), - **bpftool-gen**\ (8), - **bpftool-link**\ (8), - **bpftool-map**\ (8), - **bpftool-net**\ (8), - **bpftool-perf**\ (8), - **bpftool-prog**\ (8), - **bpftool-struct_ops**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst index ce122be58bae..5f7db2a837cc 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-link.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst @@ -106,20 +106,3 @@ EXAMPLES :: -rw------- 1 root root 0 Apr 23 21:39 link - - -SEE ALSO -======== - **bpf**\ (2), - **bpf-helpers**\ (7), - **bpftool**\ (8), - **bpftool-btf**\ (8), - **bpftool-cgroup**\ (8), - **bpftool-feature**\ (8), - **bpftool-gen**\ (8), - **bpftool-iter**\ (8), - **bpftool-map**\ (8), - **bpftool-net**\ (8), - **bpftool-perf**\ (8), - **bpftool-prog**\ (8), - **bpftool-struct_ops**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 8eac254ade48..dade10cdf295 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -270,19 +270,3 @@ would be lost as soon as bpftool exits). key: 00 00 00 00 value: 22 02 00 00 Found 1 element - -SEE ALSO -======== - **bpf**\ (2), - **bpf-helpers**\ (7), - **bpftool**\ (8), - **bpftool-btf**\ (8), - **bpftool-cgroup**\ (8), - **bpftool-feature**\ (8), - **bpftool-gen**\ (8), - **bpftool-iter**\ (8), - **bpftool-link**\ (8), - **bpftool-net**\ (8), - **bpftool-perf**\ (8), - **bpftool-prog**\ (8), - **bpftool-struct_ops**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst index 56439c32934d..d8165d530937 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-net.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst @@ -172,20 +172,3 @@ EXAMPLES :: xdp: - - -SEE ALSO -======== - **bpf**\ (2), - **bpf-helpers**\ (7), - **bpftool**\ (8), - **bpftool-btf**\ (8), - **bpftool-cgroup**\ (8), - **bpftool-feature**\ (8), - **bpftool-gen**\ (8), - **bpftool-iter**\ (8), - **bpftool-link**\ (8), - **bpftool-map**\ (8), - **bpftool-perf**\ (8), - **bpftool-prog**\ (8), - **bpftool-struct_ops**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst index 36d257a36e9b..e958ce91de72 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst @@ -63,20 +63,3 @@ EXAMPLES {"pid":21765,"fd":5,"prog_id":7,"fd_type":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \ {"pid":21767,"fd":5,"prog_id":8,"fd_type":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \ {"pid":21800,"fd":5,"prog_id":9,"fd_type":"uprobe","filename":"/home/yhs/a.out","offset":1159}] - - -SEE ALSO -======== - **bpf**\ (2), - **bpf-helpers**\ (7), - **bpftool**\ (8), - **bpftool-btf**\ (8), - **bpftool-cgroup**\ (8), - **bpftool-feature**\ (8), - **bpftool-gen**\ (8), - **bpftool-iter**\ (8), - **bpftool-link**\ (8), - **bpftool-map**\ (8), - **bpftool-net**\ (8), - **bpftool-prog**\ (8), - **bpftool-struct_ops**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 9b2b18e2a3ac..358c7309d419 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -326,19 +326,3 @@ EXAMPLES 40176203 cycles (83.05%) 42518139 instructions # 1.06 insns per cycle (83.39%) 123 llc_misses # 2.89 LLC misses per million insns (83.15%) - -SEE ALSO -======== - **bpf**\ (2), - **bpf-helpers**\ (7), - **bpftool**\ (8), - **bpftool-btf**\ (8), - **bpftool-cgroup**\ (8), - **bpftool-feature**\ (8), - **bpftool-gen**\ (8), - **bpftool-iter**\ (8), - **bpftool-link**\ (8), - **bpftool-map**\ (8), - **bpftool-net**\ (8), - **bpftool-perf**\ (8), - **bpftool-struct_ops**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst index 315f1f21f2ba..506e70ee78e9 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst @@ -82,20 +82,3 @@ EXAMPLES :: Registered tcp_congestion_ops cubic id 110 - - -SEE ALSO -======== - **bpf**\ (2), - **bpf-helpers**\ (7), - **bpftool**\ (8), - **bpftool-btf**\ (8), - **bpftool-cgroup**\ (8), - **bpftool-feature**\ (8), - **bpftool-gen**\ (8), - **bpftool-iter**\ (8), - **bpftool-link**\ (8), - **bpftool-map**\ (8), - **bpftool-net**\ (8), - **bpftool-perf**\ (8), - **bpftool-prog**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index b87f8c2df49d..e7d949334961 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -54,19 +54,3 @@ OPTIONS -n, --nomount Do not automatically attempt to mount any virtual file system (such as tracefs or BPF virtual file system) when necessary. - -SEE ALSO -======== - **bpf**\ (2), - **bpf-helpers**\ (7), - **bpftool-btf**\ (8), - **bpftool-cgroup**\ (8), - **bpftool-feature**\ (8), - **bpftool-gen**\ (8), - **bpftool-iter**\ (8), - **bpftool-link**\ (8), - **bpftool-map**\ (8), - **bpftool-net**\ (8), - **bpftool-perf**\ (8), - **bpftool-prog**\ (8), - **bpftool-struct_ops**\ (8) -- cgit v1.3.1 From 1a418cb8e888ccee29d5aca305cfdbae6cff2139 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 14 Sep 2020 10:01:19 +0200 Subject: mptcp: simult flow self-tests Add a bunch of test-cases for multiple subflow xmit: create multiple subflows simulating different links condition via netem and verify that the msk is able to use completely the aggregated bandwidth. Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/Makefile | 3 +- tools/testing/selftests/net/mptcp/simult_flows.sh | 293 ++++++++++++++++++++++ 2 files changed, 295 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/net/mptcp/simult_flows.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index aa254aefc2c3..00bb158b4a5d 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -5,7 +5,8 @@ KSFT_KHDR_INSTALL := 1 CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include -TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh +TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ + simult_flows.sh TEST_GEN_FILES = mptcp_connect pm_nl_ctl diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh new file mode 100755 index 000000000000..0d88225daa02 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -0,0 +1,293 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) +ns1="ns1-$rndh" +ns2="ns2-$rndh" +ns3="ns3-$rndh" +capture=false +ksft_skip=4 +timeout=30 +test_cnt=1 +ret=0 +bail=0 + +usage() { + echo "Usage: $0 [ -b ] [ -c ] [ -d ]" + echo -e "\t-b: bail out after first error, otherwise runs al testcases" + echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)" + echo -e "\t-d: debug this script" +} + +cleanup() +{ + rm -f "$cin" "$cout" + rm -f "$sin" "$sout" + rm -f "$capout" + + local netns + for netns in "$ns1" "$ns2" "$ns3";do + ip netns del $netns + done +} + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +# "$ns1" ns2 ns3 +# ns1eth1 ns2eth1 ns2eth3 ns3eth1 +# netem +# ns1eth2 ns2eth2 +# netem + +setup() +{ + large=$(mktemp) + small=$(mktemp) + sout=$(mktemp) + cout=$(mktemp) + capout=$(mktemp) + size=$((2048 * 4096)) + dd if=/dev/zero of=$small bs=4096 count=20 >/dev/null 2>&1 + dd if=/dev/zero of=$large bs=4096 count=$((size / 4096)) >/dev/null 2>&1 + + trap cleanup EXIT + + for i in "$ns1" "$ns2" "$ns3";do + ip netns add $i || exit $ksft_skip + ip -net $i link set lo up + done + + ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" + ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth2 netns "$ns2" + ip link add ns2eth3 netns "$ns2" type veth peer name ns3eth1 netns "$ns3" + + ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth1 + ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth1 nodad + ip -net "$ns1" link set ns1eth1 up mtu 1500 + ip -net "$ns1" route add default via 10.0.1.2 + ip -net "$ns1" route add default via dead:beef:1::2 + + ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2 + ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad + ip -net "$ns1" link set ns1eth2 up mtu 1500 + ip -net "$ns1" route add default via 10.0.2.2 metric 101 + ip -net "$ns1" route add default via dead:beef:2::2 metric 101 + + ip netns exec "$ns1" ./pm_nl_ctl limits 1 1 + ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow + ip netns exec "$ns1" sysctl -q net.ipv4.conf.all.rp_filter=0 + + ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1 + ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad + ip -net "$ns2" link set ns2eth1 up mtu 1500 + + ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth2 + ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth2 nodad + ip -net "$ns2" link set ns2eth2 up mtu 1500 + + ip -net "$ns2" addr add 10.0.3.2/24 dev ns2eth3 + ip -net "$ns2" addr add dead:beef:3::2/64 dev ns2eth3 nodad + ip -net "$ns2" link set ns2eth3 up mtu 1500 + ip netns exec "$ns2" sysctl -q net.ipv4.ip_forward=1 + ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.forwarding=1 + + ip -net "$ns3" addr add 10.0.3.3/24 dev ns3eth1 + ip -net "$ns3" addr add dead:beef:3::3/64 dev ns3eth1 nodad + ip -net "$ns3" link set ns3eth1 up mtu 1500 + ip -net "$ns3" route add default via 10.0.3.2 + ip -net "$ns3" route add default via dead:beef:3::2 + + ip netns exec "$ns3" ./pm_nl_ctl limits 1 1 +} + +# $1: ns, $2: port +wait_local_port_listen() +{ + local listener_ns="${1}" + local port="${2}" + + local port_hex i + + port_hex="$(printf "%04X" "${port}")" + for i in $(seq 10); do + ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ + awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && + break + sleep 0.1 + done +} + +do_transfer() +{ + local cin=$1 + local sin=$2 + local max_time=$3 + local port + port=$((10000+$test_cnt)) + test_cnt=$((test_cnt+1)) + + :> "$cout" + :> "$sout" + :> "$capout" + + local addr_port + addr_port=$(printf "%s:%d" ${connect_addr} ${port}) + + if $capture; then + local capuser + if [ -z $SUDO_USER ] ; then + capuser="" + else + capuser="-Z $SUDO_USER" + fi + + local capfile="${rndh}-${port}" + local capopt="-i any -s 65535 -B 32768 ${capuser}" + + ip netns exec ${ns3} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 & + local cappid_listener=$! + + ip netns exec ${ns1} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 & + local cappid_connector=$! + + sleep 1 + fi + + ip netns exec ${ns3} ./mptcp_connect -jt $timeout -l -p $port 0.0.0.0 < "$sin" > "$sout" & + local spid=$! + + wait_local_port_listen "${ns3}" "${port}" + + local start + start=$(date +%s%3N) + ip netns exec ${ns1} ./mptcp_connect -jt $timeout -p $port 10.0.3.3 < "$cin" > "$cout" & + local cpid=$! + + wait $cpid + local retc=$? + wait $spid + local rets=$? + + local stop + stop=$(date +%s%3N) + + if $capture; then + sleep 1 + kill ${cappid_listener} + kill ${cappid_connector} + fi + + local duration + duration=$((stop-start)) + + cmp $sin $cout > /dev/null 2>&1 + local cmps=$? + cmp $cin $sout > /dev/null 2>&1 + local cmpc=$? + + printf "%16s" "$duration max $max_time " + if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \ + [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] && \ + [ $duration -lt $max_time ]; then + echo "[ OK ]" + cat "$capout" + return 0 + fi + + echo " [ fail ]" + echo "client exit code $retc, server $rets" 1>&2 + echo "\nnetns ${ns3} socket stat for $port:" 1>&2 + ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port" + echo "\nnetns ${ns1} socket stat for $port:" 1>&2 + ip netns exec ${ns1} ss -nita 1>&2 -o "dport = :$port" + ls -l $sin $cout + ls -l $cin $sout + + cat "$capout" + return 1 +} + +run_test() +{ + local rate1=$1 + local rate2=$2 + local delay1=$3 + local delay2=$4 + local lret + local dev + shift 4 + local msg=$* + + [ $delay1 -gt 0 ] && delay1="delay $delay1" || delay1="" + [ $delay2 -gt 0 ] && delay2="delay $delay2" || delay2="" + + for dev in ns1eth1 ns1eth2; do + tc -n $ns1 qdisc del dev $dev root >/dev/null 2>&1 + done + for dev in ns2eth1 ns2eth2; do + tc -n $ns2 qdisc del dev $dev root >/dev/null 2>&1 + done + tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1 + tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2 + tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 + tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 + + # time is measure in ms + local time=$((size * 8 * 1000 / (( $rate1 + $rate2) * 1024 *1024) )) + + # mptcp_connect will do some sleeps to allow the mp_join handshake + # completion + time=$((time + 1350)) + + printf "%-50s" "$msg" + do_transfer $small $large $((time * 11 / 10)) + lret=$? + if [ $lret -ne 0 ]; then + ret=$lret + [ $bail -eq 0 ] || exit $ret + fi + + printf "%-50s" "$msg - reverse direction" + do_transfer $large $small $((time * 11 / 10)) + lret=$? + if [ $lret -ne 0 ]; then + ret=$lret + [ $bail -eq 0 ] || exit $ret + fi +} + +while getopts "bcdh" option;do + case "$option" in + "h") + usage $0 + exit 0 + ;; + "b") + bail=1 + ;; + "c") + capture=true + ;; + "d") + set -x + ;; + "?") + usage $0 + exit 1 + ;; + esac +done + +setup +run_test 10 10 0 0 "balanced bwidth" +run_test 10 10 1 50 "balanced bwidth with unbalanced delay" + +# we still need some additional infrastructure to pass the following test-cases +# run_test 30 10 0 0 "unbalanced bwidth" +# run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" +# run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" +exit $ret -- cgit v1.3.1 From 63bea244fee23f31a4379769ef09e30365bb74b7 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 14 Sep 2020 11:31:10 -0700 Subject: bpftool: Fix build failure When building bpf selftests like make -C tools/testing/selftests/bpf -j20 I hit the following errors: ... GEN /net-next/tools/testing/selftests/bpf/tools/build/bpftool/Documentation/bpftool-gen.8 :75: (WARNING/2) Block quote ends without a blank line; unexpected unindent. :71: (WARNING/2) Literal block ends without a blank line; unexpected unindent. :85: (WARNING/2) Literal block ends without a blank line; unexpected unindent. :57: (WARNING/2) Block quote ends without a blank line; unexpected unindent. :66: (WARNING/2) Literal block ends without a blank line; unexpected unindent. :109: (WARNING/2) Literal block ends without a blank line; unexpected unindent. :175: (WARNING/2) Literal block ends without a blank line; unexpected unindent. :273: (WARNING/2) Literal block ends without a blank line; unexpected unindent. make[1]: *** [/net-next/tools/testing/selftests/bpf/tools/build/bpftool/Documentation/bpftool-perf.8] Error 12 make[1]: *** Waiting for unfinished jobs.... make[1]: *** [/net-next/tools/testing/selftests/bpf/tools/build/bpftool/Documentation/bpftool-iter.8] Error 12 make[1]: *** [/net-next/tools/testing/selftests/bpf/tools/build/bpftool/Documentation/bpftool-struct_ops.8] Error 12 ... I am using: -bash-4.4$ rst2man --version rst2man (Docutils 0.11 [repository], Python 2.7.5, on linux2) -bash-4.4$ The Makefile generated final .rst file (e.g., bpftool-cgroup.rst) looks like ... ID AttachType AttachFlags Name \n SEE ALSO\n========\n\t**bpf**\ (2),\n\t**bpf-helpers**\ (7),\n\t**bpftool**\ (8),\n\t**bpftool-btf**\ (8),\n\t**bpftool-feature**\ (8),\n\t**bpftool-gen**\ (8),\n\t**bpftool-iter**\ (8),\n\t**bpftool-link**\ (8),\n\t**bpftool-map**\ (8),\n\t**bpftool-net**\ (8),\n\t**bpftool-perf**\ (8),\n\t**bpftool-prog**\ (8),\n\t**bpftool-struct_ops**\ (8)\n The rst2man generated .8 file looks like Literal block ends without a blank line; unexpected unindent. .sp n SEEALSOn========nt**bpf**(2),nt**bpf\-helpers**(7),nt**bpftool**(8),nt**bpftool\-btf**(8),nt** bpftool\-feature**(8),nt**bpftool\-gen**(8),nt**bpftool\-iter**(8),nt**bpftool\-link**(8),nt** bpftool\-map**(8),nt**bpftool\-net**(8),nt**bpftool\-perf**(8),nt**bpftool\-prog**(8),nt** bpftool\-struct_ops**(8)n Looks like that particular version of rst2man prefers to have actual new line instead of \n. Since `echo -e` may not be available in some environment, let us use `printf`. Format string "%b" is used for `printf` to ensure all escape characters are interpretted properly. Fixes: 18841da98100 ("tools: bpftool: Automate generation for "SEE ALSO" sections in man pages") Suggested-by: Andrii Nakryiko Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Acked-by: Andrii Nakryiko Cc: Quentin Monnet Link: https://lore.kernel.org/bpf/20200914183110.999906-1-yhs@fb.com --- tools/bpf/bpftool/Documentation/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index 4c9dd1e45244..f33cb02de95c 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -44,7 +44,7 @@ $(OUTPUT)%.8: %.rst ifndef RST2MAN_DEP $(error "rst2man not found, but required to generate man pages") endif - $(QUIET_GEN)( cat $< ; echo -n $(call see_also,$<) ) | rst2man $(RST2MAN_OPTS) > $@ + $(QUIET_GEN)( cat $< ; printf "%b" $(call see_also,$<) ) | rst2man $(RST2MAN_OPTS) > $@ clean: helpers-clean $(call QUIET_CLEAN, Documentation) -- cgit v1.3.1 From d317b0a8acfc4b126858e4cdadb03338d22f8ce0 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 14 Sep 2020 15:32:10 -0700 Subject: libbpf: Fix a compilation error with xsk.c for ubuntu 16.04 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When syncing latest libbpf repo to bcc, ubuntu 16.04 (4.4.0 LTS kernel) failed compilation for xsk.c: In file included from /tmp/debuild.0jkauG/bcc/src/cc/libbpf/src/xsk.c:23:0: /tmp/debuild.0jkauG/bcc/src/cc/libbpf/src/xsk.c: In function ‘xsk_get_ctx’: /tmp/debuild.0jkauG/bcc/src/cc/libbpf/include/linux/list.h:81:9: warning: implicit declaration of function ‘container_of’ [-Wimplicit-function-declaration] container_of(ptr, type, member) ^ /tmp/debuild.0jkauG/bcc/src/cc/libbpf/include/linux/list.h:83:9: note: in expansion of macro ‘list_entry’ list_entry((ptr)->next, type, member) ... src/cc/CMakeFiles/bpf-static.dir/build.make:209: recipe for target 'src/cc/CMakeFiles/bpf-static.dir/libbpf/src/xsk.c.o' failed Commit 2f6324a3937f ("libbpf: Support shared umems between queues and devices") added include file , which uses macro "container_of". xsk.c file also includes before . In a more recent distro kernel, includes which contains the macro definition for "container_of". So compilation is all fine. But in ubuntu 16.04 kernel, does not contain which caused the above compilation error. Let explicitly add in xsk.c to avoid compilation error in old distro's. Fixes: 2f6324a3937f ("libbpf: Support shared umems between queues and devices") Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200914223210.1831262-1-yhs@fb.com --- tools/lib/bpf/xsk.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 49c324594792..30b4ca5d2ac7 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include -- cgit v1.3.1 From ac234524056da4e0c081f682da3ea25cdaab737a Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 2 Sep 2020 09:59:45 +0530 Subject: selftests/powerpc: Tests for kernel accessing user memory Introduce tests to cover simple scenarios where user is watching memory which can be accessed by kernel as well. We also support _MODE_EXACT with _SETHWDEBUG interface. Move those testcases outside of _BP_RANGE condition. This will help to test _MODE_EXACT scenarios when CONFIG_HAVE_HW_BREAKPOINT is not set, eg: $ ./ptrace-hwbreak ... PTRACE_SET_DEBUGREG, Kernel Access Userspace, len: 8: Ok PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO, len: 1: Ok PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RO, len: 1: Ok PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RW, len: 1: Ok PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace, len: 1: Ok success: ptrace-hwbreak Suggested-by: Pedro Miraglia Franco de Carvalho Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902042945.129369-9-ravi.bangoria@linux.ibm.com --- .../selftests/powerpc/ptrace/ptrace-hwbreak.c | 48 +++++++++++++++++++++- 1 file changed, 46 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c index fc477dfe86a2..2e0d86e0687e 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include "ptrace.h" #define SPRN_PVR 0x11F @@ -44,6 +46,7 @@ struct gstruct { }; static volatile struct gstruct gstruct __attribute__((aligned(512))); +static volatile char cwd[PATH_MAX] __attribute__((aligned(8))); static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo) { @@ -138,6 +141,9 @@ static void test_workload(void) write_var(len); } + /* PTRACE_SET_DEBUGREG, Kernel Access Userspace test */ + syscall(__NR_getcwd, &cwd, PATH_MAX); + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO test */ write_var(1); @@ -150,6 +156,9 @@ static void test_workload(void) else read_var(1); + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace test */ + syscall(__NR_getcwd, &cwd, PATH_MAX); + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO test */ gstruct.a[rand() % A_LEN] = 'a'; @@ -293,6 +302,24 @@ static int test_set_debugreg(pid_t child_pid) return 0; } +static int test_set_debugreg_kernel_userspace(pid_t child_pid) +{ + unsigned long wp_addr = (unsigned long)cwd; + char *name = "PTRACE_SET_DEBUGREG"; + + /* PTRACE_SET_DEBUGREG, Kernel Access Userspace test */ + wp_addr &= ~0x7UL; + wp_addr |= (1Ul << DABR_READ_SHIFT); + wp_addr |= (1UL << DABR_WRITE_SHIFT); + wp_addr |= (1UL << DABR_TRANSLATION_SHIFT); + ptrace_set_debugreg(child_pid, wp_addr); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "Kernel Access Userspace", wp_addr, 8); + + ptrace_set_debugreg(child_pid, 0); + return 0; +} + static void get_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type, unsigned long addr, int len) { @@ -338,6 +365,22 @@ static void test_sethwdebug_exact(pid_t child_pid) ptrace_delhwdebug(child_pid, wh); } +static void test_sethwdebug_exact_kernel_userspace(pid_t child_pid) +{ + struct ppc_hw_breakpoint info; + unsigned long wp_addr = (unsigned long)&cwd; + char *name = "PPC_PTRACE_SETHWDEBUG, MODE_EXACT"; + int len = 1; /* hardcoded in kernel */ + int wh; + + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace test */ + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, 0); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "Kernel Access Userspace", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); +} + static void test_sethwdebug_range_aligned(pid_t child_pid) { struct ppc_hw_breakpoint info; @@ -452,9 +495,10 @@ static void run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr) { test_set_debugreg(child_pid); + test_set_debugreg_kernel_userspace(child_pid); + test_sethwdebug_exact(child_pid); + test_sethwdebug_exact_kernel_userspace(child_pid); if (dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE) { - test_sethwdebug_exact(child_pid); - test_sethwdebug_range_aligned(child_pid); if (dawr || is_8xx) { test_sethwdebug_range_unaligned(child_pid); -- cgit v1.3.1 From 242aaf03dc9be30027d3159a13b935856dab3ba0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Sep 2020 17:11:55 -0700 Subject: selftests: add a test for ethtool pause stats Make sure the empty nest is reported even without stats. Make sure reporting only selected stats works fine. Signed-off-by: Jakub Kicinski Reviewed-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../drivers/net/netdevsim/ethtool-pause.sh | 108 +++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh new file mode 100755 index 000000000000..dd6ad6501c9c --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +NSIM_ID=$((RANDOM % 1024)) +NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID +NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID +NSIM_NETDEV= +num_passes=0 +num_errors=0 + +function cleanup_nsim { + if [ -e $NSIM_DEV_SYS ]; then + echo $NSIM_ID > /sys/bus/netdevsim/del_device + fi +} + +function cleanup { + cleanup_nsim +} + +trap cleanup EXIT + +function get_netdev_name { + local -n old=$1 + + new=$(ls /sys/class/net) + + for netdev in $new; do + for check in $old; do + [ $netdev == $check ] && break + done + + if [ $netdev != $check ]; then + echo $netdev + break + fi + done +} + +function check { + local code=$1 + local str=$2 + local exp_str=$3 + + if [ $code -ne 0 ]; then + ((num_errors++)) + return + fi + + if [ "$str" != "$exp_str" ]; then + echo -e "Expected: '$exp_str', got '$str'" + ((num_errors++)) + return + fi + + ((num_passes++)) +} + +# Bail if ethtool is too old +if ! ethtool -h | grep include-stat 2>&1 >/dev/null; then + echo "SKIP: No --include-statistics support in ethtool" + exit 4 +fi + +# Make a netdevsim +old_netdevs=$(ls /sys/class/net) + +modprobe netdevsim +echo $NSIM_ID > /sys/bus/netdevsim/new_device + +NSIM_NETDEV=`get_netdev_name old_netdevs` + +set -o pipefail + +echo n > $NSIM_DEV_DFS/ethtool/pause/report_stats_tx +echo n > $NSIM_DEV_DFS/ethtool/pause/report_stats_rx + +s=$(ethtool --json -a $NSIM_NETDEV | jq '.[].statistics') +check $? "$s" "null" + +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics') +check $? "$s" "{}" + +echo y > $NSIM_DEV_DFS/ethtool/pause/report_stats_tx + +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics | length') +check $? "$s" "1" + +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.tx_pause_frames') +check $? "$s" "2" + +echo y > $NSIM_DEV_DFS/ethtool/pause/report_stats_rx + +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics | length') +check $? "$s" "2" + +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.rx_pause_frames') +check $? "$s" "1" +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.tx_pause_frames') +check $? "$s" "2" + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $((num_passes)) checks" + exit 0 +else + echo "FAILED $num_errors/$((num_errors+num_passes)) checks" + exit 1 +fi -- cgit v1.3.1 From 7a5e9d84f9e436035f44310511efdddb6e497996 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 15 Sep 2020 14:41:03 +0300 Subject: selftests: fib_nexthops: Test cleanup of FDB entries following nexthop deletion Commit c7cdbe2efc40 ("vxlan: support for nexthop notifiers") registered a listener in the VXLAN driver to the nexthop notification chain. Its purpose is to cleanup FDB entries that use a nexthop that is being deleted. Test that such FDB entries are removed when the nexthop group that they use is deleted. Test that entries are not deleted when a single nexthop in the group is deleted. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_nexthops.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index b74884d52913..eb693a3b7b4a 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -411,9 +411,16 @@ ipv6_fdb_grp_fcnal() run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 103" log_test $? 2 "Route add with fdb nexthop group" + run_cmd "$IP nexthop del id 61" + run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self" + log_test $? 0 "Fdb entry after deleting a single nexthop" + run_cmd "$IP nexthop del id 102" log_test $? 0 "Fdb nexthop delete" + run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self" + log_test $? 254 "Fdb entry after deleting a nexthop group" + $IP link del dev vx10 } @@ -484,9 +491,16 @@ ipv4_fdb_grp_fcnal() run_cmd "$IP ro add 172.16.0.0/22 nhid 103" log_test $? 2 "Route add with fdb nexthop group" + run_cmd "$IP nexthop del id 12" + run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self" + log_test $? 0 "Fdb entry after deleting a single nexthop" + run_cmd "$IP nexthop del id 102" log_test $? 0 "Fdb nexthop delete" + run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self" + log_test $? 254 "Fdb entry after deleting a nexthop group" + $IP link del dev vx10 } -- cgit v1.3.1 From ef15314aa5de955c6afd87d512e8b00f5ac08d06 Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Tue, 15 Sep 2020 16:45:40 -0700 Subject: bpf: Add BPF_PROG_BIND_MAP syscall This syscall binds a map to a program. Returns success if the map is already bound to the program. Signed-off-by: YiFei Zhu Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Cc: YiFei Zhu Link: https://lore.kernel.org/bpf/20200915234543.3220146-3-sdf@google.com --- include/uapi/linux/bpf.h | 7 +++++ kernel/bpf/syscall.c | 63 ++++++++++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 7 +++++ 3 files changed, 77 insertions(+) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7dd314176df7..a22812561064 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -124,6 +124,7 @@ enum bpf_cmd { BPF_ENABLE_STATS, BPF_ITER_CREATE, BPF_LINK_DETACH, + BPF_PROG_BIND_MAP, }; enum bpf_map_type { @@ -658,6 +659,12 @@ union bpf_attr { __u32 flags; } iter_create; + struct { /* struct used by BPF_PROG_BIND_MAP command */ + __u32 prog_fd; + __u32 map_fd; + __u32 flags; /* extra flags */ + } prog_bind_map; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a67b8c6746be..2ce32cad5c8e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4161,6 +4161,66 @@ static int bpf_iter_create(union bpf_attr *attr) return err; } +#define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags + +static int bpf_prog_bind_map(union bpf_attr *attr) +{ + struct bpf_prog *prog; + struct bpf_map *map; + struct bpf_map **used_maps_old, **used_maps_new; + int i, ret = 0; + + if (CHECK_ATTR(BPF_PROG_BIND_MAP)) + return -EINVAL; + + if (attr->prog_bind_map.flags) + return -EINVAL; + + prog = bpf_prog_get(attr->prog_bind_map.prog_fd); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + map = bpf_map_get(attr->prog_bind_map.map_fd); + if (IS_ERR(map)) { + ret = PTR_ERR(map); + goto out_prog_put; + } + + mutex_lock(&prog->aux->used_maps_mutex); + + used_maps_old = prog->aux->used_maps; + + for (i = 0; i < prog->aux->used_map_cnt; i++) + if (used_maps_old[i] == map) + goto out_unlock; + + used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1, + sizeof(used_maps_new[0]), + GFP_KERNEL); + if (!used_maps_new) { + ret = -ENOMEM; + goto out_unlock; + } + + memcpy(used_maps_new, used_maps_old, + sizeof(used_maps_old[0]) * prog->aux->used_map_cnt); + used_maps_new[prog->aux->used_map_cnt] = map; + + prog->aux->used_map_cnt++; + prog->aux->used_maps = used_maps_new; + + kfree(used_maps_old); + +out_unlock: + mutex_unlock(&prog->aux->used_maps_mutex); + + if (ret) + bpf_map_put(map); +out_prog_put: + bpf_prog_put(prog); + return ret; +} + SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) { union bpf_attr attr; @@ -4294,6 +4354,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_LINK_DETACH: err = link_detach(&attr); break; + case BPF_PROG_BIND_MAP: + err = bpf_prog_bind_map(&attr); + break; default: err = -EINVAL; break; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7dd314176df7..a22812561064 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -124,6 +124,7 @@ enum bpf_cmd { BPF_ENABLE_STATS, BPF_ITER_CREATE, BPF_LINK_DETACH, + BPF_PROG_BIND_MAP, }; enum bpf_map_type { @@ -658,6 +659,12 @@ union bpf_attr { __u32 flags; } iter_create; + struct { /* struct used by BPF_PROG_BIND_MAP command */ + __u32 prog_fd; + __u32 map_fd; + __u32 flags; /* extra flags */ + } prog_bind_map; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF -- cgit v1.3.1 From 5d23328dccd93c47e2719cb9d2ae303c235d277d Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Tue, 15 Sep 2020 16:45:41 -0700 Subject: libbpf: Add BPF_PROG_BIND_MAP syscall and use it on .rodata section The patch adds a simple wrapper bpf_prog_bind_map around the syscall. When the libbpf tries to load a program, it will probe the kernel for the support of this syscall and unconditionally bind .rodata section to the program. Signed-off-by: YiFei Zhu Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Cc: YiFei Zhu Link: https://lore.kernel.org/bpf/20200915234543.3220146-4-sdf@google.com --- tools/lib/bpf/bpf.c | 16 +++++++++++ tools/lib/bpf/bpf.h | 8 ++++++ tools/lib/bpf/libbpf.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf.map | 1 + 4 files changed, 94 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 82b983ff6569..2baa1308737c 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -872,3 +872,19 @@ int bpf_enable_stats(enum bpf_stats_type type) return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr)); } + +int bpf_prog_bind_map(int prog_fd, int map_fd, + const struct bpf_prog_bind_opts *opts) +{ + union bpf_attr attr; + + if (!OPTS_VALID(opts, bpf_prog_bind_opts)) + return -EINVAL; + + memset(&attr, 0, sizeof(attr)); + attr.prog_bind_map.prog_fd = prog_fd; + attr.prog_bind_map.map_fd = map_fd; + attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0); + + return sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr)); +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 015d13f25fcc..8c1ac4b42f90 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -243,6 +243,14 @@ LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */ LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type); +struct bpf_prog_bind_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + __u32 flags; +}; +#define bpf_prog_bind_opts__last_field flags + +LIBBPF_API int bpf_prog_bind_map(int prog_fd, int map_fd, + const struct bpf_prog_bind_opts *opts); #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 550950eb1860..570235dbc922 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -174,6 +174,8 @@ enum kern_feature_id { FEAT_EXP_ATTACH_TYPE, /* bpf_probe_read_{kernel,user}[_str] helpers */ FEAT_PROBE_READ_KERN, + /* BPF_PROG_BIND_MAP is supported */ + FEAT_PROG_BIND_MAP, __FEAT_CNT, }; @@ -409,6 +411,7 @@ struct bpf_object { struct extern_desc *externs; int nr_extern; int kconfig_map_idx; + int rodata_map_idx; bool loaded; bool has_subcalls; @@ -1070,6 +1073,7 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.bss_shndx = -1; obj->efile.st_ops_shndx = -1; obj->kconfig_map_idx = -1; + obj->rodata_map_idx = -1; obj->kern_version = get_kernel_version(); obj->loaded = false; @@ -1428,6 +1432,8 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj) obj->efile.rodata->d_size); if (err) return err; + + obj->rodata_map_idx = obj->nr_maps - 1; } if (obj->efile.bss_shndx >= 0) { err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, @@ -3894,6 +3900,52 @@ static int probe_kern_probe_read_kernel(void) return probe_fd(bpf_load_program_xattr(&attr, NULL, 0)); } +static int probe_prog_bind_map(void) +{ + struct bpf_load_program_attr prg_attr; + struct bpf_create_map_attr map_attr; + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret, map, prog; + + memset(&map_attr, 0, sizeof(map_attr)); + map_attr.map_type = BPF_MAP_TYPE_ARRAY; + map_attr.key_size = sizeof(int); + map_attr.value_size = 32; + map_attr.max_entries = 1; + + map = bpf_create_map_xattr(&map_attr); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + memset(&prg_attr, 0, sizeof(prg_attr)); + prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + prg_attr.insns = insns; + prg_attr.insns_cnt = ARRAY_SIZE(insns); + prg_attr.license = "GPL"; + + prog = bpf_load_program_xattr(&prg_attr, NULL, 0); + if (prog < 0) { + close(map); + return 0; + } + + ret = bpf_prog_bind_map(prog, map, NULL); + + close(map); + close(prog); + + return ret >= 0; +} + enum kern_feature_result { FEAT_UNKNOWN = 0, FEAT_SUPPORTED = 1, @@ -3934,6 +3986,9 @@ static struct kern_feature_desc { }, [FEAT_PROBE_READ_KERN] = { "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, + }, + [FEAT_PROG_BIND_MAP] = { + "BPF_PROG_BIND_MAP support", probe_prog_bind_map, } }; @@ -6468,6 +6523,20 @@ retry_load: if (ret >= 0) { if (log_buf && load_attr.log_level) pr_debug("verifier log:\n%s", log_buf); + + if (prog->obj->rodata_map_idx >= 0 && + kernel_supports(FEAT_PROG_BIND_MAP)) { + struct bpf_map *rodata_map = + &prog->obj->maps[prog->obj->rodata_map_idx]; + + if (bpf_prog_bind_map(ret, bpf_map__fd(rodata_map), NULL)) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warn("prog '%s': failed to bind .rodata map: %s\n", + prog->name, cp); + /* Don't fail hard if can't bind rodata. */ + } + } + *pfd = ret; ret = 0; goto out; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 92ceb48a5ca2..5f054dadf082 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -302,6 +302,7 @@ LIBBPF_0.1.0 { LIBBPF_0.2.0 { global: + bpf_prog_bind_map; bpf_program__section_name; perf_buffer__buffer_cnt; perf_buffer__buffer_fd; -- cgit v1.3.1 From aff52e685eb39984f3a613e8a5c570d97e5d2414 Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Tue, 15 Sep 2020 16:45:42 -0700 Subject: bpftool: Support dumping metadata Dump metadata in the 'bpftool prog' list if it's present. For some formatting some BTF code is put directly in the metadata dumping. Sanity checks on the map and the kind of the btf_type to make sure we are actually dumping what we are expecting. A helper jsonw_reset is added to json writer so we can reuse the same json writer without having extraneous commas. Sample output: $ bpftool prog 6: cgroup_skb name prog tag bcf7977d3b93787c gpl [...] btf_id 4 metadata: a = "foo" b = 1 $ bpftool prog --json --pretty [{ "id": 6, [...] "btf_id": 4, "metadata": { "a": "foo", "b": 1 } } ] Signed-off-by: YiFei Zhu Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Cc: YiFei Zhu Link: https://lore.kernel.org/bpf/20200915234543.3220146-5-sdf@google.com --- tools/bpf/bpftool/json_writer.c | 6 ++ tools/bpf/bpftool/json_writer.h | 3 + tools/bpf/bpftool/prog.c | 199 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 208 insertions(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c index 86501cd3c763..7fea83bedf48 100644 --- a/tools/bpf/bpftool/json_writer.c +++ b/tools/bpf/bpftool/json_writer.c @@ -119,6 +119,12 @@ void jsonw_pretty(json_writer_t *self, bool on) self->pretty = on; } +void jsonw_reset(json_writer_t *self) +{ + assert(self->depth == 0); + self->sep = '\0'; +} + /* Basic blocks */ static void jsonw_begin(json_writer_t *self, int c) { diff --git a/tools/bpf/bpftool/json_writer.h b/tools/bpf/bpftool/json_writer.h index 35cf1f00f96c..8ace65cdb92f 100644 --- a/tools/bpf/bpftool/json_writer.h +++ b/tools/bpf/bpftool/json_writer.h @@ -27,6 +27,9 @@ void jsonw_destroy(json_writer_t **self_p); /* Cause output to have pretty whitespace */ void jsonw_pretty(json_writer_t *self, bool on); +/* Reset separator to create new JSON */ +void jsonw_reset(json_writer_t *self); + /* Add property name */ void jsonw_name(json_writer_t *self, const char *name); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index f7923414a052..d942c1e3372c 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -29,6 +29,9 @@ #include "main.h" #include "xlated_dumper.h" +#define BPF_METADATA_PREFIX "bpf_metadata_" +#define BPF_METADATA_PREFIX_LEN (sizeof(BPF_METADATA_PREFIX) - 1) + const char * const prog_type_name[] = { [BPF_PROG_TYPE_UNSPEC] = "unspec", [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", @@ -151,6 +154,198 @@ static void show_prog_maps(int fd, __u32 num_maps) } } +static void *find_metadata(int prog_fd, struct bpf_map_info *map_info) +{ + struct bpf_prog_info prog_info; + __u32 prog_info_len; + __u32 map_info_len; + void *value = NULL; + __u32 *map_ids; + int nr_maps; + int key = 0; + int map_fd; + int ret; + __u32 i; + + memset(&prog_info, 0, sizeof(prog_info)); + prog_info_len = sizeof(prog_info); + ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len); + if (ret) + return NULL; + + if (!prog_info.nr_map_ids) + return NULL; + + map_ids = calloc(prog_info.nr_map_ids, sizeof(__u32)); + if (!map_ids) + return NULL; + + nr_maps = prog_info.nr_map_ids; + memset(&prog_info, 0, sizeof(prog_info)); + prog_info.nr_map_ids = nr_maps; + prog_info.map_ids = ptr_to_u64(map_ids); + prog_info_len = sizeof(prog_info); + + ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len); + if (ret) + goto free_map_ids; + + for (i = 0; i < prog_info.nr_map_ids; i++) { + map_fd = bpf_map_get_fd_by_id(map_ids[i]); + if (map_fd < 0) + goto free_map_ids; + + memset(map_info, 0, sizeof(*map_info)); + map_info_len = sizeof(*map_info); + ret = bpf_obj_get_info_by_fd(map_fd, map_info, &map_info_len); + if (ret < 0) { + close(map_fd); + goto free_map_ids; + } + + if (map_info->type != BPF_MAP_TYPE_ARRAY || + map_info->key_size != sizeof(int) || + map_info->max_entries != 1 || + !map_info->btf_value_type_id || + !strstr(map_info->name, ".rodata")) { + close(map_fd); + continue; + } + + value = malloc(map_info->value_size); + if (!value) { + close(map_fd); + goto free_map_ids; + } + + if (bpf_map_lookup_elem(map_fd, &key, value)) { + close(map_fd); + free(value); + value = NULL; + goto free_map_ids; + } + + close(map_fd); + break; + } + +free_map_ids: + free(map_ids); + return value; +} + +static bool has_metadata_prefix(const char *s) +{ + return strncmp(s, BPF_METADATA_PREFIX, BPF_METADATA_PREFIX_LEN) == 0; +} + +static void show_prog_metadata(int fd, __u32 num_maps) +{ + const struct btf_type *t_datasec, *t_var; + struct bpf_map_info map_info; + struct btf_var_secinfo *vsi; + bool printed_header = false; + struct btf *btf = NULL; + unsigned int i, vlen; + void *value = NULL; + const char *name; + int err; + + if (!num_maps) + return; + + memset(&map_info, 0, sizeof(map_info)); + value = find_metadata(fd, &map_info); + if (!value) + return; + + err = btf__get_from_id(map_info.btf_id, &btf); + if (err || !btf) + goto out_free; + + t_datasec = btf__type_by_id(btf, map_info.btf_value_type_id); + if (!btf_is_datasec(t_datasec)) + goto out_free; + + vlen = btf_vlen(t_datasec); + vsi = btf_var_secinfos(t_datasec); + + /* We don't proceed to check the kinds of the elements of the DATASEC. + * The verifier enforces them to be BTF_KIND_VAR. + */ + + if (json_output) { + struct btf_dumper d = { + .btf = btf, + .jw = json_wtr, + .is_plain_text = false, + }; + + for (i = 0; i < vlen; i++, vsi++) { + t_var = btf__type_by_id(btf, vsi->type); + name = btf__name_by_offset(btf, t_var->name_off); + + if (!has_metadata_prefix(name)) + continue; + + if (!printed_header) { + jsonw_name(json_wtr, "metadata"); + jsonw_start_object(json_wtr); + printed_header = true; + } + + jsonw_name(json_wtr, name + BPF_METADATA_PREFIX_LEN); + err = btf_dumper_type(&d, t_var->type, value + vsi->offset); + if (err) { + p_err("btf dump failed: %d", err); + break; + } + } + if (printed_header) + jsonw_end_object(json_wtr); + } else { + json_writer_t *btf_wtr = jsonw_new(stdout); + struct btf_dumper d = { + .btf = btf, + .jw = btf_wtr, + .is_plain_text = true, + }; + + if (!btf_wtr) { + p_err("jsonw alloc failed"); + goto out_free; + } + + for (i = 0; i < vlen; i++, vsi++) { + t_var = btf__type_by_id(btf, vsi->type); + name = btf__name_by_offset(btf, t_var->name_off); + + if (!has_metadata_prefix(name)) + continue; + + if (!printed_header) { + printf("\tmetadata:"); + printed_header = true; + } + + printf("\n\t\t%s = ", name + BPF_METADATA_PREFIX_LEN); + + jsonw_reset(btf_wtr); + err = btf_dumper_type(&d, t_var->type, value + vsi->offset); + if (err) { + p_err("btf dump failed: %d", err); + break; + } + } + if (printed_header) + jsonw_destroy(&btf_wtr); + } + +out_free: + btf__free(btf); + free(value); +} + static void print_prog_header_json(struct bpf_prog_info *info) { jsonw_uint_field(json_wtr, "id", info->id); @@ -228,6 +423,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) emit_obj_refs_json(&refs_table, info->id, json_wtr); + show_prog_metadata(fd, info->nr_map_ids); + jsonw_end_object(json_wtr); } @@ -297,6 +494,8 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); printf("\n"); + + show_prog_metadata(fd, info->nr_map_ids); } static int show_prog(int fd) -- cgit v1.3.1 From d42d1cc44d702123d6ff12ce54a0e854036d29cb Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Tue, 15 Sep 2020 16:45:43 -0700 Subject: selftests/bpf: Test load and dump metadata with btftool and skel This is a simple test to check that loading and dumping metadata in btftool works, whether or not metadata contents are used by the program. A C test is also added to make sure the skeleton code can read the metadata values. Signed-off-by: YiFei Zhu Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Cc: YiFei Zhu Link: https://lore.kernel.org/bpf/20200915234543.3220146-6-sdf@google.com --- tools/testing/selftests/bpf/Makefile | 3 +- tools/testing/selftests/bpf/prog_tests/metadata.c | 141 +++++++++++++++++++++ .../testing/selftests/bpf/progs/metadata_unused.c | 15 +++ tools/testing/selftests/bpf/progs/metadata_used.c | 15 +++ .../testing/selftests/bpf/test_bpftool_metadata.sh | 82 ++++++++++++ 5 files changed, 255 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/metadata.c create mode 100644 tools/testing/selftests/bpf/progs/metadata_unused.c create mode 100644 tools/testing/selftests/bpf/progs/metadata_used.c create mode 100755 tools/testing/selftests/bpf/test_bpftool_metadata.sh (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 05798c2b5c67..2a63791177c4 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -68,7 +68,8 @@ TEST_PROGS := test_kmod.sh \ test_tc_edt.sh \ test_xdping.sh \ test_bpftool_build.sh \ - test_bpftool.sh + test_bpftool.sh \ + test_bpftool_metadata.sh \ TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh \ diff --git a/tools/testing/selftests/bpf/prog_tests/metadata.c b/tools/testing/selftests/bpf/prog_tests/metadata.c new file mode 100644 index 000000000000..2c53eade88e3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/metadata.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2020 Google LLC. + */ + +#include +#include +#include + +#include "metadata_unused.skel.h" +#include "metadata_used.skel.h" + +static int duration; + +static int prog_holds_map(int prog_fd, int map_fd) +{ + struct bpf_prog_info prog_info = {}; + struct bpf_prog_info map_info = {}; + __u32 prog_info_len; + __u32 map_info_len; + __u32 *map_ids; + int nr_maps; + int ret; + int i; + + map_info_len = sizeof(map_info); + ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len); + if (ret) + return -errno; + + prog_info_len = sizeof(prog_info); + ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len); + if (ret) + return -errno; + + map_ids = calloc(prog_info.nr_map_ids, sizeof(__u32)); + if (!map_ids) + return -ENOMEM; + + nr_maps = prog_info.nr_map_ids; + memset(&prog_info, 0, sizeof(prog_info)); + prog_info.nr_map_ids = nr_maps; + prog_info.map_ids = ptr_to_u64(map_ids); + prog_info_len = sizeof(prog_info); + + ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len); + if (ret) { + ret = -errno; + goto free_map_ids; + } + + ret = -ENOENT; + for (i = 0; i < prog_info.nr_map_ids; i++) { + if (map_ids[i] == map_info.id) { + ret = 0; + break; + } + } + +free_map_ids: + free(map_ids); + return ret; +} + +static void test_metadata_unused(void) +{ + struct metadata_unused *obj; + int err; + + obj = metadata_unused__open_and_load(); + if (CHECK(!obj, "skel-load", "errno %d", errno)) + return; + + err = prog_holds_map(bpf_program__fd(obj->progs.prog), + bpf_map__fd(obj->maps.rodata)); + if (CHECK(err, "prog-holds-rodata", "errno: %d", err)) + return; + + /* Assert that we can access the metadata in skel and the values are + * what we expect. + */ + if (CHECK(strncmp(obj->rodata->bpf_metadata_a, "foo", + sizeof(obj->rodata->bpf_metadata_a)), + "bpf_metadata_a", "expected \"foo\", value differ")) + goto close_bpf_object; + if (CHECK(obj->rodata->bpf_metadata_b != 1, "bpf_metadata_b", + "expected 1, got %d", obj->rodata->bpf_metadata_b)) + goto close_bpf_object; + + /* Assert that binding metadata map to prog again succeeds. */ + err = bpf_prog_bind_map(bpf_program__fd(obj->progs.prog), + bpf_map__fd(obj->maps.rodata), NULL); + CHECK(err, "rebind_map", "errno %d, expected 0", errno); + +close_bpf_object: + metadata_unused__destroy(obj); +} + +static void test_metadata_used(void) +{ + struct metadata_used *obj; + int err; + + obj = metadata_used__open_and_load(); + if (CHECK(!obj, "skel-load", "errno %d", errno)) + return; + + err = prog_holds_map(bpf_program__fd(obj->progs.prog), + bpf_map__fd(obj->maps.rodata)); + if (CHECK(err, "prog-holds-rodata", "errno: %d", err)) + return; + + /* Assert that we can access the metadata in skel and the values are + * what we expect. + */ + if (CHECK(strncmp(obj->rodata->bpf_metadata_a, "bar", + sizeof(obj->rodata->bpf_metadata_a)), + "metadata_a", "expected \"bar\", value differ")) + goto close_bpf_object; + if (CHECK(obj->rodata->bpf_metadata_b != 2, "metadata_b", + "expected 2, got %d", obj->rodata->bpf_metadata_b)) + goto close_bpf_object; + + /* Assert that binding metadata map to prog again succeeds. */ + err = bpf_prog_bind_map(bpf_program__fd(obj->progs.prog), + bpf_map__fd(obj->maps.rodata), NULL); + CHECK(err, "rebind_map", "errno %d, expected 0", errno); + +close_bpf_object: + metadata_used__destroy(obj); +} + +void test_metadata(void) +{ + if (test__start_subtest("unused")) + test_metadata_unused(); + + if (test__start_subtest("used")) + test_metadata_used(); +} diff --git a/tools/testing/selftests/bpf/progs/metadata_unused.c b/tools/testing/selftests/bpf/progs/metadata_unused.c new file mode 100644 index 000000000000..672a0d19f8d0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/metadata_unused.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +volatile const char bpf_metadata_a[] SEC(".rodata") = "foo"; +volatile const int bpf_metadata_b SEC(".rodata") = 1; + +SEC("cgroup_skb/egress") +int prog(struct xdp_md *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/metadata_used.c b/tools/testing/selftests/bpf/progs/metadata_used.c new file mode 100644 index 000000000000..b7198e65383d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/metadata_used.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +volatile const char bpf_metadata_a[] SEC(".rodata") = "bar"; +volatile const int bpf_metadata_b SEC(".rodata") = 2; + +SEC("cgroup_skb/egress") +int prog(struct xdp_md *ctx) +{ + return bpf_metadata_b ? 1 : 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_bpftool_metadata.sh b/tools/testing/selftests/bpf/test_bpftool_metadata.sh new file mode 100755 index 000000000000..1bf81b49457a --- /dev/null +++ b/tools/testing/selftests/bpf/test_bpftool_metadata.sh @@ -0,0 +1,82 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +TESTNAME=bpftool_metadata +BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts) +BPF_DIR=$BPF_FS/test_$TESTNAME + +_cleanup() +{ + set +e + rm -rf $BPF_DIR 2> /dev/null +} + +cleanup_skip() +{ + echo "selftests: $TESTNAME [SKIP]" + _cleanup + + exit $ksft_skip +} + +cleanup() +{ + if [ "$?" = 0 ]; then + echo "selftests: $TESTNAME [PASS]" + else + echo "selftests: $TESTNAME [FAILED]" + fi + _cleanup +} + +if [ $(id -u) -ne 0 ]; then + echo "selftests: $TESTNAME [SKIP] Need root privileges" + exit $ksft_skip +fi + +if [ -z "$BPF_FS" ]; then + echo "selftests: $TESTNAME [SKIP] Could not run test without bpffs mounted" + exit $ksft_skip +fi + +if ! bpftool version > /dev/null 2>&1; then + echo "selftests: $TESTNAME [SKIP] Could not run test without bpftool" + exit $ksft_skip +fi + +set -e + +trap cleanup_skip EXIT + +mkdir $BPF_DIR + +trap cleanup EXIT + +bpftool prog load metadata_unused.o $BPF_DIR/unused + +METADATA_PLAIN="$(bpftool prog)" +echo "$METADATA_PLAIN" | grep 'a = "foo"' > /dev/null +echo "$METADATA_PLAIN" | grep 'b = 1' > /dev/null + +bpftool prog --json | grep '"metadata":{"a":"foo","b":1}' > /dev/null + +bpftool map | grep 'metadata.rodata' > /dev/null + +rm $BPF_DIR/unused + +bpftool prog load metadata_used.o $BPF_DIR/used + +METADATA_PLAIN="$(bpftool prog)" +echo "$METADATA_PLAIN" | grep 'a = "bar"' > /dev/null +echo "$METADATA_PLAIN" | grep 'b = 2' > /dev/null + +bpftool prog --json | grep '"metadata":{"a":"bar","b":2}' > /dev/null + +bpftool map | grep 'metadata.rodata' > /dev/null + +rm $BPF_DIR/used + +exit 0 -- cgit v1.3.1 From c64779e24e88a9915b2f35fbb9851ef2c5462cc2 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 15 Sep 2020 17:48:19 -0700 Subject: selftests/bpf: Merge most of test_btf into test_progs Merge 183 tests from test_btf into test_progs framework to be exercised regularly. All the test_btf tests that were moved are modeled as proper sub-tests in test_progs framework for ease of debugging and reporting. No functional or behavioral changes were intended, I tried to preserve original behavior as much as possible. E.g., `test_progs -v` will activate "always_log" flag to emit BTF validation log. The only difference is in reducing the max_entries limit for pretty-printing tests from (128 * 1024) to just 128 to reduce tests running time without reducing the coverage. Example test run: $ sudo ./test_progs -n 8 ... #8 btf:OK Summary: 1/183 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200916004819.3767489-1-andriin@fb.com --- tools/testing/selftests/bpf/.gitignore | 1 - tools/testing/selftests/bpf/Makefile | 2 +- tools/testing/selftests/bpf/prog_tests/btf.c | 6811 +++++++++++++++++++++++++ tools/testing/selftests/bpf/test_btf.c | 7067 -------------------------- 4 files changed, 6812 insertions(+), 7069 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/btf.c delete mode 100644 tools/testing/selftests/bpf/test_btf.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 9a0946ddb705..e8fed558b8b8 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -15,7 +15,6 @@ test_sock test_sock_addr test_sock_fields urandom_read -test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 2a63791177c4..59a5fa5fe837 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -33,7 +33,7 @@ LDLIBS += -lcap -lelf -lz -lrt -lpthread # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_verifier_log test_dev_cgroup test_tcpbpf_user \ - test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ + test_sock test_sockmap get_cgroup_id_user test_socket_cookie \ test_cgroup_storage \ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \ test_progs-no_alu32 \ diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c new file mode 100644 index 000000000000..93162484c2ca --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -0,0 +1,6811 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Facebook */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_rlimit.h" +#include "bpf_util.h" +#include "../test_btf.h" +#include "test_progs.h" + +#define MAX_INSNS 512 +#define MAX_SUBPROGS 16 + +static int duration = 0; +static bool always_log; + +#undef CHECK +#define CHECK(condition, format...) _CHECK(condition, "check", duration, format) + +#define BTF_END_RAW 0xdeadbeef +#define NAME_TBD 0xdeadb33f + +#define NAME_NTH(N) (0xffff0000 | N) +#define IS_NAME_NTH(X) ((X & 0xffff0000) == 0xffff0000) +#define GET_NAME_NTH_IDX(X) (X & 0x0000ffff) + +#define MAX_NR_RAW_U32 1024 +#define BTF_LOG_BUF_SIZE 65535 + +static char btf_log_buf[BTF_LOG_BUF_SIZE]; + +static struct btf_header hdr_tmpl = { + .magic = BTF_MAGIC, + .version = BTF_VERSION, + .hdr_len = sizeof(struct btf_header), +}; + +/* several different mapv kinds(types) supported by pprint */ +enum pprint_mapv_kind_t { + PPRINT_MAPV_KIND_BASIC = 0, + PPRINT_MAPV_KIND_INT128, +}; + +struct btf_raw_test { + const char *descr; + const char *str_sec; + const char *map_name; + const char *err_str; + __u32 raw_types[MAX_NR_RAW_U32]; + __u32 str_sec_size; + enum bpf_map_type map_type; + __u32 key_size; + __u32 value_size; + __u32 key_type_id; + __u32 value_type_id; + __u32 max_entries; + bool btf_load_err; + bool map_create_err; + bool ordered_map; + bool lossless_map; + bool percpu_map; + int hdr_len_delta; + int type_off_delta; + int str_off_delta; + int str_len_delta; + enum pprint_mapv_kind_t mapv_kind; +}; + +#define BTF_STR_SEC(str) \ + .str_sec = str, .str_sec_size = sizeof(str) + +static struct btf_raw_test raw_tests[] = { +/* enum E { + * E0, + * E1, + * }; + * + * struct A { + * unsigned long long m; + * int n; + * char o; + * [3 bytes hole] + * int p[8]; + * int q[4][8]; + * enum E r; + * }; + */ +{ + .descr = "struct test #1", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 6), 180), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + BTF_MEMBER_ENC(NAME_TBD, 6, 384),/* int q[4][8] */ + BTF_MEMBER_ENC(NAME_TBD, 7, 1408), /* enum E r */ + /* } */ + /* int[4][8] */ + BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [6] */ + /* enum E */ /* [7] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)), + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_test1_map", + .key_size = sizeof(int), + .value_size = 180, + .key_type_id = 1, + .value_type_id = 5, + .max_entries = 4, +}, + +/* typedef struct b Struct_B; + * + * struct A { + * int m; + * struct b n[4]; + * const Struct_B o[4]; + * }; + * + * struct B { + * int m; + * int n; + * }; + */ +{ + .descr = "struct test #2", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* struct b [4] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(4, 1, 4), + + /* struct A { */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 3), 68), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct B n[4] */ + BTF_MEMBER_ENC(NAME_TBD, 8, 288),/* const Struct_B o[4];*/ + /* } */ + + /* struct B { */ /* [4] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ + BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */ + /* } */ + + /* const int */ /* [5] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1), + /* typedef struct b Struct_B */ /* [6] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 4), + /* const Struct_B */ /* [7] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 6), + /* const Struct_B [4] */ /* [8] */ + BTF_TYPE_ARRAY_ENC(7, 1, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0B\0m\0n\0Struct_B", + .str_sec_size = sizeof("\0A\0m\0n\0o\0B\0m\0n\0Struct_B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_test2_map", + .key_size = sizeof(int), + .value_size = 68, + .key_type_id = 1, + .value_type_id = 3, + .max_entries = 4, +}, +{ + .descr = "struct test #3 Invalid member offset", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int64 */ /* [2] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), + + /* struct A { */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 16), + BTF_MEMBER_ENC(NAME_TBD, 1, 64), /* int m; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* int64 n; */ + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0", + .str_sec_size = sizeof("\0A\0m\0n\0"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_test3_map", + .key_size = sizeof(int), + .value_size = 16, + .key_type_id = 1, + .value_type_id = 3, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid member bits_offset", +}, +/* + * struct A { + * unsigned long long m; + * int n; + * char o; + * [3 bytes hole] + * int p[8]; + * }; + */ +{ + .descr = "global data test #1", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_test1_map", + .key_size = sizeof(int), + .value_size = 48, + .key_type_id = 1, + .value_type_id = 5, + .max_entries = 4, +}, +/* + * struct A { + * unsigned long long m; + * int n; + * char o; + * [3 bytes hole] + * int p[8]; + * }; + * static struct A t; <- in .bss + */ +{ + .descr = "global data test #2", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + /* } */ + /* static struct A t */ + BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ + /* .bss section */ /* [7] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 48), + BTF_VAR_SECINFO_ENC(6, 0, 48), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p\0t\0.bss", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 48, + .key_type_id = 0, + .value_type_id = 7, + .max_entries = 1, +}, +{ + .descr = "global data test #3", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* static int t */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ + /* .bss section */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + BTF_END_RAW, + }, + .str_sec = "\0t\0.bss", + .str_sec_size = sizeof("\0t\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 3, + .max_entries = 1, +}, +{ + .descr = "global data test #4, unsupported linkage", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* static int t */ + BTF_VAR_ENC(NAME_TBD, 1, 2), /* [2] */ + /* .bss section */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + BTF_END_RAW, + }, + .str_sec = "\0t\0.bss", + .str_sec_size = sizeof("\0t\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 3, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Linkage not supported", +}, +{ + .descr = "global data test #5, invalid var type", + .raw_types = { + /* static void t */ + BTF_VAR_ENC(NAME_TBD, 0, 0), /* [1] */ + /* .bss section */ /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(1, 0, 4), + BTF_END_RAW, + }, + .str_sec = "\0t\0.bss", + .str_sec_size = sizeof("\0t\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 2, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, +{ + .descr = "global data test #6, invalid var type (fwd type)", + .raw_types = { + /* union A */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [1] */ + /* static union A t */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ + /* .bss section */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0t\0.bss", + .str_sec_size = sizeof("\0A\0t\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 2, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid type", +}, +{ + .descr = "global data test #7, invalid var type (fwd type)", + .raw_types = { + /* union A */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [1] */ + /* static union A t */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ + /* .bss section */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(1, 0, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0t\0.bss", + .str_sec_size = sizeof("\0A\0t\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 2, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid type", +}, +{ + .descr = "global data test #8, invalid var size", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + /* } */ + /* static struct A t */ + BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ + /* .bss section */ /* [7] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 48), + BTF_VAR_SECINFO_ENC(6, 0, 47), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p\0t\0.bss", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 48, + .key_type_id = 0, + .value_type_id = 7, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid size", +}, +{ + .descr = "global data test #9, invalid var size", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + /* } */ + /* static struct A t */ + BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ + /* .bss section */ /* [7] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 46), + BTF_VAR_SECINFO_ENC(6, 0, 48), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p\0t\0.bss", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 48, + .key_type_id = 0, + .value_type_id = 7, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid size", +}, +{ + .descr = "global data test #10, invalid var size", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + /* } */ + /* static struct A t */ + BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ + /* .bss section */ /* [7] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 46), + BTF_VAR_SECINFO_ENC(6, 0, 46), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p\0t\0.bss", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 48, + .key_type_id = 0, + .value_type_id = 7, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid size", +}, +{ + .descr = "global data test #11, multiple section members", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + /* } */ + /* static struct A t */ + BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ + /* static int u */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [7] */ + /* .bss section */ /* [8] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62), + BTF_VAR_SECINFO_ENC(6, 10, 48), + BTF_VAR_SECINFO_ENC(7, 58, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 62, + .key_type_id = 0, + .value_type_id = 8, + .max_entries = 1, +}, +{ + .descr = "global data test #12, invalid offset", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + /* } */ + /* static struct A t */ + BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ + /* static int u */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [7] */ + /* .bss section */ /* [8] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62), + BTF_VAR_SECINFO_ENC(6, 10, 48), + BTF_VAR_SECINFO_ENC(7, 60, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 62, + .key_type_id = 0, + .value_type_id = 8, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid offset+size", +}, +{ + .descr = "global data test #13, invalid offset", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + /* } */ + /* static struct A t */ + BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ + /* static int u */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [7] */ + /* .bss section */ /* [8] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62), + BTF_VAR_SECINFO_ENC(6, 10, 48), + BTF_VAR_SECINFO_ENC(7, 12, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 62, + .key_type_id = 0, + .value_type_id = 8, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid offset", +}, +{ + .descr = "global data test #14, invalid offset", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + /* } */ + /* static struct A t */ + BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ + /* static int u */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [7] */ + /* .bss section */ /* [8] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62), + BTF_VAR_SECINFO_ENC(7, 58, 4), + BTF_VAR_SECINFO_ENC(6, 10, 48), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 62, + .key_type_id = 0, + .value_type_id = 8, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid offset", +}, +{ + .descr = "global data test #15, not var kind", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ + /* .bss section */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(1, 0, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0t\0.bss", + .str_sec_size = sizeof("\0A\0t\0.bss"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 3, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Not a VAR kind member", +}, +{ + .descr = "global data test #16, invalid var referencing sec", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_VAR_ENC(NAME_TBD, 5, 0), /* [2] */ + BTF_VAR_ENC(NAME_TBD, 2, 0), /* [3] */ + /* a section */ /* [4] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(3, 0, 4), + /* a section */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(6, 0, 4), + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0A\0t\0s\0a\0a", + .str_sec_size = sizeof("\0A\0t\0s\0a\0a"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 4, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, +{ + .descr = "global data test #17, invalid var referencing var", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ + BTF_VAR_ENC(NAME_TBD, 2, 0), /* [3] */ + /* a section */ /* [4] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(3, 0, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0t\0s\0a\0a", + .str_sec_size = sizeof("\0A\0t\0s\0a\0a"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 4, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, +{ + .descr = "global data test #18, invalid var loop", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_VAR_ENC(NAME_TBD, 2, 0), /* [2] */ + /* .bss section */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0t\0aaa", + .str_sec_size = sizeof("\0A\0t\0aaa"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 4, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, +{ + .descr = "global data test #19, invalid var referencing var", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_VAR_ENC(NAME_TBD, 3, 0), /* [2] */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [3] */ + BTF_END_RAW, + }, + .str_sec = "\0A\0t\0s\0a\0a", + .str_sec_size = sizeof("\0A\0t\0s\0a\0a"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 4, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, +{ + .descr = "global data test #20, invalid ptr referencing var", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* PTR type_id=3 */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3), + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [3] */ + BTF_END_RAW, + }, + .str_sec = "\0A\0t\0s\0a\0a", + .str_sec_size = sizeof("\0A\0t\0s\0a\0a"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 4, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, +{ + .descr = "global data test #21, var included in struct", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* struct A { */ /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 32),/* VAR type_id=3; */ + /* } */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [3] */ + BTF_END_RAW, + }, + .str_sec = "\0A\0t\0s\0a\0a", + .str_sec_size = sizeof("\0A\0t\0s\0a\0a"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 4, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid member", +}, +{ + .descr = "global data test #22, array of var", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ARRAY_ENC(3, 1, 4), /* [2] */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [3] */ + BTF_END_RAW, + }, + .str_sec = "\0A\0t\0s\0a\0a", + .str_sec_size = sizeof("\0A\0t\0s\0a\0a"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 0, + .value_type_id = 4, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid elem", +}, +/* Test member exceeds the size of struct. + * + * struct A { + * int m; + * int n; + * }; + */ +{ + .descr = "size check test #1", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* struct A { */ /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ + BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */ + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n", + .str_sec_size = sizeof("\0A\0m\0n"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "size_check1_map", + .key_size = sizeof(int), + .value_size = 1, + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Member exceeds struct_size", +}, + +/* Test member exeeds the size of struct + * + * struct A { + * int m; + * int n[2]; + * }; + */ +{ + .descr = "size check test #2", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)), + /* int[2] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(1, 1, 2), + /* struct A { */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 3 - 1), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n[2]; */ + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n", + .str_sec_size = sizeof("\0A\0m\0n"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "size_check2_map", + .key_size = sizeof(int), + .value_size = 1, + .key_type_id = 1, + .value_type_id = 3, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Member exceeds struct_size", +}, + +/* Test member exeeds the size of struct + * + * struct A { + * int m; + * void *n; + * }; + */ +{ + .descr = "size check test #3", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)), + /* void* */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), + /* struct A { */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) + sizeof(void *) - 1), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* void *n; */ + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n", + .str_sec_size = sizeof("\0A\0m\0n"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "size_check3_map", + .key_size = sizeof(int), + .value_size = 1, + .key_type_id = 1, + .value_type_id = 3, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Member exceeds struct_size", +}, + +/* Test member exceeds the size of struct + * + * enum E { + * E0, + * E1, + * }; + * + * struct A { + * int m; + * enum E n; + * }; + */ +{ + .descr = "size check test #4", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)), + /* enum E { */ /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)), + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + /* } */ + /* struct A { */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* enum E n; */ + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0E\0E0\0E1\0A\0m\0n", + .str_sec_size = sizeof("\0E\0E0\0E1\0A\0m\0n"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "size_check4_map", + .key_size = sizeof(int), + .value_size = 1, + .key_type_id = 1, + .value_type_id = 3, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Member exceeds struct_size", +}, + +/* Test member unexceeds the size of struct + * + * enum E { + * E0, + * E1, + * }; + * + * struct A { + * char m; + * enum E __attribute__((packed)) n; + * }; + */ +{ + .descr = "size check test #5", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)), + /* char */ /* [2] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), + /* enum E { */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), 1), + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + /* } */ + /* struct A { */ /* [4] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 2), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* char m; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 8),/* enum E __attribute__((packed)) n; */ + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0E\0E0\0E1\0A\0m\0n", + .str_sec_size = sizeof("\0E\0E0\0E1\0A\0m\0n"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "size_check5_map", + .key_size = sizeof(int), + .value_size = 2, + .key_type_id = 1, + .value_type_id = 4, + .max_entries = 4, +}, + +/* typedef const void * const_void_ptr; + * struct A { + * const_void_ptr m; + * }; + */ +{ + .descr = "void test #1", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* const void */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), + /* const void* */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), + /* typedef const void * const_void_ptr */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)), + /* const_void_ptr m; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 0), + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0const_void_ptr\0A\0m", + .str_sec_size = sizeof("\0const_void_ptr\0A\0m"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "void_test1_map", + .key_size = sizeof(int), + .value_size = sizeof(void *), + .key_type_id = 1, + .value_type_id = 4, + .max_entries = 4, +}, + +/* struct A { + * const void m; + * }; + */ +{ + .descr = "void test #2", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* const void */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), + /* struct A { */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 8), + /* const void m; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0A\0m", + .str_sec_size = sizeof("\0A\0m"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "void_test2_map", + .key_size = sizeof(int), + .value_size = sizeof(void *), + .key_type_id = 1, + .value_type_id = 3, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid member", +}, + +/* typedef const void * const_void_ptr; + * const_void_ptr[4] + */ +{ + .descr = "void test #3", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* const void */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), + /* const void* */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), + /* typedef const void * const_void_ptr */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [4] */ + /* const_void_ptr[4] */ + BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [5] */ + BTF_END_RAW, + }, + .str_sec = "\0const_void_ptr", + .str_sec_size = sizeof("\0const_void_ptr"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "void_test3_map", + .key_size = sizeof(int), + .value_size = sizeof(void *) * 4, + .key_type_id = 1, + .value_type_id = 5, + .max_entries = 4, +}, + +/* const void[4] */ +{ + .descr = "void test #4", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* const void */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), + /* const void[4] */ /* [3] */ + BTF_TYPE_ARRAY_ENC(2, 1, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0m", + .str_sec_size = sizeof("\0A\0m"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "void_test4_map", + .key_size = sizeof(int), + .value_size = sizeof(void *) * 4, + .key_type_id = 1, + .value_type_id = 3, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid elem", +}, + +/* Array_A <------------------+ + * elem_type == Array_B | + * | | + * | | + * Array_B <-------- + | + * elem_type == Array A --+ + */ +{ + .descr = "loop test #1", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* Array_A */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 1, 8), + /* Array_B */ /* [3] */ + BTF_TYPE_ARRAY_ENC(2, 1, 8), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test1_map", + .key_size = sizeof(int), + .value_size = sizeof(sizeof(int) * 8), + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Loop detected", +}, + +/* typedef is _before_ the BTF type of Array_A and Array_B + * + * typedef Array_B int_array; + * + * Array_A <------------------+ + * elem_type == int_array | + * | | + * | | + * Array_B <-------- + | + * elem_type == Array_A --+ + */ +{ + .descr = "loop test #2", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* typedef Array_B int_array */ + BTF_TYPEDEF_ENC(1, 4), /* [2] */ + /* Array_A */ + BTF_TYPE_ARRAY_ENC(2, 1, 8), /* [3] */ + /* Array_B */ + BTF_TYPE_ARRAY_ENC(3, 1, 8), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0int_array\0", + .str_sec_size = sizeof("\0int_array"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test2_map", + .key_size = sizeof(int), + .value_size = sizeof(sizeof(int) * 8), + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Loop detected", +}, + +/* Array_A <------------------+ + * elem_type == Array_B | + * | | + * | | + * Array_B <-------- + | + * elem_type == Array_A --+ + */ +{ + .descr = "loop test #3", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* Array_A */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 1, 8), + /* Array_B */ /* [3] */ + BTF_TYPE_ARRAY_ENC(2, 1, 8), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test3_map", + .key_size = sizeof(int), + .value_size = sizeof(sizeof(int) * 8), + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Loop detected", +}, + +/* typedef is _between_ the BTF type of Array_A and Array_B + * + * typedef Array_B int_array; + * + * Array_A <------------------+ + * elem_type == int_array | + * | | + * | | + * Array_B <-------- + | + * elem_type == Array_A --+ + */ +{ + .descr = "loop test #4", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* Array_A */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 1, 8), + /* typedef Array_B int_array */ /* [3] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), + /* Array_B */ /* [4] */ + BTF_TYPE_ARRAY_ENC(2, 1, 8), + BTF_END_RAW, + }, + .str_sec = "\0int_array\0", + .str_sec_size = sizeof("\0int_array"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test4_map", + .key_size = sizeof(int), + .value_size = sizeof(sizeof(int) * 8), + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Loop detected", +}, + +/* typedef struct B Struct_B + * + * struct A { + * int x; + * Struct_B y; + * }; + * + * struct B { + * int x; + * struct A y; + * }; + */ +{ + .descr = "loop test #5", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* struct A */ /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 32),/* Struct_B y; */ + /* typedef struct B Struct_B */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ + /* struct B */ /* [4] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A y; */ + BTF_END_RAW, + }, + .str_sec = "\0A\0x\0y\0Struct_B\0B\0x\0y", + .str_sec_size = sizeof("\0A\0x\0y\0Struct_B\0B\0x\0y"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test5_map", + .key_size = sizeof(int), + .value_size = 8, + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Loop detected", +}, + +/* struct A { + * int x; + * struct A array_a[4]; + * }; + */ +{ + .descr = "loop test #6", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ARRAY_ENC(3, 1, 4), /* [2] */ + /* struct A */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A array_a[4]; */ + BTF_END_RAW, + }, + .str_sec = "\0A\0x\0y", + .str_sec_size = sizeof("\0A\0x\0y"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test6_map", + .key_size = sizeof(int), + .value_size = 8, + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Loop detected", +}, + +{ + .descr = "loop test #7", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* struct A { */ /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)), + /* const void *m; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 0), + /* CONST type_id=3 */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4), + /* PTR type_id=2 */ /* [4] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3), + BTF_END_RAW, + }, + .str_sec = "\0A\0m", + .str_sec_size = sizeof("\0A\0m"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test7_map", + .key_size = sizeof(int), + .value_size = sizeof(void *), + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Loop detected", +}, + +{ + .descr = "loop test #8", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* struct A { */ /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)), + /* const void *m; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 0), + /* struct B { */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)), + /* const void *n; */ + BTF_MEMBER_ENC(NAME_TBD, 6, 0), + /* CONST type_id=5 */ /* [4] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 5), + /* PTR type_id=6 */ /* [5] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 6), + /* CONST type_id=7 */ /* [6] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 7), + /* PTR type_id=4 */ /* [7] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0B\0n", + .str_sec_size = sizeof("\0A\0m\0B\0n"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test8_map", + .key_size = sizeof(int), + .value_size = sizeof(void *), + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Loop detected", +}, + +{ + .descr = "string section does not end with null", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int") - 1, + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid string section", +}, + +{ + .descr = "empty string section", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = 0, + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid string section", +}, + +{ + .descr = "empty type section", + .raw_types = { + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "No type found", +}, + +{ + .descr = "btf_header test. Longer hdr_len", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .hdr_len_delta = 4, + .err_str = "Unsupported btf_header", +}, + +{ + .descr = "btf_header test. Gap between hdr and type", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .type_off_delta = 4, + .err_str = "Unsupported section found", +}, + +{ + .descr = "btf_header test. Gap between type and str", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .str_off_delta = 4, + .err_str = "Unsupported section found", +}, + +{ + .descr = "btf_header test. Overlap between type and str", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .str_off_delta = -4, + .err_str = "Section overlap found", +}, + +{ + .descr = "btf_header test. Larger BTF size", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .str_len_delta = -4, + .err_str = "Unsupported section found", +}, + +{ + .descr = "btf_header test. Smaller BTF size", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .str_len_delta = 4, + .err_str = "Total section length too long", +}, + +{ + .descr = "array test. index_type/elem_type \"int\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(1, 1, 16), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "array test. index_type/elem_type \"const int\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 3, 16), + /* CONST type_id=1 */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "array test. index_type \"const int:31\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int:31 */ /* [2] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4), + /* int[16] */ /* [3] */ + BTF_TYPE_ARRAY_ENC(1, 4, 16), + /* CONST type_id=2 */ /* [4] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid index", +}, + +{ + .descr = "array test. elem_type \"const int:31\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int:31 */ /* [2] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4), + /* int[16] */ /* [3] */ + BTF_TYPE_ARRAY_ENC(4, 1, 16), + /* CONST type_id=2 */ /* [4] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid array of int", +}, + +{ + .descr = "array test. index_type \"void\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(1, 0, 16), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid index", +}, + +{ + .descr = "array test. index_type \"const void\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(1, 3, 16), + /* CONST type_id=0 (void) */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid index", +}, + +{ + .descr = "array test. elem_type \"const void\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 1, 16), + /* CONST type_id=0 (void) */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid elem", +}, + +{ + .descr = "array test. elem_type \"const void *\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* const void *[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 1, 16), + /* CONST type_id=4 */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4), + /* void* */ /* [4] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "array test. index_type \"const void *\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* const void *[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 3, 16), + /* CONST type_id=4 */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4), + /* void* */ /* [4] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid index", +}, + +{ + .descr = "array test. t->size != 0\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int[16] */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 1), + BTF_ARRAY_ENC(1, 1, 16), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "size != 0", +}, + +{ + .descr = "int test. invalid int_data", + .raw_types = { + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), 4), + 0x10000000, + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid int_data", +}, + +{ + .descr = "invalid BTF_INFO", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_ENC(0, 0x10000000, 4), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info", +}, + +{ + .descr = "fwd test. t->type != 0\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* fwd type */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 1), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "fwd_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "type != 0", +}, + +{ + .descr = "typedef (invalid name, name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(0, 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "typedef_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "typedef (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__!int", + .str_sec_size = sizeof("\0__!int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "typedef_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "ptr type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "ptr_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "volatile type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "volatile_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "const type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "const_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "restrict type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1), /* [2] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_RESTRICT, 0, 0), 2), /* [3] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "restrict_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "fwd type (invalid name, name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__skb", + .str_sec_size = sizeof("\0__skb"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "fwd_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "fwd type (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__!skb", + .str_sec_size = sizeof("\0__!skb"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "fwd_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "array type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), /* [2] */ + BTF_ARRAY_ENC(1, 1, 4), + BTF_END_RAW, + }, + .str_sec = "\0__skb", + .str_sec_size = sizeof("\0__skb"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "struct type (name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + .str_sec = "\0A", + .str_sec_size = sizeof("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct type (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + .str_sec = "\0A!\0B", + .str_sec_size = sizeof("\0A!\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "struct member (name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + .str_sec = "\0A", + .str_sec_size = sizeof("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct member (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + .str_sec = "\0A\0B*", + .str_sec_size = sizeof("\0A\0B*"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "enum type (name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), + sizeof(int)), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + .str_sec = "\0A\0B", + .str_sec_size = sizeof("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "enum type (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), + sizeof(int)), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + .str_sec = "\0A!\0B", + .str_sec_size = sizeof("\0A!\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "enum member (invalid name, name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), + sizeof(int)), /* [2] */ + BTF_ENUM_ENC(0, 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "enum member (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), + sizeof(int)), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + .str_sec = "\0A!", + .str_sec_size = sizeof("\0A!"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, +{ + .descr = "arraymap invalid btf key (a bit field)", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* 32 bit int with 32 bit offset */ /* [2] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 32, 32, 8), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_map_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 2, + .value_type_id = 1, + .max_entries = 4, + .map_create_err = true, +}, + +{ + .descr = "arraymap invalid btf key (!= 32 bits)", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* 16 bit int with 0 bit offset */ /* [2] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 16, 2), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_map_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 2, + .value_type_id = 1, + .max_entries = 4, + .map_create_err = true, +}, + +{ + .descr = "arraymap invalid btf value (too small)", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_map_check_btf", + .key_size = sizeof(int), + /* btf_value_size < map->value_size */ + .value_size = sizeof(__u64), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .map_create_err = true, +}, + +{ + .descr = "arraymap invalid btf value (too big)", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_map_check_btf", + .key_size = sizeof(int), + /* btf_value_size > map->value_size */ + .value_size = sizeof(__u16), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .map_create_err = true, +}, + +{ + .descr = "func proto (int (*)(int, unsigned int))", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* int (*)(int, unsigned int) */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (vararg)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int, unsigned int, ...) */ + BTF_FUNC_PROTO_ENC(0, 3), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_FUNC_PROTO_ARG_ENC(0, 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (vararg with name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b, ... c) */ + BTF_FUNC_PROTO_ENC(0, 3), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0c", + .str_sec_size = sizeof("\0a\0b\0c"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#3", +}, + +{ + .descr = "func proto (arg after vararg)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, ..., unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 3), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 0), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_END_RAW, + }, + .str_sec = "\0a\0b", + .str_sec_size = sizeof("\0a\0b"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#2", +}, + +{ + .descr = "func proto (CONST=>TYPEDEF=>PTR=>FUNC_PROTO)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* typedef void (*func_ptr)(int, unsigned int) */ + BTF_TYPEDEF_ENC(NAME_TBD, 5), /* [3] */ + /* const func_ptr */ + BTF_CONST_ENC(3), /* [4] */ + BTF_PTR_ENC(6), /* [5] */ + BTF_FUNC_PROTO_ENC(0, 2), /* [6] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "\0func_ptr", + .str_sec_size = sizeof("\0func_ptr"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (TYPEDEF=>FUNC_PROTO)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ + BTF_FUNC_PROTO_ENC(0, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "\0func_typedef", + .str_sec_size = sizeof("\0func_typedef"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (btf_resolve(arg))", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* void (*)(const void *) */ + BTF_FUNC_PROTO_ENC(0, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(0, 3), + BTF_CONST_ENC(4), /* [3] */ + BTF_PTR_ENC(0), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (Not all arg has name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_END_RAW, + }, + .str_sec = "\0b", + .str_sec_size = sizeof("\0b"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (Bad arg name_off)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int ) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(0x0fffffff, 2), + BTF_END_RAW, + }, + .str_sec = "\0a", + .str_sec_size = sizeof("\0a"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#2", +}, + +{ + .descr = "func proto (Bad arg name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int !!!) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_END_RAW, + }, + .str_sec = "\0a\0!!!", + .str_sec_size = sizeof("\0a\0!!!"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#2", +}, + +{ + .descr = "func proto (Invalid return type)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* (*)(int, unsigned int) */ + BTF_FUNC_PROTO_ENC(100, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid return type", +}, + +{ + .descr = "func proto (with func name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void func_proto(int, unsigned int) */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 2), 0), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "\0func_proto", + .str_sec_size = sizeof("\0func_proto"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "func proto (const void arg)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(const void) */ + BTF_FUNC_PROTO_ENC(0, 1), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 4), + BTF_CONST_ENC(0), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#1", +}, + +{ + .descr = "func (void func(int a, unsigned int b))", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + /* void func(int a, unsigned int b) */ + BTF_FUNC_ENC(NAME_TBD, 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0func", + .str_sec_size = sizeof("\0a\0b\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func (No func name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + /* void (int a, unsigned int b) */ + BTF_FUNC_ENC(0, 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b", + .str_sec_size = sizeof("\0a\0b"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "func (Invalid func name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + /* void !!!(int a, unsigned int b) */ + BTF_FUNC_ENC(NAME_TBD, 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0!!!", + .str_sec_size = sizeof("\0a\0b\0!!!"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "func (Some arg has no name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + /* void func(int a, unsigned int) */ + BTF_FUNC_ENC(NAME_TBD, 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0func", + .str_sec_size = sizeof("\0a\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#2", +}, + +{ + .descr = "func (Non zero vlen)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + /* void func(int a, unsigned int b) */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 2), 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0func", + .str_sec_size = sizeof("\0a\0b\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid func linkage", +}, + +{ + .descr = "func (Not referring to FUNC_PROTO)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_ENC(NAME_TBD, 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0func", + .str_sec_size = sizeof("\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, + +{ + .descr = "invalid int kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_INT, 1, 0), 4), /* [2] */ + BTF_INT_ENC(0, 0, 32), + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "int_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid ptr kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 1, 0), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "ptr_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid array kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 1, 0), 0), /* [2] */ + BTF_ARRAY_ENC(1, 1, 1), + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid enum kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 1, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "valid fwd kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "fwd_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "invalid typedef kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_TYPEDEF, 1, 0), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "typedef_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid volatile kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 1, 0), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "volatile_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid const kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 1, 0), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "const_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid restrict kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_RESTRICT, 1, 0), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "restrict_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid func kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 0), 0), /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 1, 0), 2), /* [3] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid func_proto kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 1, 0), 0), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "valid struct, kind_flag, bitfield_size = 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 8), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(0, 0)), + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(0, 32)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid struct, kind_flag, int member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(4, 4)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid union, kind_flag, int member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 1, 2), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(4, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "union_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid struct, kind_flag, enum member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4),/* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(4, 4)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid union, kind_flag, enum member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 1, 2), 4), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(4, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "union_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid struct, kind_flag, typedef member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4),/* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 4, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 5, BTF_MEMBER_OFFSET(4, 4)), + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [4] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C\0D\0E"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid union, kind_flag, typedef member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 1, 2), 4), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 4, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 5, BTF_MEMBER_OFFSET(4, 0)), + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [4] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C\0D\0E"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "union_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "invalid struct, kind_flag, bitfield_size greater than struct size", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(20, 0)), + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(20, 20)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Member exceeds struct_size", +}, + +{ + .descr = "invalid struct, kind_flag, bitfield base_type int not regular", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 20, 4), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(20, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(20, 20)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid member base type", +}, + +{ + .descr = "invalid struct, kind_flag, base_type int not regular", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 12, 4), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(8, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(8, 8)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid member base type", +}, + +{ + .descr = "invalid union, kind_flag, bitfield_size greater than struct size", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 1, 2), 2), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(8, 0)), + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(20, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "union_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Member exceeds struct_size", +}, + +{ + .descr = "invalid struct, kind_flag, int member, bitfield_size = 0, wrong byte alignment", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 12), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 36)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid member offset", +}, + +{ + .descr = "invalid struct, kind_flag, enum member, bitfield_size = 0, wrong byte alignment", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 12), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 36)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid member offset", +}, + +{ + .descr = "128-bit int", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 128, 16), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "int_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct, 128-bit int member", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 128, 16), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct, 120-bit int member bitfield", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 120, 16), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct, kind_flag, 128-bit int member", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 128, 16), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 16), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct, kind_flag, 120-bit int member bitfield", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 128, 16), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 16), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(120, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, +/* + * typedef int arr_t[16]; + * struct s { + * arr_t *a; + * }; + */ +{ + .descr = "struct->ptr->typedef->array->int size resolution", + .raw_types = { + BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [1] */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + BTF_PTR_ENC(3), /* [2] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ + BTF_TYPE_ARRAY_ENC(5, 5, 16), /* [4] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0s\0a\0arr_t"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "ptr_mod_chain_size_resolve_map", + .key_size = sizeof(int), + .value_size = sizeof(int) * 16, + .key_type_id = 5 /* int */, + .value_type_id = 3 /* arr_t */, + .max_entries = 4, +}, +/* + * typedef int arr_t[16][8][4]; + * struct s { + * arr_t *a; + * }; + */ +{ + .descr = "struct->ptr->typedef->multi-array->int size resolution", + .raw_types = { + BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [1] */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + BTF_PTR_ENC(3), /* [2] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ + BTF_TYPE_ARRAY_ENC(5, 7, 16), /* [4] */ + BTF_TYPE_ARRAY_ENC(6, 7, 8), /* [5] */ + BTF_TYPE_ARRAY_ENC(7, 7, 4), /* [6] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [7] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0s\0a\0arr_t"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "multi_arr_size_resolve_map", + .key_size = sizeof(int), + .value_size = sizeof(int) * 16 * 8 * 4, + .key_type_id = 7 /* int */, + .value_type_id = 3 /* arr_t */, + .max_entries = 4, +}, +/* + * typedef int int_t; + * typedef int_t arr3_t[4]; + * typedef arr3_t arr2_t[8]; + * typedef arr2_t arr1_t[16]; + * struct s { + * arr1_t *a; + * }; + */ +{ + .descr = "typedef/multi-arr mix size resolution", + .raw_types = { + BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [1] */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + BTF_PTR_ENC(3), /* [2] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ + BTF_TYPE_ARRAY_ENC(5, 10, 16), /* [4] */ + BTF_TYPEDEF_ENC(NAME_TBD, 6), /* [5] */ + BTF_TYPE_ARRAY_ENC(7, 10, 8), /* [6] */ + BTF_TYPEDEF_ENC(NAME_TBD, 8), /* [7] */ + BTF_TYPE_ARRAY_ENC(9, 10, 4), /* [8] */ + BTF_TYPEDEF_ENC(NAME_TBD, 10), /* [9] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [10] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0s\0a\0arr1_t\0arr2_t\0arr3_t\0int_t"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "typedef_arra_mix_size_resolve_map", + .key_size = sizeof(int), + .value_size = sizeof(int) * 16 * 8 * 4, + .key_type_id = 10 /* int */, + .value_type_id = 3 /* arr_t */, + .max_entries = 4, +}, + +}; /* struct btf_raw_test raw_tests[] */ + +static const char *get_next_str(const char *start, const char *end) +{ + return start < end - 1 ? start + 1 : NULL; +} + +static int get_raw_sec_size(const __u32 *raw_types) +{ + int i; + + for (i = MAX_NR_RAW_U32 - 1; + i >= 0 && raw_types[i] != BTF_END_RAW; + i--) + ; + + return i < 0 ? i : i * sizeof(raw_types[0]); +} + +static void *btf_raw_create(const struct btf_header *hdr, + const __u32 *raw_types, + const char *str, + unsigned int str_sec_size, + unsigned int *btf_size, + const char **ret_next_str) +{ + const char *next_str = str, *end_str = str + str_sec_size; + const char **strs_idx = NULL, **tmp_strs_idx; + int strs_cap = 0, strs_cnt = 0, next_str_idx = 0; + unsigned int size_needed, offset; + struct btf_header *ret_hdr; + int i, type_sec_size, err = 0; + uint32_t *ret_types; + void *raw_btf = NULL; + + type_sec_size = get_raw_sec_size(raw_types); + if (CHECK(type_sec_size < 0, "Cannot get nr_raw_types")) + return NULL; + + size_needed = sizeof(*hdr) + type_sec_size + str_sec_size; + raw_btf = malloc(size_needed); + if (CHECK(!raw_btf, "Cannot allocate memory for raw_btf")) + return NULL; + + /* Copy header */ + memcpy(raw_btf, hdr, sizeof(*hdr)); + offset = sizeof(*hdr); + + /* Index strings */ + while ((next_str = get_next_str(next_str, end_str))) { + if (strs_cnt == strs_cap) { + strs_cap += max(16, strs_cap / 2); + tmp_strs_idx = realloc(strs_idx, + sizeof(*strs_idx) * strs_cap); + if (CHECK(!tmp_strs_idx, + "Cannot allocate memory for strs_idx")) { + err = -1; + goto done; + } + strs_idx = tmp_strs_idx; + } + strs_idx[strs_cnt++] = next_str; + next_str += strlen(next_str); + } + + /* Copy type section */ + ret_types = raw_btf + offset; + for (i = 0; i < type_sec_size / sizeof(raw_types[0]); i++) { + if (raw_types[i] == NAME_TBD) { + if (CHECK(next_str_idx == strs_cnt, + "Error in getting next_str #%d", + next_str_idx)) { + err = -1; + goto done; + } + ret_types[i] = strs_idx[next_str_idx++] - str; + } else if (IS_NAME_NTH(raw_types[i])) { + int idx = GET_NAME_NTH_IDX(raw_types[i]); + + if (CHECK(idx <= 0 || idx > strs_cnt, + "Error getting string #%d, strs_cnt:%d", + idx, strs_cnt)) { + err = -1; + goto done; + } + ret_types[i] = strs_idx[idx-1] - str; + } else { + ret_types[i] = raw_types[i]; + } + } + offset += type_sec_size; + + /* Copy string section */ + memcpy(raw_btf + offset, str, str_sec_size); + + ret_hdr = (struct btf_header *)raw_btf; + ret_hdr->type_len = type_sec_size; + ret_hdr->str_off = type_sec_size; + ret_hdr->str_len = str_sec_size; + + *btf_size = size_needed; + if (ret_next_str) + *ret_next_str = + next_str_idx < strs_cnt ? strs_idx[next_str_idx] : NULL; + +done: + if (err) { + if (raw_btf) + free(raw_btf); + if (strs_idx) + free(strs_idx); + return NULL; + } + return raw_btf; +} + +static void do_test_raw(unsigned int test_num) +{ + struct btf_raw_test *test = &raw_tests[test_num - 1]; + struct bpf_create_map_attr create_attr = {}; + int map_fd = -1, btf_fd = -1; + unsigned int raw_btf_size; + struct btf_header *hdr; + void *raw_btf; + int err; + + if (!test__start_subtest(test->descr)) + return; + + raw_btf = btf_raw_create(&hdr_tmpl, + test->raw_types, + test->str_sec, + test->str_sec_size, + &raw_btf_size, NULL); + if (!raw_btf) + return; + + hdr = raw_btf; + + hdr->hdr_len = (int)hdr->hdr_len + test->hdr_len_delta; + hdr->type_off = (int)hdr->type_off + test->type_off_delta; + hdr->str_off = (int)hdr->str_off + test->str_off_delta; + hdr->str_len = (int)hdr->str_len + test->str_len_delta; + + *btf_log_buf = '\0'; + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + always_log); + free(raw_btf); + + err = ((btf_fd == -1) != test->btf_load_err); + if (CHECK(err, "btf_fd:%d test->btf_load_err:%u", + btf_fd, test->btf_load_err) || + CHECK(test->err_str && !strstr(btf_log_buf, test->err_str), + "expected err_str:%s", test->err_str)) { + err = -1; + goto done; + } + + if (err || btf_fd == -1) + goto done; + + create_attr.name = test->map_name; + create_attr.map_type = test->map_type; + create_attr.key_size = test->key_size; + create_attr.value_size = test->value_size; + create_attr.max_entries = test->max_entries; + create_attr.btf_fd = btf_fd; + create_attr.btf_key_type_id = test->key_type_id; + create_attr.btf_value_type_id = test->value_type_id; + + map_fd = bpf_create_map_xattr(&create_attr); + + err = ((map_fd == -1) != test->map_create_err); + CHECK(err, "map_fd:%d test->map_create_err:%u", + map_fd, test->map_create_err); + +done: + if (*btf_log_buf && (err || always_log)) + fprintf(stderr, "\n%s", btf_log_buf); + if (btf_fd != -1) + close(btf_fd); + if (map_fd != -1) + close(map_fd); +} + +struct btf_get_info_test { + const char *descr; + const char *str_sec; + __u32 raw_types[MAX_NR_RAW_U32]; + __u32 str_sec_size; + int btf_size_delta; + int (*special_test)(unsigned int test_num); +}; + +static int test_big_btf_info(unsigned int test_num); +static int test_btf_id(unsigned int test_num); + +const struct btf_get_info_test get_info_tests[] = { +{ + .descr = "== raw_btf_size+1", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .btf_size_delta = 1, +}, +{ + .descr = "== raw_btf_size-3", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .btf_size_delta = -3, +}, +{ + .descr = "Large bpf_btf_info", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .special_test = test_big_btf_info, +}, +{ + .descr = "BTF ID", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* unsigned int */ /* [2] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .special_test = test_btf_id, +}, +}; + +static int test_big_btf_info(unsigned int test_num) +{ + const struct btf_get_info_test *test = &get_info_tests[test_num - 1]; + uint8_t *raw_btf = NULL, *user_btf = NULL; + unsigned int raw_btf_size; + struct { + struct bpf_btf_info info; + uint64_t garbage; + } info_garbage; + struct bpf_btf_info *info; + int btf_fd = -1, err; + uint32_t info_len; + + raw_btf = btf_raw_create(&hdr_tmpl, + test->raw_types, + test->str_sec, + test->str_sec_size, + &raw_btf_size, NULL); + + if (!raw_btf) + return -1; + + *btf_log_buf = '\0'; + + user_btf = malloc(raw_btf_size); + if (CHECK(!user_btf, "!user_btf")) { + err = -1; + goto done; + } + + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + always_log); + if (CHECK(btf_fd == -1, "errno:%d", errno)) { + err = -1; + goto done; + } + + /* + * GET_INFO should error out if the userspace info + * has non zero tailing bytes. + */ + info = &info_garbage.info; + memset(info, 0, sizeof(*info)); + info_garbage.garbage = 0xdeadbeef; + info_len = sizeof(info_garbage); + info->btf = ptr_to_u64(user_btf); + info->btf_size = raw_btf_size; + + err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len); + if (CHECK(!err, "!err")) { + err = -1; + goto done; + } + + /* + * GET_INFO should succeed even info_len is larger than + * the kernel supported as long as tailing bytes are zero. + * The kernel supported info len should also be returned + * to userspace. + */ + info_garbage.garbage = 0; + err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len); + if (CHECK(err || info_len != sizeof(*info), + "err:%d errno:%d info_len:%u sizeof(*info):%zu", + err, errno, info_len, sizeof(*info))) { + err = -1; + goto done; + } + + fprintf(stderr, "OK"); + +done: + if (*btf_log_buf && (err || always_log)) + fprintf(stderr, "\n%s", btf_log_buf); + + free(raw_btf); + free(user_btf); + + if (btf_fd != -1) + close(btf_fd); + + return err; +} + +static int test_btf_id(unsigned int test_num) +{ + const struct btf_get_info_test *test = &get_info_tests[test_num - 1]; + struct bpf_create_map_attr create_attr = {}; + uint8_t *raw_btf = NULL, *user_btf[2] = {}; + int btf_fd[2] = {-1, -1}, map_fd = -1; + struct bpf_map_info map_info = {}; + struct bpf_btf_info info[2] = {}; + unsigned int raw_btf_size; + uint32_t info_len; + int err, i, ret; + + raw_btf = btf_raw_create(&hdr_tmpl, + test->raw_types, + test->str_sec, + test->str_sec_size, + &raw_btf_size, NULL); + + if (!raw_btf) + return -1; + + *btf_log_buf = '\0'; + + for (i = 0; i < 2; i++) { + user_btf[i] = malloc(raw_btf_size); + if (CHECK(!user_btf[i], "!user_btf[%d]", i)) { + err = -1; + goto done; + } + info[i].btf = ptr_to_u64(user_btf[i]); + info[i].btf_size = raw_btf_size; + } + + btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + always_log); + if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) { + err = -1; + goto done; + } + + /* Test BPF_OBJ_GET_INFO_BY_ID on btf_id */ + info_len = sizeof(info[0]); + err = bpf_obj_get_info_by_fd(btf_fd[0], &info[0], &info_len); + if (CHECK(err, "errno:%d", errno)) { + err = -1; + goto done; + } + + btf_fd[1] = bpf_btf_get_fd_by_id(info[0].id); + if (CHECK(btf_fd[1] == -1, "errno:%d", errno)) { + err = -1; + goto done; + } + + ret = 0; + err = bpf_obj_get_info_by_fd(btf_fd[1], &info[1], &info_len); + if (CHECK(err || info[0].id != info[1].id || + info[0].btf_size != info[1].btf_size || + (ret = memcmp(user_btf[0], user_btf[1], info[0].btf_size)), + "err:%d errno:%d id0:%u id1:%u btf_size0:%u btf_size1:%u memcmp:%d", + err, errno, info[0].id, info[1].id, + info[0].btf_size, info[1].btf_size, ret)) { + err = -1; + goto done; + } + + /* Test btf members in struct bpf_map_info */ + create_attr.name = "test_btf_id"; + create_attr.map_type = BPF_MAP_TYPE_ARRAY; + create_attr.key_size = sizeof(int); + create_attr.value_size = sizeof(unsigned int); + create_attr.max_entries = 4; + create_attr.btf_fd = btf_fd[0]; + create_attr.btf_key_type_id = 1; + create_attr.btf_value_type_id = 2; + + map_fd = bpf_create_map_xattr(&create_attr); + if (CHECK(map_fd == -1, "errno:%d", errno)) { + err = -1; + goto done; + } + + info_len = sizeof(map_info); + err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len); + if (CHECK(err || map_info.btf_id != info[0].id || + map_info.btf_key_type_id != 1 || map_info.btf_value_type_id != 2, + "err:%d errno:%d info.id:%u btf_id:%u btf_key_type_id:%u btf_value_type_id:%u", + err, errno, info[0].id, map_info.btf_id, map_info.btf_key_type_id, + map_info.btf_value_type_id)) { + err = -1; + goto done; + } + + for (i = 0; i < 2; i++) { + close(btf_fd[i]); + btf_fd[i] = -1; + } + + /* Test BTF ID is removed from the kernel */ + btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id); + if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) { + err = -1; + goto done; + } + close(btf_fd[0]); + btf_fd[0] = -1; + + /* The map holds the last ref to BTF and its btf_id */ + close(map_fd); + map_fd = -1; + btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id); + if (CHECK(btf_fd[0] != -1, "BTF lingers")) { + err = -1; + goto done; + } + + fprintf(stderr, "OK"); + +done: + if (*btf_log_buf && (err || always_log)) + fprintf(stderr, "\n%s", btf_log_buf); + + free(raw_btf); + if (map_fd != -1) + close(map_fd); + for (i = 0; i < 2; i++) { + free(user_btf[i]); + if (btf_fd[i] != -1) + close(btf_fd[i]); + } + + return err; +} + +static void do_test_get_info(unsigned int test_num) +{ + const struct btf_get_info_test *test = &get_info_tests[test_num - 1]; + unsigned int raw_btf_size, user_btf_size, expected_nbytes; + uint8_t *raw_btf = NULL, *user_btf = NULL; + struct bpf_btf_info info = {}; + int btf_fd = -1, err, ret; + uint32_t info_len; + + if (!test__start_subtest(test->descr)) + return; + + if (test->special_test) { + err = test->special_test(test_num); + if (CHECK(err, "failed: %d\n", err)) + return; + } + + raw_btf = btf_raw_create(&hdr_tmpl, + test->raw_types, + test->str_sec, + test->str_sec_size, + &raw_btf_size, NULL); + + if (!raw_btf) + return; + + *btf_log_buf = '\0'; + + user_btf = malloc(raw_btf_size); + if (CHECK(!user_btf, "!user_btf")) { + err = -1; + goto done; + } + + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + always_log); + if (CHECK(btf_fd == -1, "errno:%d", errno)) { + err = -1; + goto done; + } + + user_btf_size = (int)raw_btf_size + test->btf_size_delta; + expected_nbytes = min(raw_btf_size, user_btf_size); + if (raw_btf_size > expected_nbytes) + memset(user_btf + expected_nbytes, 0xff, + raw_btf_size - expected_nbytes); + + info_len = sizeof(info); + info.btf = ptr_to_u64(user_btf); + info.btf_size = user_btf_size; + + ret = 0; + err = bpf_obj_get_info_by_fd(btf_fd, &info, &info_len); + if (CHECK(err || !info.id || info_len != sizeof(info) || + info.btf_size != raw_btf_size || + (ret = memcmp(raw_btf, user_btf, expected_nbytes)), + "err:%d errno:%d info.id:%u info_len:%u sizeof(info):%zu raw_btf_size:%u info.btf_size:%u expected_nbytes:%u memcmp:%d", + err, errno, info.id, info_len, sizeof(info), + raw_btf_size, info.btf_size, expected_nbytes, ret)) { + err = -1; + goto done; + } + + while (expected_nbytes < raw_btf_size) { + fprintf(stderr, "%u...", expected_nbytes); + if (CHECK(user_btf[expected_nbytes++] != 0xff, + "user_btf[%u]:%x != 0xff", expected_nbytes - 1, + user_btf[expected_nbytes - 1])) { + err = -1; + goto done; + } + } + + fprintf(stderr, "OK"); + +done: + if (*btf_log_buf && (err || always_log)) + fprintf(stderr, "\n%s", btf_log_buf); + + free(raw_btf); + free(user_btf); + + if (btf_fd != -1) + close(btf_fd); +} + +struct btf_file_test { + const char *file; + bool btf_kv_notfound; +}; + +static struct btf_file_test file_tests[] = { + { .file = "test_btf_haskv.o", }, + { .file = "test_btf_newkv.o", }, + { .file = "test_btf_nokv.o", .btf_kv_notfound = true, }, +}; + +static void do_test_file(unsigned int test_num) +{ + const struct btf_file_test *test = &file_tests[test_num - 1]; + const char *expected_fnames[] = {"_dummy_tracepoint", + "test_long_fname_1", + "test_long_fname_2"}; + struct btf_ext *btf_ext = NULL; + struct bpf_prog_info info = {}; + struct bpf_object *obj = NULL; + struct bpf_func_info *finfo; + struct bpf_program *prog; + __u32 info_len, rec_size; + bool has_btf_ext = false; + struct btf *btf = NULL; + void *func_info = NULL; + struct bpf_map *map; + int i, err, prog_fd; + + if (!test__start_subtest(test->file)) + return; + + btf = btf__parse_elf(test->file, &btf_ext); + if (IS_ERR(btf)) { + if (PTR_ERR(btf) == -ENOENT) { + printf("%s:SKIP: No ELF %s found", __func__, BTF_ELF_SEC); + test__skip(); + return; + } + return; + } + btf__free(btf); + + has_btf_ext = btf_ext != NULL; + btf_ext__free(btf_ext); + + obj = bpf_object__open(test->file); + if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj))) + return; + + prog = bpf_program__next(NULL, obj); + if (CHECK(!prog, "Cannot find bpf_prog")) { + err = -1; + goto done; + } + + bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT); + err = bpf_object__load(obj); + if (CHECK(err < 0, "bpf_object__load: %d", err)) + goto done; + prog_fd = bpf_program__fd(prog); + + map = bpf_object__find_map_by_name(obj, "btf_map"); + if (CHECK(!map, "btf_map not found")) { + err = -1; + goto done; + } + + err = (bpf_map__btf_key_type_id(map) == 0 || bpf_map__btf_value_type_id(map) == 0) + != test->btf_kv_notfound; + if (CHECK(err, "btf_key_type_id:%u btf_value_type_id:%u test->btf_kv_notfound:%u", + bpf_map__btf_key_type_id(map), bpf_map__btf_value_type_id(map), + test->btf_kv_notfound)) + goto done; + + if (!has_btf_ext) + goto skip; + + /* get necessary program info */ + info_len = sizeof(struct bpf_prog_info); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + + if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + err = -1; + goto done; + } + if (CHECK(info.nr_func_info != 3, + "incorrect info.nr_func_info (1st) %d", + info.nr_func_info)) { + err = -1; + goto done; + } + rec_size = info.func_info_rec_size; + if (CHECK(rec_size != sizeof(struct bpf_func_info), + "incorrect info.func_info_rec_size (1st) %d\n", rec_size)) { + err = -1; + goto done; + } + + func_info = malloc(info.nr_func_info * rec_size); + if (CHECK(!func_info, "out of memory")) { + err = -1; + goto done; + } + + /* reset info to only retrieve func_info related data */ + memset(&info, 0, sizeof(info)); + info.nr_func_info = 3; + info.func_info_rec_size = rec_size; + info.func_info = ptr_to_u64(func_info); + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + + if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + err = -1; + goto done; + } + if (CHECK(info.nr_func_info != 3, + "incorrect info.nr_func_info (2nd) %d", + info.nr_func_info)) { + err = -1; + goto done; + } + if (CHECK(info.func_info_rec_size != rec_size, + "incorrect info.func_info_rec_size (2nd) %d", + info.func_info_rec_size)) { + err = -1; + goto done; + } + + err = btf__get_from_id(info.btf_id, &btf); + if (CHECK(err, "cannot get btf from kernel, err: %d", err)) + goto done; + + /* check three functions */ + finfo = func_info; + for (i = 0; i < 3; i++) { + const struct btf_type *t; + const char *fname; + + t = btf__type_by_id(btf, finfo->type_id); + if (CHECK(!t, "btf__type_by_id failure: id %u", + finfo->type_id)) { + err = -1; + goto done; + } + + fname = btf__name_by_offset(btf, t->name_off); + err = strcmp(fname, expected_fnames[i]); + /* for the second and third functions in .text section, + * the compiler may order them either way. + */ + if (i && err) + err = strcmp(fname, expected_fnames[3 - i]); + if (CHECK(err, "incorrect fname %s", fname ? : "")) { + err = -1; + goto done; + } + + finfo = (void *)finfo + rec_size; + } + +skip: + fprintf(stderr, "OK"); + +done: + free(func_info); + bpf_object__close(obj); +} + +const char *pprint_enum_str[] = { + "ENUM_ZERO", + "ENUM_ONE", + "ENUM_TWO", + "ENUM_THREE", +}; + +struct pprint_mapv { + uint32_t ui32; + uint16_t ui16; + /* 2 bytes hole */ + int32_t si32; + uint32_t unused_bits2a:2, + bits28:28, + unused_bits2b:2; + union { + uint64_t ui64; + uint8_t ui8a[8]; + }; + enum { + ENUM_ZERO, + ENUM_ONE, + ENUM_TWO, + ENUM_THREE, + } aenum; + uint32_t ui32b; + uint32_t bits2c:2; + uint8_t si8_4[2][2]; +}; + +#ifdef __SIZEOF_INT128__ +struct pprint_mapv_int128 { + __int128 si128a; + __int128 si128b; + unsigned __int128 bits3:3; + unsigned __int128 bits80:80; + unsigned __int128 ui128; +}; +#endif + +static struct btf_raw_test pprint_test_template[] = { +{ + .raw_types = { + /* unsighed char */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1), + /* unsigned short */ /* [2] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2), + /* unsigned int */ /* [3] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), + /* int */ /* [4] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + /* unsigned long long */ /* [5] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8), + /* 2 bits */ /* [6] */ + BTF_TYPE_INT_ENC(0, 0, 0, 2, 2), + /* 28 bits */ /* [7] */ + BTF_TYPE_INT_ENC(0, 0, 0, 28, 4), + /* uint8_t[8] */ /* [8] */ + BTF_TYPE_ARRAY_ENC(9, 1, 8), + /* typedef unsigned char uint8_t */ /* [9] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), + /* typedef unsigned short uint16_t */ /* [10] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), + /* typedef unsigned int uint32_t */ /* [11] */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), + /* typedef int int32_t */ /* [12] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), + /* typedef unsigned long long uint64_t *//* [13] */ + BTF_TYPEDEF_ENC(NAME_TBD, 5), + /* union (anon) */ /* [14] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */ + BTF_MEMBER_ENC(NAME_TBD, 8, 0), /* uint8_t ui8a[8]; */ + /* enum (anon) */ /* [15] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4), + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_ENUM_ENC(NAME_TBD, 2), + BTF_ENUM_ENC(NAME_TBD, 3), + /* struct pprint_mapv */ /* [16] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 11), 40), + BTF_MEMBER_ENC(NAME_TBD, 11, 0), /* uint32_t ui32 */ + BTF_MEMBER_ENC(NAME_TBD, 10, 32), /* uint16_t ui16 */ + BTF_MEMBER_ENC(NAME_TBD, 12, 64), /* int32_t si32 */ + BTF_MEMBER_ENC(NAME_TBD, 6, 96), /* unused_bits2a */ + BTF_MEMBER_ENC(NAME_TBD, 7, 98), /* bits28 */ + BTF_MEMBER_ENC(NAME_TBD, 6, 126), /* unused_bits2b */ + BTF_MEMBER_ENC(0, 14, 128), /* union (anon) */ + BTF_MEMBER_ENC(NAME_TBD, 15, 192), /* aenum */ + BTF_MEMBER_ENC(NAME_TBD, 11, 224), /* uint32_t ui32b */ + BTF_MEMBER_ENC(NAME_TBD, 6, 256), /* bits2c */ + BTF_MEMBER_ENC(NAME_TBD, 17, 264), /* si8_4 */ + BTF_TYPE_ARRAY_ENC(18, 1, 2), /* [17] */ + BTF_TYPE_ARRAY_ENC(1, 1, 2), /* [18] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0si8_4"), + .key_size = sizeof(unsigned int), + .value_size = sizeof(struct pprint_mapv), + .key_type_id = 3, /* unsigned int */ + .value_type_id = 16, /* struct pprint_mapv */ + .max_entries = 128, +}, + +{ + /* this type will have the same type as the + * first .raw_types definition, but struct type will + * be encoded with kind_flag set. + */ + .raw_types = { + /* unsighed char */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1), + /* unsigned short */ /* [2] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2), + /* unsigned int */ /* [3] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), + /* int */ /* [4] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + /* unsigned long long */ /* [5] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8), + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [6] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [7] */ + /* uint8_t[8] */ /* [8] */ + BTF_TYPE_ARRAY_ENC(9, 1, 8), + /* typedef unsigned char uint8_t */ /* [9] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), + /* typedef unsigned short uint16_t */ /* [10] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), + /* typedef unsigned int uint32_t */ /* [11] */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), + /* typedef int int32_t */ /* [12] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), + /* typedef unsigned long long uint64_t *//* [13] */ + BTF_TYPEDEF_ENC(NAME_TBD, 5), + /* union (anon) */ /* [14] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */ + BTF_MEMBER_ENC(NAME_TBD, 8, 0), /* uint8_t ui8a[8]; */ + /* enum (anon) */ /* [15] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4), + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_ENUM_ENC(NAME_TBD, 2), + BTF_ENUM_ENC(NAME_TBD, 3), + /* struct pprint_mapv */ /* [16] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 11), 40), + BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 0)), /* uint32_t ui32 */ + BTF_MEMBER_ENC(NAME_TBD, 10, BTF_MEMBER_OFFSET(0, 32)), /* uint16_t ui16 */ + BTF_MEMBER_ENC(NAME_TBD, 12, BTF_MEMBER_OFFSET(0, 64)), /* int32_t si32 */ + BTF_MEMBER_ENC(NAME_TBD, 6, BTF_MEMBER_OFFSET(2, 96)), /* unused_bits2a */ + BTF_MEMBER_ENC(NAME_TBD, 7, BTF_MEMBER_OFFSET(28, 98)), /* bits28 */ + BTF_MEMBER_ENC(NAME_TBD, 6, BTF_MEMBER_OFFSET(2, 126)), /* unused_bits2b */ + BTF_MEMBER_ENC(0, 14, BTF_MEMBER_OFFSET(0, 128)), /* union (anon) */ + BTF_MEMBER_ENC(NAME_TBD, 15, BTF_MEMBER_OFFSET(0, 192)), /* aenum */ + BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 224)), /* uint32_t ui32b */ + BTF_MEMBER_ENC(NAME_TBD, 6, BTF_MEMBER_OFFSET(2, 256)), /* bits2c */ + BTF_MEMBER_ENC(NAME_TBD, 17, 264), /* si8_4 */ + BTF_TYPE_ARRAY_ENC(18, 1, 2), /* [17] */ + BTF_TYPE_ARRAY_ENC(1, 1, 2), /* [18] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0si8_4"), + .key_size = sizeof(unsigned int), + .value_size = sizeof(struct pprint_mapv), + .key_type_id = 3, /* unsigned int */ + .value_type_id = 16, /* struct pprint_mapv */ + .max_entries = 128, +}, + +{ + /* this type will have the same layout as the + * first .raw_types definition. The struct type will + * be encoded with kind_flag set, bitfield members + * are added typedef/const/volatile, and bitfield members + * will have both int and enum types. + */ + .raw_types = { + /* unsighed char */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1), + /* unsigned short */ /* [2] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2), + /* unsigned int */ /* [3] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), + /* int */ /* [4] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + /* unsigned long long */ /* [5] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8), + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [6] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [7] */ + /* uint8_t[8] */ /* [8] */ + BTF_TYPE_ARRAY_ENC(9, 1, 8), + /* typedef unsigned char uint8_t */ /* [9] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), + /* typedef unsigned short uint16_t */ /* [10] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), + /* typedef unsigned int uint32_t */ /* [11] */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), + /* typedef int int32_t */ /* [12] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), + /* typedef unsigned long long uint64_t *//* [13] */ + BTF_TYPEDEF_ENC(NAME_TBD, 5), + /* union (anon) */ /* [14] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */ + BTF_MEMBER_ENC(NAME_TBD, 8, 0), /* uint8_t ui8a[8]; */ + /* enum (anon) */ /* [15] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4), + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_ENUM_ENC(NAME_TBD, 2), + BTF_ENUM_ENC(NAME_TBD, 3), + /* struct pprint_mapv */ /* [16] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 11), 40), + BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 0)), /* uint32_t ui32 */ + BTF_MEMBER_ENC(NAME_TBD, 10, BTF_MEMBER_OFFSET(0, 32)), /* uint16_t ui16 */ + BTF_MEMBER_ENC(NAME_TBD, 12, BTF_MEMBER_OFFSET(0, 64)), /* int32_t si32 */ + BTF_MEMBER_ENC(NAME_TBD, 17, BTF_MEMBER_OFFSET(2, 96)), /* unused_bits2a */ + BTF_MEMBER_ENC(NAME_TBD, 7, BTF_MEMBER_OFFSET(28, 98)), /* bits28 */ + BTF_MEMBER_ENC(NAME_TBD, 19, BTF_MEMBER_OFFSET(2, 126)),/* unused_bits2b */ + BTF_MEMBER_ENC(0, 14, BTF_MEMBER_OFFSET(0, 128)), /* union (anon) */ + BTF_MEMBER_ENC(NAME_TBD, 15, BTF_MEMBER_OFFSET(0, 192)), /* aenum */ + BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 224)), /* uint32_t ui32b */ + BTF_MEMBER_ENC(NAME_TBD, 17, BTF_MEMBER_OFFSET(2, 256)), /* bits2c */ + BTF_MEMBER_ENC(NAME_TBD, 20, BTF_MEMBER_OFFSET(0, 264)), /* si8_4 */ + /* typedef unsigned int ___int */ /* [17] */ + BTF_TYPEDEF_ENC(NAME_TBD, 18), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 6), /* [18] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 15), /* [19] */ + BTF_TYPE_ARRAY_ENC(21, 1, 2), /* [20] */ + BTF_TYPE_ARRAY_ENC(1, 1, 2), /* [21] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0___int\0si8_4"), + .key_size = sizeof(unsigned int), + .value_size = sizeof(struct pprint_mapv), + .key_type_id = 3, /* unsigned int */ + .value_type_id = 16, /* struct pprint_mapv */ + .max_entries = 128, +}, + +#ifdef __SIZEOF_INT128__ +{ + /* test int128 */ + .raw_types = { + /* unsigned int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), + /* __int128 */ /* [2] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 128, 16), + /* unsigned __int128 */ /* [3] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 128, 16), + /* struct pprint_mapv_int128 */ /* [4] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 5), 64), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 0)), /* si128a */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 128)), /* si128b */ + BTF_MEMBER_ENC(NAME_TBD, 3, BTF_MEMBER_OFFSET(3, 256)), /* bits3 */ + BTF_MEMBER_ENC(NAME_TBD, 3, BTF_MEMBER_OFFSET(80, 259)), /* bits80 */ + BTF_MEMBER_ENC(NAME_TBD, 3, BTF_MEMBER_OFFSET(0, 384)), /* ui128 */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0unsigned int\0__int128\0unsigned __int128\0pprint_mapv_int128\0si128a\0si128b\0bits3\0bits80\0ui128"), + .key_size = sizeof(unsigned int), + .value_size = sizeof(struct pprint_mapv_int128), + .key_type_id = 1, + .value_type_id = 4, + .max_entries = 128, + .mapv_kind = PPRINT_MAPV_KIND_INT128, +}, +#endif + +}; + +static struct btf_pprint_test_meta { + const char *descr; + enum bpf_map_type map_type; + const char *map_name; + bool ordered_map; + bool lossless_map; + bool percpu_map; +} pprint_tests_meta[] = { +{ + .descr = "BTF pretty print array", + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "pprint_test_array", + .ordered_map = true, + .lossless_map = true, + .percpu_map = false, +}, + +{ + .descr = "BTF pretty print hash", + .map_type = BPF_MAP_TYPE_HASH, + .map_name = "pprint_test_hash", + .ordered_map = false, + .lossless_map = true, + .percpu_map = false, +}, + +{ + .descr = "BTF pretty print lru hash", + .map_type = BPF_MAP_TYPE_LRU_HASH, + .map_name = "pprint_test_lru_hash", + .ordered_map = false, + .lossless_map = false, + .percpu_map = false, +}, + +{ + .descr = "BTF pretty print percpu array", + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY, + .map_name = "pprint_test_percpu_array", + .ordered_map = true, + .lossless_map = true, + .percpu_map = true, +}, + +{ + .descr = "BTF pretty print percpu hash", + .map_type = BPF_MAP_TYPE_PERCPU_HASH, + .map_name = "pprint_test_percpu_hash", + .ordered_map = false, + .lossless_map = true, + .percpu_map = true, +}, + +{ + .descr = "BTF pretty print lru percpu hash", + .map_type = BPF_MAP_TYPE_LRU_PERCPU_HASH, + .map_name = "pprint_test_lru_percpu_hash", + .ordered_map = false, + .lossless_map = false, + .percpu_map = true, +}, + +}; + +static size_t get_pprint_mapv_size(enum pprint_mapv_kind_t mapv_kind) +{ + if (mapv_kind == PPRINT_MAPV_KIND_BASIC) + return sizeof(struct pprint_mapv); + +#ifdef __SIZEOF_INT128__ + if (mapv_kind == PPRINT_MAPV_KIND_INT128) + return sizeof(struct pprint_mapv_int128); +#endif + + assert(0); +} + +static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind, + void *mapv, uint32_t i, + int num_cpus, int rounded_value_size) +{ + int cpu; + + if (mapv_kind == PPRINT_MAPV_KIND_BASIC) { + struct pprint_mapv *v = mapv; + + for (cpu = 0; cpu < num_cpus; cpu++) { + v->ui32 = i + cpu; + v->si32 = -i; + v->unused_bits2a = 3; + v->bits28 = i; + v->unused_bits2b = 3; + v->ui64 = i; + v->aenum = i & 0x03; + v->ui32b = 4; + v->bits2c = 1; + v->si8_4[0][0] = (cpu + i) & 0xff; + v->si8_4[0][1] = (cpu + i + 1) & 0xff; + v->si8_4[1][0] = (cpu + i + 2) & 0xff; + v->si8_4[1][1] = (cpu + i + 3) & 0xff; + v = (void *)v + rounded_value_size; + } + } + +#ifdef __SIZEOF_INT128__ + if (mapv_kind == PPRINT_MAPV_KIND_INT128) { + struct pprint_mapv_int128 *v = mapv; + + for (cpu = 0; cpu < num_cpus; cpu++) { + v->si128a = i; + v->si128b = -i; + v->bits3 = i & 0x07; + v->bits80 = (((unsigned __int128)1) << 64) + i; + v->ui128 = (((unsigned __int128)2) << 64) + i; + v = (void *)v + rounded_value_size; + } + } +#endif +} + +ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind, + char *expected_line, ssize_t line_size, + bool percpu_map, unsigned int next_key, + int cpu, void *mapv) +{ + ssize_t nexpected_line = -1; + + if (mapv_kind == PPRINT_MAPV_KIND_BASIC) { + struct pprint_mapv *v = mapv; + + nexpected_line = snprintf(expected_line, line_size, + "%s%u: {%u,0,%d,0x%x,0x%x,0x%x," + "{%llu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s," + "%u,0x%x,[[%d,%d],[%d,%d]]}\n", + percpu_map ? "\tcpu" : "", + percpu_map ? cpu : next_key, + v->ui32, v->si32, + v->unused_bits2a, + v->bits28, + v->unused_bits2b, + (__u64)v->ui64, + v->ui8a[0], v->ui8a[1], + v->ui8a[2], v->ui8a[3], + v->ui8a[4], v->ui8a[5], + v->ui8a[6], v->ui8a[7], + pprint_enum_str[v->aenum], + v->ui32b, + v->bits2c, + v->si8_4[0][0], v->si8_4[0][1], + v->si8_4[1][0], v->si8_4[1][1]); + } + +#ifdef __SIZEOF_INT128__ + if (mapv_kind == PPRINT_MAPV_KIND_INT128) { + struct pprint_mapv_int128 *v = mapv; + + nexpected_line = snprintf(expected_line, line_size, + "%s%u: {0x%lx,0x%lx,0x%lx," + "0x%lx%016lx,0x%lx%016lx}\n", + percpu_map ? "\tcpu" : "", + percpu_map ? cpu : next_key, + (uint64_t)v->si128a, + (uint64_t)v->si128b, + (uint64_t)v->bits3, + (uint64_t)(v->bits80 >> 64), + (uint64_t)v->bits80, + (uint64_t)(v->ui128 >> 64), + (uint64_t)v->ui128); + } +#endif + + return nexpected_line; +} + +static int check_line(const char *expected_line, int nexpected_line, + int expected_line_len, const char *line) +{ + if (CHECK(nexpected_line == expected_line_len, + "expected_line is too long")) + return -1; + + if (strcmp(expected_line, line)) { + fprintf(stderr, "unexpected pprint output\n"); + fprintf(stderr, "expected: %s", expected_line); + fprintf(stderr, " read: %s", line); + return -1; + } + + return 0; +} + + +static void do_test_pprint(int test_num) +{ + const struct btf_raw_test *test = &pprint_test_template[test_num]; + enum pprint_mapv_kind_t mapv_kind = test->mapv_kind; + struct bpf_create_map_attr create_attr = {}; + bool ordered_map, lossless_map, percpu_map; + int err, ret, num_cpus, rounded_value_size; + unsigned int key, nr_read_elems; + int map_fd = -1, btf_fd = -1; + unsigned int raw_btf_size; + char expected_line[255]; + FILE *pin_file = NULL; + char pin_path[255]; + size_t line_len = 0; + char *line = NULL; + void *mapv = NULL; + uint8_t *raw_btf; + ssize_t nread; + + if (!test__start_subtest(test->descr)) + return; + + raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, + test->str_sec, test->str_sec_size, + &raw_btf_size, NULL); + + if (!raw_btf) + return; + + *btf_log_buf = '\0'; + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + always_log); + free(raw_btf); + + if (CHECK(btf_fd == -1, "errno:%d", errno)) { + err = -1; + goto done; + } + + create_attr.name = test->map_name; + create_attr.map_type = test->map_type; + create_attr.key_size = test->key_size; + create_attr.value_size = test->value_size; + create_attr.max_entries = test->max_entries; + create_attr.btf_fd = btf_fd; + create_attr.btf_key_type_id = test->key_type_id; + create_attr.btf_value_type_id = test->value_type_id; + + map_fd = bpf_create_map_xattr(&create_attr); + if (CHECK(map_fd == -1, "errno:%d", errno)) { + err = -1; + goto done; + } + + ret = snprintf(pin_path, sizeof(pin_path), "%s/%s", + "/sys/fs/bpf", test->map_name); + + if (CHECK(ret == sizeof(pin_path), "pin_path %s/%s is too long", + "/sys/fs/bpf", test->map_name)) { + err = -1; + goto done; + } + + err = bpf_obj_pin(map_fd, pin_path); + if (CHECK(err, "bpf_obj_pin(%s): errno:%d.", pin_path, errno)) + goto done; + + percpu_map = test->percpu_map; + num_cpus = percpu_map ? bpf_num_possible_cpus() : 1; + rounded_value_size = round_up(get_pprint_mapv_size(mapv_kind), 8); + mapv = calloc(num_cpus, rounded_value_size); + if (CHECK(!mapv, "mapv allocation failure")) { + err = -1; + goto done; + } + + for (key = 0; key < test->max_entries; key++) { + set_pprint_mapv(mapv_kind, mapv, key, num_cpus, rounded_value_size); + bpf_map_update_elem(map_fd, &key, mapv, 0); + } + + pin_file = fopen(pin_path, "r"); + if (CHECK(!pin_file, "fopen(%s): errno:%d", pin_path, errno)) { + err = -1; + goto done; + } + + /* Skip lines start with '#' */ + while ((nread = getline(&line, &line_len, pin_file)) > 0 && + *line == '#') + ; + + if (CHECK(nread <= 0, "Unexpected EOF")) { + err = -1; + goto done; + } + + nr_read_elems = 0; + ordered_map = test->ordered_map; + lossless_map = test->lossless_map; + do { + ssize_t nexpected_line; + unsigned int next_key; + void *cmapv; + int cpu; + + next_key = ordered_map ? nr_read_elems : atoi(line); + set_pprint_mapv(mapv_kind, mapv, next_key, num_cpus, rounded_value_size); + cmapv = mapv; + + for (cpu = 0; cpu < num_cpus; cpu++) { + if (percpu_map) { + /* for percpu map, the format looks like: + * : { + * cpu0: + * cpu1: + * ... + * cpun: + * } + * + * let us verify the line containing the key here. + */ + if (cpu == 0) { + nexpected_line = snprintf(expected_line, + sizeof(expected_line), + "%u: {\n", + next_key); + + err = check_line(expected_line, nexpected_line, + sizeof(expected_line), line); + if (err == -1) + goto done; + } + + /* read value@cpu */ + nread = getline(&line, &line_len, pin_file); + if (nread < 0) + break; + } + + nexpected_line = get_pprint_expected_line(mapv_kind, expected_line, + sizeof(expected_line), + percpu_map, next_key, + cpu, cmapv); + err = check_line(expected_line, nexpected_line, + sizeof(expected_line), line); + if (err == -1) + goto done; + + cmapv = cmapv + rounded_value_size; + } + + if (percpu_map) { + /* skip the last bracket for the percpu map */ + nread = getline(&line, &line_len, pin_file); + if (nread < 0) + break; + } + + nread = getline(&line, &line_len, pin_file); + } while (++nr_read_elems < test->max_entries && nread > 0); + + if (lossless_map && + CHECK(nr_read_elems < test->max_entries, + "Unexpected EOF. nr_read_elems:%u test->max_entries:%u", + nr_read_elems, test->max_entries)) { + err = -1; + goto done; + } + + if (CHECK(nread > 0, "Unexpected extra pprint output: %s", line)) { + err = -1; + goto done; + } + + err = 0; + +done: + if (mapv) + free(mapv); + if (!err) + fprintf(stderr, "OK"); + if (*btf_log_buf && (err || always_log)) + fprintf(stderr, "\n%s", btf_log_buf); + if (btf_fd != -1) + close(btf_fd); + if (map_fd != -1) + close(map_fd); + if (pin_file) + fclose(pin_file); + unlink(pin_path); + free(line); +} + +static void test_pprint(void) +{ + unsigned int i; + + /* test various maps with the first test template */ + for (i = 0; i < ARRAY_SIZE(pprint_tests_meta); i++) { + pprint_test_template[0].descr = pprint_tests_meta[i].descr; + pprint_test_template[0].map_type = pprint_tests_meta[i].map_type; + pprint_test_template[0].map_name = pprint_tests_meta[i].map_name; + pprint_test_template[0].ordered_map = pprint_tests_meta[i].ordered_map; + pprint_test_template[0].lossless_map = pprint_tests_meta[i].lossless_map; + pprint_test_template[0].percpu_map = pprint_tests_meta[i].percpu_map; + + do_test_pprint(0); + } + + /* test rest test templates with the first map */ + for (i = 1; i < ARRAY_SIZE(pprint_test_template); i++) { + pprint_test_template[i].descr = pprint_tests_meta[0].descr; + pprint_test_template[i].map_type = pprint_tests_meta[0].map_type; + pprint_test_template[i].map_name = pprint_tests_meta[0].map_name; + pprint_test_template[i].ordered_map = pprint_tests_meta[0].ordered_map; + pprint_test_template[i].lossless_map = pprint_tests_meta[0].lossless_map; + pprint_test_template[i].percpu_map = pprint_tests_meta[0].percpu_map; + do_test_pprint(i); + } +} + +#define BPF_LINE_INFO_ENC(insn_off, file_off, line_off, line_num, line_col) \ + (insn_off), (file_off), (line_off), ((line_num) << 10 | ((line_col) & 0x3ff)) + +static struct prog_info_raw_test { + const char *descr; + const char *str_sec; + const char *err_str; + __u32 raw_types[MAX_NR_RAW_U32]; + __u32 str_sec_size; + struct bpf_insn insns[MAX_INSNS]; + __u32 prog_type; + __u32 func_info[MAX_SUBPROGS][2]; + __u32 func_info_rec_size; + __u32 func_info_cnt; + __u32 line_info[MAX_NR_RAW_U32]; + __u32 line_info_rec_size; + __u32 nr_jited_ksyms; + bool expected_prog_load_failure; + __u32 dead_code_cnt; + __u32 dead_code_mask; + __u32 dead_func_cnt; + __u32 dead_func_mask; +} info_raw_tests[] = { +{ + .descr = "func_type (main func + one sub)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ + BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", + .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { {0, 5}, {3, 6} }, + .func_info_rec_size = 8, + .func_info_cnt = 2, + .line_info = { BTF_END_RAW }, +}, + +{ + .descr = "func_type (Incorrect func_info_rec_size)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ + BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", + .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { {0, 5}, {3, 6} }, + .func_info_rec_size = 4, + .func_info_cnt = 2, + .line_info = { BTF_END_RAW }, + .expected_prog_load_failure = true, +}, + +{ + .descr = "func_type (Incorrect func_info_cnt)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ + BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", + .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { {0, 5}, {3, 6} }, + .func_info_rec_size = 8, + .func_info_cnt = 1, + .line_info = { BTF_END_RAW }, + .expected_prog_load_failure = true, +}, + +{ + .descr = "func_type (Incorrect bpf_func_info.insn_off)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ + BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", + .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { {0, 5}, {2, 6} }, + .func_info_rec_size = 8, + .func_info_cnt = 2, + .line_info = { BTF_END_RAW }, + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (No subprog)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, +}, + +{ + .descr = "line_info (No subprog. insn_off >= prog->len)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), + BPF_LINE_INFO_ENC(4, 0, 0, 5, 6), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, + .err_str = "line_info[4].insn_off", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (Zero bpf insn code)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8), /* [2] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), /* [3] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0unsigned long\0u64\0u64 a=1;\0return a;"), + .insns = { + BPF_LD_IMM64(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(1, 0, 0, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, + .err_str = "Invalid insn code at line_info[1]", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (No subprog. zero tailing line_info", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), 0, + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), 0, + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), 0, + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), 0, + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info) + sizeof(__u32), + .nr_jited_ksyms = 1, +}, + +{ + .descr = "line_info (No subprog. nonzero tailing line_info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), 0, + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), 0, + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), 0, + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), 1, + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info) + sizeof(__u32), + .nr_jited_ksyms = 1, + .err_str = "nonzero tailing record in line_info", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (subprog)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, +}, + +{ + .descr = "line_info (subprog + func_info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0sub\0main\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 2, + .func_info_rec_size = 8, + .func_info = { {0, 4}, {5, 3} }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, +}, + +{ + .descr = "line_info (subprog. missing 1st func line info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .err_str = "missing bpf_line_info for func#0", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (subprog. missing 2nd func line info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .err_str = "missing bpf_line_info for func#1", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (subprog. unordered insn offset)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .err_str = "Invalid line_info[2].insn_off", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (dead start)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0/* dead jmp */\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 5, 6), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, + .dead_code_cnt = 1, + .dead_code_mask = 0x01, +}, + +{ + .descr = "line_info (dead end)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0/* dead jmp */\0return a + b;\0/* dead exit */"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, 1), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 12), + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 11), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 10), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 9), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 5, 8), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 6, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, + .dead_code_cnt = 2, + .dead_code_mask = 0x28, +}, + +{ + .descr = "line_info (dead code + subprog + func_info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0sub\0main\0int a=1+1;\0/* dead jmp */" + "\0/* dead */\0/* dead */\0/* dead */\0/* dead */" + "\0/* dead */\0/* dead */\0/* dead */\0/* dead */" + "\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 8), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 2, + .func_info_rec_size = 8, + .func_info = { {0, 4}, {14, 3} }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(8, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(9, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(10, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(11, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(12, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(14, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(16, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .dead_code_cnt = 9, + .dead_code_mask = 0x3fe, +}, + +{ + .descr = "line_info (dead subprog)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0dead\0main\0func\0int a=1+1;\0/* live call */" + "\0return 0;\0return 0;\0/* dead */\0/* dead */" + "\0/* dead */\0return bla + 1;\0return bla + 1;" + "\0return bla + 1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_CALL_REL(3), + BPF_CALL_REL(5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 3, + .func_info_rec_size = 8, + .func_info = { {0, 4}, {6, 3}, {9, 5} }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(8, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(9, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(10, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(11, 0, NAME_TBD, 2, 9), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .dead_code_cnt = 3, + .dead_code_mask = 0x70, + .dead_func_cnt = 1, + .dead_func_mask = 0x2, +}, + +{ + .descr = "line_info (dead last subprog)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0dead\0main\0int a=1+1;\0/* live call */" + "\0return 0;\0/* dead */\0/* dead */"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_CALL_REL(2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 2, + .func_info_rec_size = 8, + .func_info = { {0, 4}, {5, 3} }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, + .dead_code_cnt = 2, + .dead_code_mask = 0x18, + .dead_func_cnt = 1, + .dead_func_mask = 0x2, +}, + +{ + .descr = "line_info (dead subprog + dead start)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0dead\0main\0func\0int a=1+1;\0/* dead */" + "\0return 0;\0return 0;\0return 0;" + "\0/* dead */\0/* dead */\0/* dead */\0/* dead */" + "\0return b + 1;\0return b + 1;\0return b + 1;"), + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_CALL_REL(3), + BPF_CALL_REL(5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_REG(BPF_REG_0, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 3, + .func_info_rec_size = 8, + .func_info = { {0, 4}, {7, 3}, {10, 5} }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(8, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(9, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(10, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(11, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(12, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(13, 0, NAME_TBD, 2, 9), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .dead_code_cnt = 5, + .dead_code_mask = 0x1e2, + .dead_func_cnt = 1, + .dead_func_mask = 0x2, +}, + +{ + .descr = "line_info (dead subprog + dead start w/ move)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0dead\0main\0func\0int a=1+1;\0/* live call */" + "\0return 0;\0return 0;\0/* dead */\0/* dead */" + "\0/* dead */\0return bla + 1;\0return bla + 1;" + "\0return bla + 1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_CALL_REL(3), + BPF_CALL_REL(5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_REG(BPF_REG_0, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 3, + .func_info_rec_size = 8, + .func_info = { {0, 4}, {6, 3}, {9, 5} }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(8, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(9, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(11, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(12, 0, NAME_TBD, 2, 9), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .dead_code_cnt = 3, + .dead_code_mask = 0x70, + .dead_func_cnt = 1, + .dead_func_mask = 0x2, +}, + +{ + .descr = "line_info (dead end + subprog start w/ no linfo)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0main\0func\0/* main linfo */\0/* func linfo */"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 1, 3), + BPF_CALL_REL(3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 2, + .func_info_rec_size = 8, + .func_info = { {0, 3}, {6, 4}, }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, +}, + +}; + +static size_t probe_prog_length(const struct bpf_insn *fp) +{ + size_t len; + + for (len = MAX_INSNS - 1; len > 0; --len) + if (fp[len].code != 0 || fp[len].imm != 0) + break; + return len + 1; +} + +static __u32 *patch_name_tbd(const __u32 *raw_u32, + const char *str, __u32 str_off, + unsigned int str_sec_size, + unsigned int *ret_size) +{ + int i, raw_u32_size = get_raw_sec_size(raw_u32); + const char *end_str = str + str_sec_size; + const char *next_str = str + str_off; + __u32 *new_u32 = NULL; + + if (raw_u32_size == -1) + return ERR_PTR(-EINVAL); + + if (!raw_u32_size) { + *ret_size = 0; + return NULL; + } + + new_u32 = malloc(raw_u32_size); + if (!new_u32) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < raw_u32_size / sizeof(raw_u32[0]); i++) { + if (raw_u32[i] == NAME_TBD) { + next_str = get_next_str(next_str, end_str); + if (CHECK(!next_str, "Error in getting next_str\n")) { + free(new_u32); + return ERR_PTR(-EINVAL); + } + new_u32[i] = next_str - str; + next_str += strlen(next_str); + } else { + new_u32[i] = raw_u32[i]; + } + } + + *ret_size = raw_u32_size; + return new_u32; +} + +static int test_get_finfo(const struct prog_info_raw_test *test, + int prog_fd) +{ + struct bpf_prog_info info = {}; + struct bpf_func_info *finfo; + __u32 info_len, rec_size, i; + void *func_info = NULL; + __u32 nr_func_info; + int err; + + /* get necessary lens */ + info_len = sizeof(struct bpf_prog_info); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + return -1; + } + nr_func_info = test->func_info_cnt - test->dead_func_cnt; + if (CHECK(info.nr_func_info != nr_func_info, + "incorrect info.nr_func_info (1st) %d", + info.nr_func_info)) { + return -1; + } + + rec_size = info.func_info_rec_size; + if (CHECK(rec_size != sizeof(struct bpf_func_info), + "incorrect info.func_info_rec_size (1st) %d", rec_size)) { + return -1; + } + + if (!info.nr_func_info) + return 0; + + func_info = malloc(info.nr_func_info * rec_size); + if (CHECK(!func_info, "out of memory")) + return -1; + + /* reset info to only retrieve func_info related data */ + memset(&info, 0, sizeof(info)); + info.nr_func_info = nr_func_info; + info.func_info_rec_size = rec_size; + info.func_info = ptr_to_u64(func_info); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + err = -1; + goto done; + } + if (CHECK(info.nr_func_info != nr_func_info, + "incorrect info.nr_func_info (2nd) %d", + info.nr_func_info)) { + err = -1; + goto done; + } + if (CHECK(info.func_info_rec_size != rec_size, + "incorrect info.func_info_rec_size (2nd) %d", + info.func_info_rec_size)) { + err = -1; + goto done; + } + + finfo = func_info; + for (i = 0; i < nr_func_info; i++) { + if (test->dead_func_mask & (1 << i)) + continue; + if (CHECK(finfo->type_id != test->func_info[i][1], + "incorrect func_type %u expected %u", + finfo->type_id, test->func_info[i][1])) { + err = -1; + goto done; + } + finfo = (void *)finfo + rec_size; + } + + err = 0; + +done: + free(func_info); + return err; +} + +static int test_get_linfo(const struct prog_info_raw_test *test, + const void *patched_linfo, + __u32 cnt, int prog_fd) +{ + __u32 i, info_len, nr_jited_ksyms, nr_jited_func_lens; + __u64 *jited_linfo = NULL, *jited_ksyms = NULL; + __u32 rec_size, jited_rec_size, jited_cnt; + struct bpf_line_info *linfo = NULL; + __u32 cur_func_len, ksyms_found; + struct bpf_prog_info info = {}; + __u32 *jited_func_lens = NULL; + __u64 cur_func_ksyms; + __u32 dead_insns; + int err; + + jited_cnt = cnt; + rec_size = sizeof(*linfo); + jited_rec_size = sizeof(*jited_linfo); + if (test->nr_jited_ksyms) + nr_jited_ksyms = test->nr_jited_ksyms; + else + nr_jited_ksyms = test->func_info_cnt - test->dead_func_cnt; + nr_jited_func_lens = nr_jited_ksyms; + + info_len = sizeof(struct bpf_prog_info); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (CHECK(err == -1, "err:%d errno:%d", err, errno)) { + err = -1; + goto done; + } + + if (!info.jited_prog_len) { + /* prog is not jited */ + jited_cnt = 0; + nr_jited_ksyms = 1; + nr_jited_func_lens = 1; + } + + if (CHECK(info.nr_line_info != cnt || + info.nr_jited_line_info != jited_cnt || + info.nr_jited_ksyms != nr_jited_ksyms || + info.nr_jited_func_lens != nr_jited_func_lens || + (!info.nr_line_info && info.nr_jited_line_info), + "info: nr_line_info:%u(expected:%u) nr_jited_line_info:%u(expected:%u) nr_jited_ksyms:%u(expected:%u) nr_jited_func_lens:%u(expected:%u)", + info.nr_line_info, cnt, + info.nr_jited_line_info, jited_cnt, + info.nr_jited_ksyms, nr_jited_ksyms, + info.nr_jited_func_lens, nr_jited_func_lens)) { + err = -1; + goto done; + } + + if (CHECK(info.line_info_rec_size != sizeof(struct bpf_line_info) || + info.jited_line_info_rec_size != sizeof(__u64), + "info: line_info_rec_size:%u(userspace expected:%u) jited_line_info_rec_size:%u(userspace expected:%u)", + info.line_info_rec_size, rec_size, + info.jited_line_info_rec_size, jited_rec_size)) { + err = -1; + goto done; + } + + if (!cnt) + return 0; + + rec_size = info.line_info_rec_size; + jited_rec_size = info.jited_line_info_rec_size; + + memset(&info, 0, sizeof(info)); + + linfo = calloc(cnt, rec_size); + if (CHECK(!linfo, "!linfo")) { + err = -1; + goto done; + } + info.nr_line_info = cnt; + info.line_info_rec_size = rec_size; + info.line_info = ptr_to_u64(linfo); + + if (jited_cnt) { + jited_linfo = calloc(jited_cnt, jited_rec_size); + jited_ksyms = calloc(nr_jited_ksyms, sizeof(*jited_ksyms)); + jited_func_lens = calloc(nr_jited_func_lens, + sizeof(*jited_func_lens)); + if (CHECK(!jited_linfo || !jited_ksyms || !jited_func_lens, + "jited_linfo:%p jited_ksyms:%p jited_func_lens:%p", + jited_linfo, jited_ksyms, jited_func_lens)) { + err = -1; + goto done; + } + + info.nr_jited_line_info = jited_cnt; + info.jited_line_info_rec_size = jited_rec_size; + info.jited_line_info = ptr_to_u64(jited_linfo); + info.nr_jited_ksyms = nr_jited_ksyms; + info.jited_ksyms = ptr_to_u64(jited_ksyms); + info.nr_jited_func_lens = nr_jited_func_lens; + info.jited_func_lens = ptr_to_u64(jited_func_lens); + } + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + + /* + * Only recheck the info.*line_info* fields. + * Other fields are not the concern of this test. + */ + if (CHECK(err == -1 || + info.nr_line_info != cnt || + (jited_cnt && !info.jited_line_info) || + info.nr_jited_line_info != jited_cnt || + info.line_info_rec_size != rec_size || + info.jited_line_info_rec_size != jited_rec_size, + "err:%d errno:%d info: nr_line_info:%u(expected:%u) nr_jited_line_info:%u(expected:%u) line_info_rec_size:%u(expected:%u) jited_linfo_rec_size:%u(expected:%u) line_info:%p jited_line_info:%p", + err, errno, + info.nr_line_info, cnt, + info.nr_jited_line_info, jited_cnt, + info.line_info_rec_size, rec_size, + info.jited_line_info_rec_size, jited_rec_size, + (void *)(long)info.line_info, + (void *)(long)info.jited_line_info)) { + err = -1; + goto done; + } + + dead_insns = 0; + while (test->dead_code_mask & (1 << dead_insns)) + dead_insns++; + + CHECK(linfo[0].insn_off, "linfo[0].insn_off:%u", + linfo[0].insn_off); + for (i = 1; i < cnt; i++) { + const struct bpf_line_info *expected_linfo; + + while (test->dead_code_mask & (1 << (i + dead_insns))) + dead_insns++; + + expected_linfo = patched_linfo + + ((i + dead_insns) * test->line_info_rec_size); + if (CHECK(linfo[i].insn_off <= linfo[i - 1].insn_off, + "linfo[%u].insn_off:%u <= linfo[%u].insn_off:%u", + i, linfo[i].insn_off, + i - 1, linfo[i - 1].insn_off)) { + err = -1; + goto done; + } + if (CHECK(linfo[i].file_name_off != expected_linfo->file_name_off || + linfo[i].line_off != expected_linfo->line_off || + linfo[i].line_col != expected_linfo->line_col, + "linfo[%u] (%u, %u, %u) != (%u, %u, %u)", i, + linfo[i].file_name_off, + linfo[i].line_off, + linfo[i].line_col, + expected_linfo->file_name_off, + expected_linfo->line_off, + expected_linfo->line_col)) { + err = -1; + goto done; + } + } + + if (!jited_cnt) { + fprintf(stderr, "not jited. skipping jited_line_info check. "); + err = 0; + goto done; + } + + if (CHECK(jited_linfo[0] != jited_ksyms[0], + "jited_linfo[0]:%lx != jited_ksyms[0]:%lx", + (long)(jited_linfo[0]), (long)(jited_ksyms[0]))) { + err = -1; + goto done; + } + + ksyms_found = 1; + cur_func_len = jited_func_lens[0]; + cur_func_ksyms = jited_ksyms[0]; + for (i = 1; i < jited_cnt; i++) { + if (ksyms_found < nr_jited_ksyms && + jited_linfo[i] == jited_ksyms[ksyms_found]) { + cur_func_ksyms = jited_ksyms[ksyms_found]; + cur_func_len = jited_ksyms[ksyms_found]; + ksyms_found++; + continue; + } + + if (CHECK(jited_linfo[i] <= jited_linfo[i - 1], + "jited_linfo[%u]:%lx <= jited_linfo[%u]:%lx", + i, (long)jited_linfo[i], + i - 1, (long)(jited_linfo[i - 1]))) { + err = -1; + goto done; + } + + if (CHECK(jited_linfo[i] - cur_func_ksyms > cur_func_len, + "jited_linfo[%u]:%lx - %lx > %u", + i, (long)jited_linfo[i], (long)cur_func_ksyms, + cur_func_len)) { + err = -1; + goto done; + } + } + + if (CHECK(ksyms_found != nr_jited_ksyms, + "ksyms_found:%u != nr_jited_ksyms:%u", + ksyms_found, nr_jited_ksyms)) { + err = -1; + goto done; + } + + err = 0; + +done: + free(linfo); + free(jited_linfo); + free(jited_ksyms); + free(jited_func_lens); + return err; +} + +static void do_test_info_raw(unsigned int test_num) +{ + const struct prog_info_raw_test *test = &info_raw_tests[test_num - 1]; + unsigned int raw_btf_size, linfo_str_off, linfo_size; + int btf_fd = -1, prog_fd = -1, err = 0; + void *raw_btf, *patched_linfo = NULL; + const char *ret_next_str; + union bpf_attr attr = {}; + + if (!test__start_subtest(test->descr)) + return; + + raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, + test->str_sec, test->str_sec_size, + &raw_btf_size, &ret_next_str); + if (!raw_btf) + return; + + *btf_log_buf = '\0'; + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + always_log); + free(raw_btf); + + if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) { + err = -1; + goto done; + } + + if (*btf_log_buf && always_log) + fprintf(stderr, "\n%s", btf_log_buf); + *btf_log_buf = '\0'; + + linfo_str_off = ret_next_str - test->str_sec; + patched_linfo = patch_name_tbd(test->line_info, + test->str_sec, linfo_str_off, + test->str_sec_size, &linfo_size); + if (IS_ERR(patched_linfo)) { + fprintf(stderr, "error in creating raw bpf_line_info"); + err = -1; + goto done; + } + + attr.prog_type = test->prog_type; + attr.insns = ptr_to_u64(test->insns); + attr.insn_cnt = probe_prog_length(test->insns); + attr.license = ptr_to_u64("GPL"); + attr.prog_btf_fd = btf_fd; + attr.func_info_rec_size = test->func_info_rec_size; + attr.func_info_cnt = test->func_info_cnt; + attr.func_info = ptr_to_u64(test->func_info); + attr.log_buf = ptr_to_u64(btf_log_buf); + attr.log_size = BTF_LOG_BUF_SIZE; + attr.log_level = 1; + if (linfo_size) { + attr.line_info_rec_size = test->line_info_rec_size; + attr.line_info = ptr_to_u64(patched_linfo); + attr.line_info_cnt = linfo_size / attr.line_info_rec_size; + } + + prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); + err = ((prog_fd == -1) != test->expected_prog_load_failure); + if (CHECK(err, "prog_fd:%d expected_prog_load_failure:%u errno:%d", + prog_fd, test->expected_prog_load_failure, errno) || + CHECK(test->err_str && !strstr(btf_log_buf, test->err_str), + "expected err_str:%s", test->err_str)) { + err = -1; + goto done; + } + + if (prog_fd == -1) + goto done; + + err = test_get_finfo(test, prog_fd); + if (err) + goto done; + + err = test_get_linfo(test, patched_linfo, + attr.line_info_cnt - test->dead_code_cnt, + prog_fd); + if (err) + goto done; + +done: + if (*btf_log_buf && (err || always_log)) + fprintf(stderr, "\n%s", btf_log_buf); + + if (btf_fd != -1) + close(btf_fd); + if (prog_fd != -1) + close(prog_fd); + + if (!IS_ERR(patched_linfo)) + free(patched_linfo); +} + +struct btf_raw_data { + __u32 raw_types[MAX_NR_RAW_U32]; + const char *str_sec; + __u32 str_sec_size; +}; + +struct btf_dedup_test { + const char *descr; + struct btf_raw_data input; + struct btf_raw_data expect; + struct btf_dedup_opts opts; +}; + +const struct btf_dedup_test dedup_tests[] = { + +{ + .descr = "dedup: unused strings filtering", + .input = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 64, 8), + BTF_END_RAW, + }, + BTF_STR_SEC("\0unused\0int\0foo\0bar\0long"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 64, 8), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0long"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: strings deduplication", + .input = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 64, 8), + BTF_TYPE_INT_ENC(NAME_NTH(3), BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 64, 8), + BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0long int\0int\0long int\0int"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 64, 8), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0long int"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: struct example #1", + /* + * struct s { + * struct s *next; + * const int *a; + * int b[16]; + * int c; + * } + */ + .input = { + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* int[16] */ + BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */ + /* struct s { */ + BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [3] */ + BTF_MEMBER_ENC(NAME_NTH(3), 4, 0), /* struct s *next; */ + BTF_MEMBER_ENC(NAME_NTH(4), 5, 64), /* const int *a; */ + BTF_MEMBER_ENC(NAME_NTH(5), 2, 128), /* int b[16]; */ + BTF_MEMBER_ENC(NAME_NTH(6), 1, 640), /* int c; */ + /* ptr -> [3] struct s */ + BTF_PTR_ENC(3), /* [4] */ + /* ptr -> [6] const int */ + BTF_PTR_ENC(6), /* [5] */ + /* const -> [1] int */ + BTF_CONST_ENC(1), /* [6] */ + + /* full copy of the above */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [7] */ + BTF_TYPE_ARRAY_ENC(7, 7, 16), /* [8] */ + BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [9] */ + BTF_MEMBER_ENC(NAME_NTH(3), 10, 0), + BTF_MEMBER_ENC(NAME_NTH(4), 11, 64), + BTF_MEMBER_ENC(NAME_NTH(5), 8, 128), + BTF_MEMBER_ENC(NAME_NTH(6), 7, 640), + BTF_PTR_ENC(9), /* [10] */ + BTF_PTR_ENC(12), /* [11] */ + BTF_CONST_ENC(7), /* [12] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0"), + }, + .expect = { + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* int[16] */ + BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */ + /* struct s { */ + BTF_STRUCT_ENC(NAME_NTH(6), 4, 84), /* [3] */ + BTF_MEMBER_ENC(NAME_NTH(5), 4, 0), /* struct s *next; */ + BTF_MEMBER_ENC(NAME_NTH(1), 5, 64), /* const int *a; */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 128), /* int b[16]; */ + BTF_MEMBER_ENC(NAME_NTH(3), 1, 640), /* int c; */ + /* ptr -> [3] struct s */ + BTF_PTR_ENC(3), /* [4] */ + /* ptr -> [6] const int */ + BTF_PTR_ENC(6), /* [5] */ + /* const -> [1] int */ + BTF_CONST_ENC(1), /* [6] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0a\0b\0c\0int\0next\0s"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: struct <-> fwd resolution w/ hash collision", + /* + * // CU 1: + * struct x; + * struct s { + * struct x *x; + * }; + * // CU 2: + * struct x {}; + * struct s { + * struct x *x; + * }; + */ + .input = { + .raw_types = { + /* CU 1 */ + BTF_FWD_ENC(NAME_TBD, 0 /* struct fwd */), /* [1] fwd x */ + BTF_PTR_ENC(1), /* [2] ptr -> [1] */ + BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [3] struct s */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + /* CU 2 */ + BTF_STRUCT_ENC(NAME_TBD, 0, 0), /* [4] struct x */ + BTF_PTR_ENC(4), /* [5] ptr -> [4] */ + BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [6] struct s */ + BTF_MEMBER_ENC(NAME_TBD, 5, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0x\0s\0x\0x\0s\0x\0"), + }, + .expect = { + .raw_types = { + BTF_PTR_ENC(3), /* [1] ptr -> [3] */ + BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [2] struct s */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_STRUCT_ENC(NAME_NTH(2), 0, 0), /* [3] struct x */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0s\0x"), + }, + .opts = { + .dont_resolve_fwds = false, + .dedup_table_size = 1, /* force hash collisions */ + }, +}, +{ + .descr = "dedup: void equiv check", + /* + * // CU 1: + * struct s { + * struct {} *x; + * }; + * // CU 2: + * struct s { + * int *x; + * }; + */ + .input = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(0, 0, 1), /* [1] struct {} */ + BTF_PTR_ENC(1), /* [2] ptr -> [1] */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [3] struct s */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + /* CU 2 */ + BTF_PTR_ENC(0), /* [4] ptr -> void */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [5] struct s */ + BTF_MEMBER_ENC(NAME_NTH(2), 4, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0s\0x"), + }, + .expect = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(0, 0, 1), /* [1] struct {} */ + BTF_PTR_ENC(1), /* [2] ptr -> [1] */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [3] struct s */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + /* CU 2 */ + BTF_PTR_ENC(0), /* [4] ptr -> void */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [5] struct s */ + BTF_MEMBER_ENC(NAME_NTH(2), 4, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0s\0x"), + }, + .opts = { + .dont_resolve_fwds = false, + .dedup_table_size = 1, /* force hash collisions */ + }, +}, +{ + .descr = "dedup: all possible kinds (no duplicates)", + .input = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 8), /* [1] int */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), 4), /* [2] enum */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_FWD_ENC(NAME_TBD, 1 /* union kind_flag */), /* [3] fwd */ + BTF_TYPE_ARRAY_ENC(2, 1, 7), /* [4] array */ + BTF_STRUCT_ENC(NAME_TBD, 1, 4), /* [5] struct */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_UNION_ENC(NAME_TBD, 1, 4), /* [6] union */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [7] typedef */ + BTF_PTR_ENC(0), /* [8] ptr */ + BTF_CONST_ENC(8), /* [9] const */ + BTF_VOLATILE_ENC(8), /* [10] volatile */ + BTF_RESTRICT_ENC(8), /* [11] restrict */ + BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), + BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 8), /* [1] int */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), 4), /* [2] enum */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_FWD_ENC(NAME_TBD, 1 /* union kind_flag */), /* [3] fwd */ + BTF_TYPE_ARRAY_ENC(2, 1, 7), /* [4] array */ + BTF_STRUCT_ENC(NAME_TBD, 1, 4), /* [5] struct */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_UNION_ENC(NAME_TBD, 1, 4), /* [6] union */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [7] typedef */ + BTF_PTR_ENC(0), /* [8] ptr */ + BTF_CONST_ENC(8), /* [9] const */ + BTF_VOLATILE_ENC(8), /* [10] volatile */ + BTF_RESTRICT_ENC(8), /* [11] restrict */ + BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), + BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: no int duplicates", + .input = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8), + /* different name */ + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 32, 8), + /* different encoding */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_CHAR, 0, 32, 8), + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_BOOL, 0, 32, 8), + /* different bit offset */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 8, 32, 8), + /* different bit size */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8), + /* different byte size */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0some other int"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8), + /* different name */ + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 32, 8), + /* different encoding */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_CHAR, 0, 32, 8), + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_BOOL, 0, 32, 8), + /* different bit offset */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 8, 32, 8), + /* different bit size */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8), + /* different byte size */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0some other int"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: enum fwd resolution", + .input = { + .raw_types = { + /* [1] fwd enum 'e1' before full enum */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4), + /* [2] full enum 'e1' after fwd */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 123), + /* [3] full enum 'e2' before fwd */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(4), 456), + /* [4] fwd enum 'e2' after full enum */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4), + /* [5] incompatible fwd enum with different size */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1), + /* [6] incompatible full enum with different value */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 321), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), + }, + .expect = { + .raw_types = { + /* [1] full enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 123), + /* [2] full enum 'e2' */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(4), 456), + /* [3] incompatible fwd enum with different size */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1), + /* [4] incompatible full enum with different value */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 321), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: datasec and vars pass-through", + .input = { + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* static int t */ + BTF_VAR_ENC(NAME_NTH(2), 1, 0), /* [2] */ + /* .bss section */ /* [3] */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + /* int, referenced from [5] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [4] */ + /* another static int t */ + BTF_VAR_ENC(NAME_NTH(2), 4, 0), /* [5] */ + /* another .bss section */ /* [6] */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(5, 0, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0.bss\0t"), + }, + .expect = { + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* static int t */ + BTF_VAR_ENC(NAME_NTH(2), 1, 0), /* [2] */ + /* .bss section */ /* [3] */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + /* another static int t */ + BTF_VAR_ENC(NAME_NTH(2), 1, 0), /* [4] */ + /* another .bss section */ /* [5] */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(4, 0, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0.bss\0t"), + }, + .opts = { + .dont_resolve_fwds = false, + .dedup_table_size = 1 + }, +}, + +}; + +static int btf_type_size(const struct btf_type *t) +{ + int base_size = sizeof(struct btf_type); + __u16 vlen = BTF_INFO_VLEN(t->info); + __u16 kind = BTF_INFO_KIND(t->info); + + switch (kind) { + case BTF_KIND_FWD: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + return base_size; + case BTF_KIND_INT: + return base_size + sizeof(__u32); + case BTF_KIND_ENUM: + return base_size + vlen * sizeof(struct btf_enum); + case BTF_KIND_ARRAY: + return base_size + sizeof(struct btf_array); + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + return base_size + vlen * sizeof(struct btf_member); + case BTF_KIND_FUNC_PROTO: + return base_size + vlen * sizeof(struct btf_param); + case BTF_KIND_VAR: + return base_size + sizeof(struct btf_var); + case BTF_KIND_DATASEC: + return base_size + vlen * sizeof(struct btf_var_secinfo); + default: + fprintf(stderr, "Unsupported BTF_KIND:%u\n", kind); + return -EINVAL; + } +} + +static void dump_btf_strings(const char *strs, __u32 len) +{ + const char *cur = strs; + int i = 0; + + while (cur < strs + len) { + fprintf(stderr, "string #%d: '%s'\n", i, cur); + cur += strlen(cur) + 1; + i++; + } +} + +static void do_test_dedup(unsigned int test_num) +{ + const struct btf_dedup_test *test = &dedup_tests[test_num - 1]; + __u32 test_nr_types, expect_nr_types, test_btf_size, expect_btf_size; + const struct btf_header *test_hdr, *expect_hdr; + struct btf *test_btf = NULL, *expect_btf = NULL; + const void *test_btf_data, *expect_btf_data; + const char *ret_test_next_str, *ret_expect_next_str; + const char *test_strs, *expect_strs; + const char *test_str_cur, *test_str_end; + const char *expect_str_cur, *expect_str_end; + unsigned int raw_btf_size; + void *raw_btf; + int err = 0, i; + + if (!test__start_subtest(test->descr)) + return; + + raw_btf = btf_raw_create(&hdr_tmpl, test->input.raw_types, + test->input.str_sec, test->input.str_sec_size, + &raw_btf_size, &ret_test_next_str); + if (!raw_btf) + return; + + test_btf = btf__new((__u8 *)raw_btf, raw_btf_size); + free(raw_btf); + if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld", + PTR_ERR(test_btf))) { + err = -1; + goto done; + } + + raw_btf = btf_raw_create(&hdr_tmpl, test->expect.raw_types, + test->expect.str_sec, + test->expect.str_sec_size, + &raw_btf_size, &ret_expect_next_str); + if (!raw_btf) + return; + expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size); + free(raw_btf); + if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld", + PTR_ERR(expect_btf))) { + err = -1; + goto done; + } + + err = btf__dedup(test_btf, NULL, &test->opts); + if (CHECK(err, "btf_dedup failed errno:%d", err)) { + err = -1; + goto done; + } + + test_btf_data = btf__get_raw_data(test_btf, &test_btf_size); + expect_btf_data = btf__get_raw_data(expect_btf, &expect_btf_size); + if (CHECK(test_btf_size != expect_btf_size, + "test_btf_size:%u != expect_btf_size:%u", + test_btf_size, expect_btf_size)) { + err = -1; + goto done; + } + + test_hdr = test_btf_data; + test_strs = test_btf_data + sizeof(*test_hdr) + test_hdr->str_off; + expect_hdr = expect_btf_data; + expect_strs = expect_btf_data + sizeof(*test_hdr) + expect_hdr->str_off; + if (CHECK(test_hdr->str_len != expect_hdr->str_len, + "test_hdr->str_len:%u != expect_hdr->str_len:%u", + test_hdr->str_len, expect_hdr->str_len)) { + fprintf(stderr, "\ntest strings:\n"); + dump_btf_strings(test_strs, test_hdr->str_len); + fprintf(stderr, "\nexpected strings:\n"); + dump_btf_strings(expect_strs, expect_hdr->str_len); + err = -1; + goto done; + } + + test_str_cur = test_strs; + test_str_end = test_strs + test_hdr->str_len; + expect_str_cur = expect_strs; + expect_str_end = expect_strs + expect_hdr->str_len; + while (test_str_cur < test_str_end && expect_str_cur < expect_str_end) { + size_t test_len, expect_len; + + test_len = strlen(test_str_cur); + expect_len = strlen(expect_str_cur); + if (CHECK(test_len != expect_len, + "test_len:%zu != expect_len:%zu " + "(test_str:%s, expect_str:%s)", + test_len, expect_len, test_str_cur, expect_str_cur)) { + err = -1; + goto done; + } + if (CHECK(strcmp(test_str_cur, expect_str_cur), + "test_str:%s != expect_str:%s", + test_str_cur, expect_str_cur)) { + err = -1; + goto done; + } + test_str_cur += test_len + 1; + expect_str_cur += expect_len + 1; + } + if (CHECK(test_str_cur != test_str_end, + "test_str_cur:%p != test_str_end:%p", + test_str_cur, test_str_end)) { + err = -1; + goto done; + } + + test_nr_types = btf__get_nr_types(test_btf); + expect_nr_types = btf__get_nr_types(expect_btf); + if (CHECK(test_nr_types != expect_nr_types, + "test_nr_types:%u != expect_nr_types:%u", + test_nr_types, expect_nr_types)) { + err = -1; + goto done; + } + + for (i = 1; i <= test_nr_types; i++) { + const struct btf_type *test_type, *expect_type; + int test_size, expect_size; + + test_type = btf__type_by_id(test_btf, i); + expect_type = btf__type_by_id(expect_btf, i); + test_size = btf_type_size(test_type); + expect_size = btf_type_size(expect_type); + + if (CHECK(test_size != expect_size, + "type #%d: test_size:%d != expect_size:%u", + i, test_size, expect_size)) { + err = -1; + goto done; + } + if (CHECK(memcmp((void *)test_type, + (void *)expect_type, + test_size), + "type #%d: contents differ", i)) { + err = -1; + goto done; + } + } + +done: + if (!IS_ERR(test_btf)) + btf__free(test_btf); + if (!IS_ERR(expect_btf)) + btf__free(expect_btf); +} + +void test_btf(void) +{ + int i; + + always_log = env.verbosity > VERBOSE_NONE; + + for (i = 1; i <= ARRAY_SIZE(raw_tests); i++) + do_test_raw(i); + for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++) + do_test_get_info(i); + for (i = 1; i <= ARRAY_SIZE(file_tests); i++) + do_test_file(i); + for (i = 1; i <= ARRAY_SIZE(info_raw_tests); i++) + do_test_info_raw(i); + for (i = 1; i <= ARRAY_SIZE(dedup_tests); i++) + do_test_dedup(i); + test_pprint(); +} diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c deleted file mode 100644 index c75fc6447186..000000000000 --- a/tools/testing/selftests/bpf/test_btf.c +++ /dev/null @@ -1,7067 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (c) 2018 Facebook */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "bpf_rlimit.h" -#include "bpf_util.h" -#include "test_btf.h" - -#define MAX_INSNS 512 -#define MAX_SUBPROGS 16 - -static uint32_t pass_cnt; -static uint32_t error_cnt; -static uint32_t skip_cnt; - -#define CHECK(condition, format...) ({ \ - int __ret = !!(condition); \ - if (__ret) { \ - fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \ - fprintf(stderr, format); \ - } \ - __ret; \ -}) - -static int count_result(int err) -{ - if (err) - error_cnt++; - else - pass_cnt++; - - fprintf(stderr, "\n"); - return err; -} - -static int __base_pr(enum libbpf_print_level level __attribute__((unused)), - const char *format, va_list args) -{ - return vfprintf(stderr, format, args); -} - -#define BTF_END_RAW 0xdeadbeef -#define NAME_TBD 0xdeadb33f - -#define NAME_NTH(N) (0xffff0000 | N) -#define IS_NAME_NTH(X) ((X & 0xffff0000) == 0xffff0000) -#define GET_NAME_NTH_IDX(X) (X & 0x0000ffff) - -#define MAX_NR_RAW_U32 1024 -#define BTF_LOG_BUF_SIZE 65535 - -static struct args { - unsigned int raw_test_num; - unsigned int file_test_num; - unsigned int get_info_test_num; - unsigned int info_raw_test_num; - unsigned int dedup_test_num; - bool raw_test; - bool file_test; - bool get_info_test; - bool pprint_test; - bool always_log; - bool info_raw_test; - bool dedup_test; -} args; - -static char btf_log_buf[BTF_LOG_BUF_SIZE]; - -static struct btf_header hdr_tmpl = { - .magic = BTF_MAGIC, - .version = BTF_VERSION, - .hdr_len = sizeof(struct btf_header), -}; - -/* several different mapv kinds(types) supported by pprint */ -enum pprint_mapv_kind_t { - PPRINT_MAPV_KIND_BASIC = 0, - PPRINT_MAPV_KIND_INT128, -}; - -struct btf_raw_test { - const char *descr; - const char *str_sec; - const char *map_name; - const char *err_str; - __u32 raw_types[MAX_NR_RAW_U32]; - __u32 str_sec_size; - enum bpf_map_type map_type; - __u32 key_size; - __u32 value_size; - __u32 key_type_id; - __u32 value_type_id; - __u32 max_entries; - bool btf_load_err; - bool map_create_err; - bool ordered_map; - bool lossless_map; - bool percpu_map; - int hdr_len_delta; - int type_off_delta; - int str_off_delta; - int str_len_delta; - enum pprint_mapv_kind_t mapv_kind; -}; - -#define BTF_STR_SEC(str) \ - .str_sec = str, .str_sec_size = sizeof(str) - -static struct btf_raw_test raw_tests[] = { -/* enum E { - * E0, - * E1, - * }; - * - * struct A { - * unsigned long long m; - * int n; - * char o; - * [3 bytes hole] - * int p[8]; - * int q[4][8]; - * enum E r; - * }; - */ -{ - .descr = "struct test #1", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* unsigned long long */ - BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ - /* char */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ - /* int[8] */ - BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ - /* struct A { */ /* [5] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 6), 180), - BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ - BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ - BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ - BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ - BTF_MEMBER_ENC(NAME_TBD, 6, 384),/* int q[4][8] */ - BTF_MEMBER_ENC(NAME_TBD, 7, 1408), /* enum E r */ - /* } */ - /* int[4][8] */ - BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [6] */ - /* enum E */ /* [7] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)), - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_ENUM_ENC(NAME_TBD, 1), - BTF_END_RAW, - }, - .str_sec = "\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1", - .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_test1_map", - .key_size = sizeof(int), - .value_size = 180, - .key_type_id = 1, - .value_type_id = 5, - .max_entries = 4, -}, - -/* typedef struct b Struct_B; - * - * struct A { - * int m; - * struct b n[4]; - * const Struct_B o[4]; - * }; - * - * struct B { - * int m; - * int n; - * }; - */ -{ - .descr = "struct test #2", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* struct b [4] */ /* [2] */ - BTF_TYPE_ARRAY_ENC(4, 1, 4), - - /* struct A { */ /* [3] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 3), 68), - BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ - BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct B n[4] */ - BTF_MEMBER_ENC(NAME_TBD, 8, 288),/* const Struct_B o[4];*/ - /* } */ - - /* struct B { */ /* [4] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), - BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ - BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */ - /* } */ - - /* const int */ /* [5] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1), - /* typedef struct b Struct_B */ /* [6] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 4), - /* const Struct_B */ /* [7] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 6), - /* const Struct_B [4] */ /* [8] */ - BTF_TYPE_ARRAY_ENC(7, 1, 4), - BTF_END_RAW, - }, - .str_sec = "\0A\0m\0n\0o\0B\0m\0n\0Struct_B", - .str_sec_size = sizeof("\0A\0m\0n\0o\0B\0m\0n\0Struct_B"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_test2_map", - .key_size = sizeof(int), - .value_size = 68, - .key_type_id = 1, - .value_type_id = 3, - .max_entries = 4, -}, -{ - .descr = "struct test #3 Invalid member offset", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* int64 */ /* [2] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), - - /* struct A { */ /* [3] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 16), - BTF_MEMBER_ENC(NAME_TBD, 1, 64), /* int m; */ - BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* int64 n; */ - /* } */ - BTF_END_RAW, - }, - .str_sec = "\0A\0m\0n\0", - .str_sec_size = sizeof("\0A\0m\0n\0"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_test3_map", - .key_size = sizeof(int), - .value_size = 16, - .key_type_id = 1, - .value_type_id = 3, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid member bits_offset", -}, -/* - * struct A { - * unsigned long long m; - * int n; - * char o; - * [3 bytes hole] - * int p[8]; - * }; - */ -{ - .descr = "global data test #1", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* unsigned long long */ - BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ - /* char */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ - /* int[8] */ - BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ - /* struct A { */ /* [5] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), - BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ - BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ - BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ - BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ - /* } */ - BTF_END_RAW, - }, - .str_sec = "\0A\0m\0n\0o\0p", - .str_sec_size = sizeof("\0A\0m\0n\0o\0p"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_test1_map", - .key_size = sizeof(int), - .value_size = 48, - .key_type_id = 1, - .value_type_id = 5, - .max_entries = 4, -}, -/* - * struct A { - * unsigned long long m; - * int n; - * char o; - * [3 bytes hole] - * int p[8]; - * }; - * static struct A t; <- in .bss - */ -{ - .descr = "global data test #2", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* unsigned long long */ - BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ - /* char */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ - /* int[8] */ - BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ - /* struct A { */ /* [5] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), - BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ - BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ - BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ - BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ - /* } */ - /* static struct A t */ - BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ - /* .bss section */ /* [7] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 48), - BTF_VAR_SECINFO_ENC(6, 0, 48), - BTF_END_RAW, - }, - .str_sec = "\0A\0m\0n\0o\0p\0t\0.bss", - .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 48, - .key_type_id = 0, - .value_type_id = 7, - .max_entries = 1, -}, -{ - .descr = "global data test #3", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* static int t */ - BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ - /* .bss section */ /* [3] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(2, 0, 4), - BTF_END_RAW, - }, - .str_sec = "\0t\0.bss", - .str_sec_size = sizeof("\0t\0.bss"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 0, - .value_type_id = 3, - .max_entries = 1, -}, -{ - .descr = "global data test #4, unsupported linkage", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* static int t */ - BTF_VAR_ENC(NAME_TBD, 1, 2), /* [2] */ - /* .bss section */ /* [3] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(2, 0, 4), - BTF_END_RAW, - }, - .str_sec = "\0t\0.bss", - .str_sec_size = sizeof("\0t\0.bss"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 0, - .value_type_id = 3, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Linkage not supported", -}, -{ - .descr = "global data test #5, invalid var type", - .raw_types = { - /* static void t */ - BTF_VAR_ENC(NAME_TBD, 0, 0), /* [1] */ - /* .bss section */ /* [2] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(1, 0, 4), - BTF_END_RAW, - }, - .str_sec = "\0t\0.bss", - .str_sec_size = sizeof("\0t\0.bss"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 0, - .value_type_id = 2, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid type_id", -}, -{ - .descr = "global data test #6, invalid var type (fwd type)", - .raw_types = { - /* union A */ - BTF_TYPE_ENC(NAME_TBD, - BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [1] */ - /* static union A t */ - BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ - /* .bss section */ /* [3] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(2, 0, 4), - BTF_END_RAW, - }, - .str_sec = "\0A\0t\0.bss", - .str_sec_size = sizeof("\0A\0t\0.bss"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 0, - .value_type_id = 2, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid type", -}, -{ - .descr = "global data test #7, invalid var type (fwd type)", - .raw_types = { - /* union A */ - BTF_TYPE_ENC(NAME_TBD, - BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [1] */ - /* static union A t */ - BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ - /* .bss section */ /* [3] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(1, 0, 4), - BTF_END_RAW, - }, - .str_sec = "\0A\0t\0.bss", - .str_sec_size = sizeof("\0A\0t\0.bss"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 0, - .value_type_id = 2, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid type", -}, -{ - .descr = "global data test #8, invalid var size", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* unsigned long long */ - BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ - /* char */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ - /* int[8] */ - BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ - /* struct A { */ /* [5] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), - BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ - BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ - BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ - BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ - /* } */ - /* static struct A t */ - BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ - /* .bss section */ /* [7] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 48), - BTF_VAR_SECINFO_ENC(6, 0, 47), - BTF_END_RAW, - }, - .str_sec = "\0A\0m\0n\0o\0p\0t\0.bss", - .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 48, - .key_type_id = 0, - .value_type_id = 7, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid size", -}, -{ - .descr = "global data test #9, invalid var size", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* unsigned long long */ - BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ - /* char */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ - /* int[8] */ - BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ - /* struct A { */ /* [5] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), - BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ - BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ - BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ - BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ - /* } */ - /* static struct A t */ - BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ - /* .bss section */ /* [7] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 46), - BTF_VAR_SECINFO_ENC(6, 0, 48), - BTF_END_RAW, - }, - .str_sec = "\0A\0m\0n\0o\0p\0t\0.bss", - .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 48, - .key_type_id = 0, - .value_type_id = 7, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid size", -}, -{ - .descr = "global data test #10, invalid var size", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* unsigned long long */ - BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ - /* char */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ - /* int[8] */ - BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ - /* struct A { */ /* [5] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), - BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ - BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ - BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ - BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ - /* } */ - /* static struct A t */ - BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ - /* .bss section */ /* [7] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 46), - BTF_VAR_SECINFO_ENC(6, 0, 46), - BTF_END_RAW, - }, - .str_sec = "\0A\0m\0n\0o\0p\0t\0.bss", - .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 48, - .key_type_id = 0, - .value_type_id = 7, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid size", -}, -{ - .descr = "global data test #11, multiple section members", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* unsigned long long */ - BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ - /* char */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ - /* int[8] */ - BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ - /* struct A { */ /* [5] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), - BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ - BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ - BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ - BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ - /* } */ - /* static struct A t */ - BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ - /* static int u */ - BTF_VAR_ENC(NAME_TBD, 1, 0), /* [7] */ - /* .bss section */ /* [8] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62), - BTF_VAR_SECINFO_ENC(6, 10, 48), - BTF_VAR_SECINFO_ENC(7, 58, 4), - BTF_END_RAW, - }, - .str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss", - .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 62, - .key_type_id = 0, - .value_type_id = 8, - .max_entries = 1, -}, -{ - .descr = "global data test #12, invalid offset", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* unsigned long long */ - BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ - /* char */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ - /* int[8] */ - BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ - /* struct A { */ /* [5] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), - BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ - BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ - BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ - BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ - /* } */ - /* static struct A t */ - BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ - /* static int u */ - BTF_VAR_ENC(NAME_TBD, 1, 0), /* [7] */ - /* .bss section */ /* [8] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62), - BTF_VAR_SECINFO_ENC(6, 10, 48), - BTF_VAR_SECINFO_ENC(7, 60, 4), - BTF_END_RAW, - }, - .str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss", - .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 62, - .key_type_id = 0, - .value_type_id = 8, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid offset+size", -}, -{ - .descr = "global data test #13, invalid offset", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* unsigned long long */ - BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ - /* char */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ - /* int[8] */ - BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ - /* struct A { */ /* [5] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), - BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ - BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ - BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ - BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ - /* } */ - /* static struct A t */ - BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ - /* static int u */ - BTF_VAR_ENC(NAME_TBD, 1, 0), /* [7] */ - /* .bss section */ /* [8] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62), - BTF_VAR_SECINFO_ENC(6, 10, 48), - BTF_VAR_SECINFO_ENC(7, 12, 4), - BTF_END_RAW, - }, - .str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss", - .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 62, - .key_type_id = 0, - .value_type_id = 8, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid offset", -}, -{ - .descr = "global data test #14, invalid offset", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* unsigned long long */ - BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ - /* char */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ - /* int[8] */ - BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ - /* struct A { */ /* [5] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48), - BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ - BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ - BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ - BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ - /* } */ - /* static struct A t */ - BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ - /* static int u */ - BTF_VAR_ENC(NAME_TBD, 1, 0), /* [7] */ - /* .bss section */ /* [8] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62), - BTF_VAR_SECINFO_ENC(7, 58, 4), - BTF_VAR_SECINFO_ENC(6, 10, 48), - BTF_END_RAW, - }, - .str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss", - .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 62, - .key_type_id = 0, - .value_type_id = 8, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid offset", -}, -{ - .descr = "global data test #15, not var kind", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ - /* .bss section */ /* [3] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(1, 0, 4), - BTF_END_RAW, - }, - .str_sec = "\0A\0t\0.bss", - .str_sec_size = sizeof("\0A\0t\0.bss"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 0, - .value_type_id = 3, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Not a VAR kind member", -}, -{ - .descr = "global data test #16, invalid var referencing sec", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_VAR_ENC(NAME_TBD, 5, 0), /* [2] */ - BTF_VAR_ENC(NAME_TBD, 2, 0), /* [3] */ - /* a section */ /* [4] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(3, 0, 4), - /* a section */ /* [5] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(6, 0, 4), - BTF_VAR_ENC(NAME_TBD, 1, 0), /* [6] */ - BTF_END_RAW, - }, - .str_sec = "\0A\0t\0s\0a\0a", - .str_sec_size = sizeof("\0A\0t\0s\0a\0a"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 0, - .value_type_id = 4, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid type_id", -}, -{ - .descr = "global data test #17, invalid var referencing var", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ - BTF_VAR_ENC(NAME_TBD, 2, 0), /* [3] */ - /* a section */ /* [4] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(3, 0, 4), - BTF_END_RAW, - }, - .str_sec = "\0A\0t\0s\0a\0a", - .str_sec_size = sizeof("\0A\0t\0s\0a\0a"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 0, - .value_type_id = 4, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid type_id", -}, -{ - .descr = "global data test #18, invalid var loop", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_VAR_ENC(NAME_TBD, 2, 0), /* [2] */ - /* .bss section */ /* [3] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(2, 0, 4), - BTF_END_RAW, - }, - .str_sec = "\0A\0t\0aaa", - .str_sec_size = sizeof("\0A\0t\0aaa"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 0, - .value_type_id = 4, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid type_id", -}, -{ - .descr = "global data test #19, invalid var referencing var", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_VAR_ENC(NAME_TBD, 3, 0), /* [2] */ - BTF_VAR_ENC(NAME_TBD, 1, 0), /* [3] */ - BTF_END_RAW, - }, - .str_sec = "\0A\0t\0s\0a\0a", - .str_sec_size = sizeof("\0A\0t\0s\0a\0a"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 0, - .value_type_id = 4, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid type_id", -}, -{ - .descr = "global data test #20, invalid ptr referencing var", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* PTR type_id=3 */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3), - BTF_VAR_ENC(NAME_TBD, 1, 0), /* [3] */ - BTF_END_RAW, - }, - .str_sec = "\0A\0t\0s\0a\0a", - .str_sec_size = sizeof("\0A\0t\0s\0a\0a"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 0, - .value_type_id = 4, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid type_id", -}, -{ - .descr = "global data test #21, var included in struct", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* struct A { */ /* [2] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2), - BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ - BTF_MEMBER_ENC(NAME_TBD, 3, 32),/* VAR type_id=3; */ - /* } */ - BTF_VAR_ENC(NAME_TBD, 1, 0), /* [3] */ - BTF_END_RAW, - }, - .str_sec = "\0A\0t\0s\0a\0a", - .str_sec_size = sizeof("\0A\0t\0s\0a\0a"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 0, - .value_type_id = 4, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid member", -}, -{ - .descr = "global data test #22, array of var", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ARRAY_ENC(3, 1, 4), /* [2] */ - BTF_VAR_ENC(NAME_TBD, 1, 0), /* [3] */ - BTF_END_RAW, - }, - .str_sec = "\0A\0t\0s\0a\0a", - .str_sec_size = sizeof("\0A\0t\0s\0a\0a"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = ".bss", - .key_size = sizeof(int), - .value_size = 4, - .key_type_id = 0, - .value_type_id = 4, - .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid elem", -}, -/* Test member exceeds the size of struct. - * - * struct A { - * int m; - * int n; - * }; - */ -{ - .descr = "size check test #1", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* struct A { */ /* [2] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1), - BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ - BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */ - /* } */ - BTF_END_RAW, - }, - .str_sec = "\0A\0m\0n", - .str_sec_size = sizeof("\0A\0m\0n"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "size_check1_map", - .key_size = sizeof(int), - .value_size = 1, - .key_type_id = 1, - .value_type_id = 2, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Member exceeds struct_size", -}, - -/* Test member exeeds the size of struct - * - * struct A { - * int m; - * int n[2]; - * }; - */ -{ - .descr = "size check test #2", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)), - /* int[2] */ /* [2] */ - BTF_TYPE_ARRAY_ENC(1, 1, 2), - /* struct A { */ /* [3] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 3 - 1), - BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ - BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n[2]; */ - /* } */ - BTF_END_RAW, - }, - .str_sec = "\0A\0m\0n", - .str_sec_size = sizeof("\0A\0m\0n"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "size_check2_map", - .key_size = sizeof(int), - .value_size = 1, - .key_type_id = 1, - .value_type_id = 3, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Member exceeds struct_size", -}, - -/* Test member exeeds the size of struct - * - * struct A { - * int m; - * void *n; - * }; - */ -{ - .descr = "size check test #3", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)), - /* void* */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), - /* struct A { */ /* [3] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) + sizeof(void *) - 1), - BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ - BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* void *n; */ - /* } */ - BTF_END_RAW, - }, - .str_sec = "\0A\0m\0n", - .str_sec_size = sizeof("\0A\0m\0n"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "size_check3_map", - .key_size = sizeof(int), - .value_size = 1, - .key_type_id = 1, - .value_type_id = 3, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Member exceeds struct_size", -}, - -/* Test member exceeds the size of struct - * - * enum E { - * E0, - * E1, - * }; - * - * struct A { - * int m; - * enum E n; - * }; - */ -{ - .descr = "size check test #4", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)), - /* enum E { */ /* [2] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)), - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_ENUM_ENC(NAME_TBD, 1), - /* } */ - /* struct A { */ /* [3] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1), - BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ - BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* enum E n; */ - /* } */ - BTF_END_RAW, - }, - .str_sec = "\0E\0E0\0E1\0A\0m\0n", - .str_sec_size = sizeof("\0E\0E0\0E1\0A\0m\0n"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "size_check4_map", - .key_size = sizeof(int), - .value_size = 1, - .key_type_id = 1, - .value_type_id = 3, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Member exceeds struct_size", -}, - -/* Test member unexceeds the size of struct - * - * enum E { - * E0, - * E1, - * }; - * - * struct A { - * char m; - * enum E __attribute__((packed)) n; - * }; - */ -{ - .descr = "size check test #5", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)), - /* char */ /* [2] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), - /* enum E { */ /* [3] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), 1), - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_ENUM_ENC(NAME_TBD, 1), - /* } */ - /* struct A { */ /* [4] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 2), - BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* char m; */ - BTF_MEMBER_ENC(NAME_TBD, 3, 8),/* enum E __attribute__((packed)) n; */ - /* } */ - BTF_END_RAW, - }, - .str_sec = "\0E\0E0\0E1\0A\0m\0n", - .str_sec_size = sizeof("\0E\0E0\0E1\0A\0m\0n"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "size_check5_map", - .key_size = sizeof(int), - .value_size = 2, - .key_type_id = 1, - .value_type_id = 4, - .max_entries = 4, -}, - -/* typedef const void * const_void_ptr; - * struct A { - * const_void_ptr m; - * }; - */ -{ - .descr = "void test #1", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* const void */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), - /* const void* */ /* [3] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), - /* typedef const void * const_void_ptr */ - BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [4] */ - /* struct A { */ /* [5] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)), - /* const_void_ptr m; */ - BTF_MEMBER_ENC(NAME_TBD, 4, 0), - /* } */ - BTF_END_RAW, - }, - .str_sec = "\0const_void_ptr\0A\0m", - .str_sec_size = sizeof("\0const_void_ptr\0A\0m"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "void_test1_map", - .key_size = sizeof(int), - .value_size = sizeof(void *), - .key_type_id = 1, - .value_type_id = 4, - .max_entries = 4, -}, - -/* struct A { - * const void m; - * }; - */ -{ - .descr = "void test #2", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* const void */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), - /* struct A { */ /* [3] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 8), - /* const void m; */ - BTF_MEMBER_ENC(NAME_TBD, 2, 0), - /* } */ - BTF_END_RAW, - }, - .str_sec = "\0A\0m", - .str_sec_size = sizeof("\0A\0m"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "void_test2_map", - .key_size = sizeof(int), - .value_size = sizeof(void *), - .key_type_id = 1, - .value_type_id = 3, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid member", -}, - -/* typedef const void * const_void_ptr; - * const_void_ptr[4] - */ -{ - .descr = "void test #3", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* const void */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), - /* const void* */ /* [3] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), - /* typedef const void * const_void_ptr */ - BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [4] */ - /* const_void_ptr[4] */ - BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [5] */ - BTF_END_RAW, - }, - .str_sec = "\0const_void_ptr", - .str_sec_size = sizeof("\0const_void_ptr"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "void_test3_map", - .key_size = sizeof(int), - .value_size = sizeof(void *) * 4, - .key_type_id = 1, - .value_type_id = 5, - .max_entries = 4, -}, - -/* const void[4] */ -{ - .descr = "void test #4", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* const void */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), - /* const void[4] */ /* [3] */ - BTF_TYPE_ARRAY_ENC(2, 1, 4), - BTF_END_RAW, - }, - .str_sec = "\0A\0m", - .str_sec_size = sizeof("\0A\0m"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "void_test4_map", - .key_size = sizeof(int), - .value_size = sizeof(void *) * 4, - .key_type_id = 1, - .value_type_id = 3, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid elem", -}, - -/* Array_A <------------------+ - * elem_type == Array_B | - * | | - * | | - * Array_B <-------- + | - * elem_type == Array A --+ - */ -{ - .descr = "loop test #1", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* Array_A */ /* [2] */ - BTF_TYPE_ARRAY_ENC(3, 1, 8), - /* Array_B */ /* [3] */ - BTF_TYPE_ARRAY_ENC(2, 1, 8), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "loop_test1_map", - .key_size = sizeof(int), - .value_size = sizeof(sizeof(int) * 8), - .key_type_id = 1, - .value_type_id = 2, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Loop detected", -}, - -/* typedef is _before_ the BTF type of Array_A and Array_B - * - * typedef Array_B int_array; - * - * Array_A <------------------+ - * elem_type == int_array | - * | | - * | | - * Array_B <-------- + | - * elem_type == Array_A --+ - */ -{ - .descr = "loop test #2", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* typedef Array_B int_array */ - BTF_TYPEDEF_ENC(1, 4), /* [2] */ - /* Array_A */ - BTF_TYPE_ARRAY_ENC(2, 1, 8), /* [3] */ - /* Array_B */ - BTF_TYPE_ARRAY_ENC(3, 1, 8), /* [4] */ - BTF_END_RAW, - }, - .str_sec = "\0int_array\0", - .str_sec_size = sizeof("\0int_array"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "loop_test2_map", - .key_size = sizeof(int), - .value_size = sizeof(sizeof(int) * 8), - .key_type_id = 1, - .value_type_id = 2, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Loop detected", -}, - -/* Array_A <------------------+ - * elem_type == Array_B | - * | | - * | | - * Array_B <-------- + | - * elem_type == Array_A --+ - */ -{ - .descr = "loop test #3", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* Array_A */ /* [2] */ - BTF_TYPE_ARRAY_ENC(3, 1, 8), - /* Array_B */ /* [3] */ - BTF_TYPE_ARRAY_ENC(2, 1, 8), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "loop_test3_map", - .key_size = sizeof(int), - .value_size = sizeof(sizeof(int) * 8), - .key_type_id = 1, - .value_type_id = 2, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Loop detected", -}, - -/* typedef is _between_ the BTF type of Array_A and Array_B - * - * typedef Array_B int_array; - * - * Array_A <------------------+ - * elem_type == int_array | - * | | - * | | - * Array_B <-------- + | - * elem_type == Array_A --+ - */ -{ - .descr = "loop test #4", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* Array_A */ /* [2] */ - BTF_TYPE_ARRAY_ENC(3, 1, 8), - /* typedef Array_B int_array */ /* [3] */ - BTF_TYPEDEF_ENC(NAME_TBD, 4), - /* Array_B */ /* [4] */ - BTF_TYPE_ARRAY_ENC(2, 1, 8), - BTF_END_RAW, - }, - .str_sec = "\0int_array\0", - .str_sec_size = sizeof("\0int_array"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "loop_test4_map", - .key_size = sizeof(int), - .value_size = sizeof(sizeof(int) * 8), - .key_type_id = 1, - .value_type_id = 2, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Loop detected", -}, - -/* typedef struct B Struct_B - * - * struct A { - * int x; - * Struct_B y; - * }; - * - * struct B { - * int x; - * struct A y; - * }; - */ -{ - .descr = "loop test #5", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* struct A */ /* [2] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), - BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */ - BTF_MEMBER_ENC(NAME_TBD, 3, 32),/* Struct_B y; */ - /* typedef struct B Struct_B */ - BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ - /* struct B */ /* [4] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), - BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */ - BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A y; */ - BTF_END_RAW, - }, - .str_sec = "\0A\0x\0y\0Struct_B\0B\0x\0y", - .str_sec_size = sizeof("\0A\0x\0y\0Struct_B\0B\0x\0y"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "loop_test5_map", - .key_size = sizeof(int), - .value_size = 8, - .key_type_id = 1, - .value_type_id = 2, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Loop detected", -}, - -/* struct A { - * int x; - * struct A array_a[4]; - * }; - */ -{ - .descr = "loop test #6", - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ARRAY_ENC(3, 1, 4), /* [2] */ - /* struct A */ /* [3] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), - BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */ - BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A array_a[4]; */ - BTF_END_RAW, - }, - .str_sec = "\0A\0x\0y", - .str_sec_size = sizeof("\0A\0x\0y"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "loop_test6_map", - .key_size = sizeof(int), - .value_size = 8, - .key_type_id = 1, - .value_type_id = 2, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Loop detected", -}, - -{ - .descr = "loop test #7", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* struct A { */ /* [2] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)), - /* const void *m; */ - BTF_MEMBER_ENC(NAME_TBD, 3, 0), - /* CONST type_id=3 */ /* [3] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4), - /* PTR type_id=2 */ /* [4] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3), - BTF_END_RAW, - }, - .str_sec = "\0A\0m", - .str_sec_size = sizeof("\0A\0m"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "loop_test7_map", - .key_size = sizeof(int), - .value_size = sizeof(void *), - .key_type_id = 1, - .value_type_id = 2, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Loop detected", -}, - -{ - .descr = "loop test #8", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* struct A { */ /* [2] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)), - /* const void *m; */ - BTF_MEMBER_ENC(NAME_TBD, 4, 0), - /* struct B { */ /* [3] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)), - /* const void *n; */ - BTF_MEMBER_ENC(NAME_TBD, 6, 0), - /* CONST type_id=5 */ /* [4] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 5), - /* PTR type_id=6 */ /* [5] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 6), - /* CONST type_id=7 */ /* [6] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 7), - /* PTR type_id=4 */ /* [7] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 4), - BTF_END_RAW, - }, - .str_sec = "\0A\0m\0B\0n", - .str_sec_size = sizeof("\0A\0m\0B\0n"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "loop_test8_map", - .key_size = sizeof(int), - .value_size = sizeof(void *), - .key_type_id = 1, - .value_type_id = 2, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Loop detected", -}, - -{ - .descr = "string section does not end with null", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), - BTF_END_RAW, - }, - .str_sec = "\0int", - .str_sec_size = sizeof("\0int") - 1, - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "hdr_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid string section", -}, - -{ - .descr = "empty string section", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = 0, - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "hdr_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid string section", -}, - -{ - .descr = "empty type section", - .raw_types = { - BTF_END_RAW, - }, - .str_sec = "\0int", - .str_sec_size = sizeof("\0int"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "hdr_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "No type found", -}, - -{ - .descr = "btf_header test. Longer hdr_len", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), - BTF_END_RAW, - }, - .str_sec = "\0int", - .str_sec_size = sizeof("\0int"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "hdr_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .hdr_len_delta = 4, - .err_str = "Unsupported btf_header", -}, - -{ - .descr = "btf_header test. Gap between hdr and type", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), - BTF_END_RAW, - }, - .str_sec = "\0int", - .str_sec_size = sizeof("\0int"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "hdr_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .type_off_delta = 4, - .err_str = "Unsupported section found", -}, - -{ - .descr = "btf_header test. Gap between type and str", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), - BTF_END_RAW, - }, - .str_sec = "\0int", - .str_sec_size = sizeof("\0int"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "hdr_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .str_off_delta = 4, - .err_str = "Unsupported section found", -}, - -{ - .descr = "btf_header test. Overlap between type and str", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), - BTF_END_RAW, - }, - .str_sec = "\0int", - .str_sec_size = sizeof("\0int"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "hdr_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .str_off_delta = -4, - .err_str = "Section overlap found", -}, - -{ - .descr = "btf_header test. Larger BTF size", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), - BTF_END_RAW, - }, - .str_sec = "\0int", - .str_sec_size = sizeof("\0int"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "hdr_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .str_len_delta = -4, - .err_str = "Unsupported section found", -}, - -{ - .descr = "btf_header test. Smaller BTF size", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), - BTF_END_RAW, - }, - .str_sec = "\0int", - .str_sec_size = sizeof("\0int"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "hdr_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .str_len_delta = 4, - .err_str = "Total section length too long", -}, - -{ - .descr = "array test. index_type/elem_type \"int\"", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* int[16] */ /* [2] */ - BTF_TYPE_ARRAY_ENC(1, 1, 16), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "array_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "array test. index_type/elem_type \"const int\"", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* int[16] */ /* [2] */ - BTF_TYPE_ARRAY_ENC(3, 3, 16), - /* CONST type_id=1 */ /* [3] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "array_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "array test. index_type \"const int:31\"", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* int:31 */ /* [2] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4), - /* int[16] */ /* [3] */ - BTF_TYPE_ARRAY_ENC(1, 4, 16), - /* CONST type_id=2 */ /* [4] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "array_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid index", -}, - -{ - .descr = "array test. elem_type \"const int:31\"", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* int:31 */ /* [2] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4), - /* int[16] */ /* [3] */ - BTF_TYPE_ARRAY_ENC(4, 1, 16), - /* CONST type_id=2 */ /* [4] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "array_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid array of int", -}, - -{ - .descr = "array test. index_type \"void\"", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* int[16] */ /* [2] */ - BTF_TYPE_ARRAY_ENC(1, 0, 16), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "array_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid index", -}, - -{ - .descr = "array test. index_type \"const void\"", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* int[16] */ /* [2] */ - BTF_TYPE_ARRAY_ENC(1, 3, 16), - /* CONST type_id=0 (void) */ /* [3] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "array_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid index", -}, - -{ - .descr = "array test. elem_type \"const void\"", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* int[16] */ /* [2] */ - BTF_TYPE_ARRAY_ENC(3, 1, 16), - /* CONST type_id=0 (void) */ /* [3] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "array_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid elem", -}, - -{ - .descr = "array test. elem_type \"const void *\"", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* const void *[16] */ /* [2] */ - BTF_TYPE_ARRAY_ENC(3, 1, 16), - /* CONST type_id=4 */ /* [3] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4), - /* void* */ /* [4] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "array_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "array test. index_type \"const void *\"", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* const void *[16] */ /* [2] */ - BTF_TYPE_ARRAY_ENC(3, 3, 16), - /* CONST type_id=4 */ /* [3] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4), - /* void* */ /* [4] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "array_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid index", -}, - -{ - .descr = "array test. t->size != 0\"", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* int[16] */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 1), - BTF_ARRAY_ENC(1, 1, 16), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "array_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "size != 0", -}, - -{ - .descr = "int test. invalid int_data", - .raw_types = { - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), 4), - 0x10000000, - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "array_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid int_data", -}, - -{ - .descr = "invalid BTF_INFO", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - BTF_TYPE_ENC(0, 0x10000000, 4), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "array_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid btf_info", -}, - -{ - .descr = "fwd test. t->type != 0\"", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* fwd type */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 1), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "fwd_test_map", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "type != 0", -}, - -{ - .descr = "typedef (invalid name, name_off = 0)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPEDEF_ENC(0, 1), /* [2] */ - BTF_END_RAW, - }, - .str_sec = "\0__int", - .str_sec_size = sizeof("\0__int"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "typedef_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid name", -}, - -{ - .descr = "typedef (invalid name, invalid identifier)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [2] */ - BTF_END_RAW, - }, - .str_sec = "\0__!int", - .str_sec_size = sizeof("\0__!int"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "typedef_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid name", -}, - -{ - .descr = "ptr type (invalid name, name_off <> 0)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(NAME_TBD, - BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1), /* [2] */ - BTF_END_RAW, - }, - .str_sec = "\0__int", - .str_sec_size = sizeof("\0__int"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "ptr_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid name", -}, - -{ - .descr = "volatile type (invalid name, name_off <> 0)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(NAME_TBD, - BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 1), /* [2] */ - BTF_END_RAW, - }, - .str_sec = "\0__int", - .str_sec_size = sizeof("\0__int"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "volatile_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid name", -}, - -{ - .descr = "const type (invalid name, name_off <> 0)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(NAME_TBD, - BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1), /* [2] */ - BTF_END_RAW, - }, - .str_sec = "\0__int", - .str_sec_size = sizeof("\0__int"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "const_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid name", -}, - -{ - .descr = "restrict type (invalid name, name_off <> 0)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1), /* [2] */ - BTF_TYPE_ENC(NAME_TBD, - BTF_INFO_ENC(BTF_KIND_RESTRICT, 0, 0), 2), /* [3] */ - BTF_END_RAW, - }, - .str_sec = "\0__int", - .str_sec_size = sizeof("\0__int"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "restrict_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid name", -}, - -{ - .descr = "fwd type (invalid name, name_off = 0)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0), /* [2] */ - BTF_END_RAW, - }, - .str_sec = "\0__skb", - .str_sec_size = sizeof("\0__skb"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "fwd_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid name", -}, - -{ - .descr = "fwd type (invalid name, invalid identifier)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(NAME_TBD, - BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0), /* [2] */ - BTF_END_RAW, - }, - .str_sec = "\0__!skb", - .str_sec_size = sizeof("\0__!skb"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "fwd_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid name", -}, - -{ - .descr = "array type (invalid name, name_off <> 0)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(NAME_TBD, - BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), /* [2] */ - BTF_ARRAY_ENC(1, 1, 4), - BTF_END_RAW, - }, - .str_sec = "\0__skb", - .str_sec_size = sizeof("\0__skb"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "array_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid name", -}, - -{ - .descr = "struct type (name_off = 0)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, - BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ - BTF_MEMBER_ENC(NAME_TBD, 1, 0), - BTF_END_RAW, - }, - .str_sec = "\0A", - .str_sec_size = sizeof("\0A"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "struct type (invalid name, invalid identifier)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(NAME_TBD, - BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ - BTF_MEMBER_ENC(NAME_TBD, 1, 0), - BTF_END_RAW, - }, - .str_sec = "\0A!\0B", - .str_sec_size = sizeof("\0A!\0B"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid name", -}, - -{ - .descr = "struct member (name_off = 0)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, - BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ - BTF_MEMBER_ENC(NAME_TBD, 1, 0), - BTF_END_RAW, - }, - .str_sec = "\0A", - .str_sec_size = sizeof("\0A"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "struct member (invalid name, invalid identifier)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(NAME_TBD, - BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ - BTF_MEMBER_ENC(NAME_TBD, 1, 0), - BTF_END_RAW, - }, - .str_sec = "\0A\0B*", - .str_sec_size = sizeof("\0A\0B*"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid name", -}, - -{ - .descr = "enum type (name_off = 0)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, - BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), - sizeof(int)), /* [2] */ - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_END_RAW, - }, - .str_sec = "\0A\0B", - .str_sec_size = sizeof("\0A\0B"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "enum_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "enum type (invalid name, invalid identifier)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(NAME_TBD, - BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), - sizeof(int)), /* [2] */ - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_END_RAW, - }, - .str_sec = "\0A!\0B", - .str_sec_size = sizeof("\0A!\0B"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "enum_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid name", -}, - -{ - .descr = "enum member (invalid name, name_off = 0)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, - BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), - sizeof(int)), /* [2] */ - BTF_ENUM_ENC(0, 0), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "enum_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid name", -}, - -{ - .descr = "enum member (invalid name, invalid identifier)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, - BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), - sizeof(int)), /* [2] */ - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_END_RAW, - }, - .str_sec = "\0A!", - .str_sec_size = sizeof("\0A!"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "enum_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid name", -}, -{ - .descr = "arraymap invalid btf key (a bit field)", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* 32 bit int with 32 bit offset */ /* [2] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 32, 32, 8), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "array_map_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 2, - .value_type_id = 1, - .max_entries = 4, - .map_create_err = true, -}, - -{ - .descr = "arraymap invalid btf key (!= 32 bits)", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* 16 bit int with 0 bit offset */ /* [2] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 16, 2), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "array_map_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 2, - .value_type_id = 1, - .max_entries = 4, - .map_create_err = true, -}, - -{ - .descr = "arraymap invalid btf value (too small)", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "array_map_check_btf", - .key_size = sizeof(int), - /* btf_value_size < map->value_size */ - .value_size = sizeof(__u64), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .map_create_err = true, -}, - -{ - .descr = "arraymap invalid btf value (too big)", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "array_map_check_btf", - .key_size = sizeof(int), - /* btf_value_size > map->value_size */ - .value_size = sizeof(__u16), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .map_create_err = true, -}, - -{ - .descr = "func proto (int (*)(int, unsigned int))", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - /* int (*)(int, unsigned int) */ - BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(0, 1), - BTF_FUNC_PROTO_ARG_ENC(0, 2), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_proto_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "func proto (vararg)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - /* void (*)(int, unsigned int, ...) */ - BTF_FUNC_PROTO_ENC(0, 3), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(0, 1), - BTF_FUNC_PROTO_ARG_ENC(0, 2), - BTF_FUNC_PROTO_ARG_ENC(0, 0), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_proto_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "func proto (vararg with name)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - /* void (*)(int a, unsigned int b, ... c) */ - BTF_FUNC_PROTO_ENC(0, 3), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 0), - BTF_END_RAW, - }, - .str_sec = "\0a\0b\0c", - .str_sec_size = sizeof("\0a\0b\0c"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_proto_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid arg#3", -}, - -{ - .descr = "func proto (arg after vararg)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - /* void (*)(int a, ..., unsigned int b) */ - BTF_FUNC_PROTO_ENC(0, 3), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(0, 0), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), - BTF_END_RAW, - }, - .str_sec = "\0a\0b", - .str_sec_size = sizeof("\0a\0b"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_proto_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid arg#2", -}, - -{ - .descr = "func proto (CONST=>TYPEDEF=>PTR=>FUNC_PROTO)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - /* typedef void (*func_ptr)(int, unsigned int) */ - BTF_TYPEDEF_ENC(NAME_TBD, 5), /* [3] */ - /* const func_ptr */ - BTF_CONST_ENC(3), /* [4] */ - BTF_PTR_ENC(6), /* [5] */ - BTF_FUNC_PROTO_ENC(0, 2), /* [6] */ - BTF_FUNC_PROTO_ARG_ENC(0, 1), - BTF_FUNC_PROTO_ARG_ENC(0, 2), - BTF_END_RAW, - }, - .str_sec = "\0func_ptr", - .str_sec_size = sizeof("\0func_ptr"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_proto_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "func proto (TYPEDEF=>FUNC_PROTO)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ - BTF_FUNC_PROTO_ENC(0, 2), /* [4] */ - BTF_FUNC_PROTO_ARG_ENC(0, 1), - BTF_FUNC_PROTO_ARG_ENC(0, 2), - BTF_END_RAW, - }, - .str_sec = "\0func_typedef", - .str_sec_size = sizeof("\0func_typedef"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_proto_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "func proto (btf_resolve(arg))", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* void (*)(const void *) */ - BTF_FUNC_PROTO_ENC(0, 1), /* [2] */ - BTF_FUNC_PROTO_ARG_ENC(0, 3), - BTF_CONST_ENC(4), /* [3] */ - BTF_PTR_ENC(0), /* [4] */ - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_proto_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "func proto (Not all arg has name)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - /* void (*)(int, unsigned int b) */ - BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(0, 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), - BTF_END_RAW, - }, - .str_sec = "\0b", - .str_sec_size = sizeof("\0b"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_proto_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "func proto (Bad arg name_off)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - /* void (*)(int a, unsigned int ) */ - BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(0x0fffffff, 2), - BTF_END_RAW, - }, - .str_sec = "\0a", - .str_sec_size = sizeof("\0a"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_proto_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid arg#2", -}, - -{ - .descr = "func proto (Bad arg name)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - /* void (*)(int a, unsigned int !!!) */ - BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), - BTF_END_RAW, - }, - .str_sec = "\0a\0!!!", - .str_sec_size = sizeof("\0a\0!!!"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_proto_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid arg#2", -}, - -{ - .descr = "func proto (Invalid return type)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - /* (*)(int, unsigned int) */ - BTF_FUNC_PROTO_ENC(100, 2), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(0, 1), - BTF_FUNC_PROTO_ARG_ENC(0, 2), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_proto_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid return type", -}, - -{ - .descr = "func proto (with func name)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - /* void func_proto(int, unsigned int) */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 2), 0), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(0, 1), - BTF_FUNC_PROTO_ARG_ENC(0, 2), - BTF_END_RAW, - }, - .str_sec = "\0func_proto", - .str_sec_size = sizeof("\0func_proto"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_proto_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid name", -}, - -{ - .descr = "func proto (const void arg)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - /* void (*)(const void) */ - BTF_FUNC_PROTO_ENC(0, 1), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(0, 4), - BTF_CONST_ENC(0), /* [4] */ - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_proto_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid arg#1", -}, - -{ - .descr = "func (void func(int a, unsigned int b))", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - /* void (*)(int a, unsigned int b) */ - BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), - /* void func(int a, unsigned int b) */ - BTF_FUNC_ENC(NAME_TBD, 3), /* [4] */ - BTF_END_RAW, - }, - .str_sec = "\0a\0b\0func", - .str_sec_size = sizeof("\0a\0b\0func"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "func (No func name)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - /* void (*)(int a, unsigned int b) */ - BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), - /* void (int a, unsigned int b) */ - BTF_FUNC_ENC(0, 3), /* [4] */ - BTF_END_RAW, - }, - .str_sec = "\0a\0b", - .str_sec_size = sizeof("\0a\0b"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid name", -}, - -{ - .descr = "func (Invalid func name)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - /* void (*)(int a, unsigned int b) */ - BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), - /* void !!!(int a, unsigned int b) */ - BTF_FUNC_ENC(NAME_TBD, 3), /* [4] */ - BTF_END_RAW, - }, - .str_sec = "\0a\0b\0!!!", - .str_sec_size = sizeof("\0a\0b\0!!!"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid name", -}, - -{ - .descr = "func (Some arg has no name)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - /* void (*)(int a, unsigned int) */ - BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(0, 2), - /* void func(int a, unsigned int) */ - BTF_FUNC_ENC(NAME_TBD, 3), /* [4] */ - BTF_END_RAW, - }, - .str_sec = "\0a\0func", - .str_sec_size = sizeof("\0a\0func"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid arg#2", -}, - -{ - .descr = "func (Non zero vlen)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - /* void (*)(int a, unsigned int b) */ - BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), - /* void func(int a, unsigned int b) */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 2), 3), /* [4] */ - BTF_END_RAW, - }, - .str_sec = "\0a\0b\0func", - .str_sec_size = sizeof("\0a\0b\0func"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid func linkage", -}, - -{ - .descr = "func (Not referring to FUNC_PROTO)", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_FUNC_ENC(NAME_TBD, 1), /* [2] */ - BTF_END_RAW, - }, - .str_sec = "\0func", - .str_sec_size = sizeof("\0func"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid type_id", -}, - -{ - .descr = "invalid int kind_flag", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_INT, 1, 0), 4), /* [2] */ - BTF_INT_ENC(0, 0, 32), - BTF_END_RAW, - }, - BTF_STR_SEC(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "int_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid btf_info kind_flag", -}, - -{ - .descr = "invalid ptr kind_flag", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 1, 0), 1), /* [2] */ - BTF_END_RAW, - }, - BTF_STR_SEC(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "ptr_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid btf_info kind_flag", -}, - -{ - .descr = "invalid array kind_flag", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 1, 0), 0), /* [2] */ - BTF_ARRAY_ENC(1, 1, 1), - BTF_END_RAW, - }, - BTF_STR_SEC(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "array_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid btf_info kind_flag", -}, - -{ - .descr = "invalid enum kind_flag", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 1, 1), 4), /* [2] */ - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_END_RAW, - }, - BTF_STR_SEC("\0A"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "enum_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid btf_info kind_flag", -}, - -{ - .descr = "valid fwd kind_flag", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(NAME_TBD, - BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [2] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0A"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "fwd_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "invalid typedef kind_flag", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(NAME_TBD, - BTF_INFO_ENC(BTF_KIND_TYPEDEF, 1, 0), 1), /* [2] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0A"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "typedef_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid btf_info kind_flag", -}, - -{ - .descr = "invalid volatile kind_flag", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 1, 0), 1), /* [2] */ - BTF_END_RAW, - }, - BTF_STR_SEC(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "volatile_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid btf_info kind_flag", -}, - -{ - .descr = "invalid const kind_flag", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 1, 0), 1), /* [2] */ - BTF_END_RAW, - }, - BTF_STR_SEC(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "const_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid btf_info kind_flag", -}, - -{ - .descr = "invalid restrict kind_flag", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_RESTRICT, 1, 0), 1), /* [2] */ - BTF_END_RAW, - }, - BTF_STR_SEC(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "restrict_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid btf_info kind_flag", -}, - -{ - .descr = "invalid func kind_flag", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 0), 0), /* [2] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 1, 0), 2), /* [3] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0A"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid btf_info kind_flag", -}, - -{ - .descr = "invalid func_proto kind_flag", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 1, 0), 0), /* [2] */ - BTF_END_RAW, - }, - BTF_STR_SEC(""), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "func_proto_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid btf_info kind_flag", -}, - -{ - .descr = "valid struct, kind_flag, bitfield_size = 0", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 8), /* [2] */ - BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(0, 0)), - BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(0, 32)), - BTF_END_RAW, - }, - BTF_STR_SEC("\0A\0B"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "valid struct, kind_flag, int member, bitfield_size != 0", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4), /* [2] */ - BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(4, 0)), - BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(4, 4)), - BTF_END_RAW, - }, - BTF_STR_SEC("\0A\0B"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "valid union, kind_flag, int member, bitfield_size != 0", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 1, 2), 4), /* [2] */ - BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(4, 0)), - BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(4, 0)), - BTF_END_RAW, - }, - BTF_STR_SEC("\0A\0B"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "union_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "valid struct, kind_flag, enum member, bitfield_size != 0", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4),/* [3] */ - BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(4, 0)), - BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(4, 4)), - BTF_END_RAW, - }, - BTF_STR_SEC("\0A\0B\0C"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "valid union, kind_flag, enum member, bitfield_size != 0", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 1, 2), 4), /* [3] */ - BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(4, 0)), - BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(4, 0)), - BTF_END_RAW, - }, - BTF_STR_SEC("\0A\0B\0C"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "union_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "valid struct, kind_flag, typedef member, bitfield_size != 0", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4),/* [3] */ - BTF_MEMBER_ENC(NAME_TBD, 4, BTF_MEMBER_OFFSET(4, 0)), - BTF_MEMBER_ENC(NAME_TBD, 5, BTF_MEMBER_OFFSET(4, 4)), - BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [4] */ - BTF_TYPEDEF_ENC(NAME_TBD, 2), /* [5] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0A\0B\0C\0D\0E"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "valid union, kind_flag, typedef member, bitfield_size != 0", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 1, 2), 4), /* [3] */ - BTF_MEMBER_ENC(NAME_TBD, 4, BTF_MEMBER_OFFSET(4, 0)), - BTF_MEMBER_ENC(NAME_TBD, 5, BTF_MEMBER_OFFSET(4, 0)), - BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [4] */ - BTF_TYPEDEF_ENC(NAME_TBD, 2), /* [5] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0A\0B\0C\0D\0E"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "union_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "invalid struct, kind_flag, bitfield_size greater than struct size", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4), /* [2] */ - BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(20, 0)), - BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(20, 20)), - BTF_END_RAW, - }, - BTF_STR_SEC("\0A\0B"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Member exceeds struct_size", -}, - -{ - .descr = "invalid struct, kind_flag, bitfield base_type int not regular", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 20, 4), /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4), /* [3] */ - BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(20, 0)), - BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(20, 20)), - BTF_END_RAW, - }, - BTF_STR_SEC("\0A\0B"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid member base type", -}, - -{ - .descr = "invalid struct, kind_flag, base_type int not regular", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 12, 4), /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4), /* [3] */ - BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(8, 0)), - BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(8, 8)), - BTF_END_RAW, - }, - BTF_STR_SEC("\0A\0B"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid member base type", -}, - -{ - .descr = "invalid union, kind_flag, bitfield_size greater than struct size", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 1, 2), 2), /* [2] */ - BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(8, 0)), - BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(20, 0)), - BTF_END_RAW, - }, - BTF_STR_SEC("\0A\0B"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "union_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Member exceeds struct_size", -}, - -{ - .descr = "invalid struct, kind_flag, int member, bitfield_size = 0, wrong byte alignment", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 12), /* [3] */ - BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 0)), - BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 36)), - BTF_END_RAW, - }, - BTF_STR_SEC("\0A\0B"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid member offset", -}, - -{ - .descr = "invalid struct, kind_flag, enum member, bitfield_size = 0, wrong byte alignment", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 12), /* [3] */ - BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 0)), - BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 36)), - BTF_END_RAW, - }, - BTF_STR_SEC("\0A\0B\0C"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid member offset", -}, - -{ - .descr = "128-bit int", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 128, 16), /* [2] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0A"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "int_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "struct, 128-bit int member", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 128, 16), /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16), /* [3] */ - BTF_MEMBER_ENC(NAME_TBD, 2, 0), - BTF_END_RAW, - }, - BTF_STR_SEC("\0A"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "struct, 120-bit int member bitfield", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 120, 16), /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16), /* [3] */ - BTF_MEMBER_ENC(NAME_TBD, 2, 0), - BTF_END_RAW, - }, - BTF_STR_SEC("\0A"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "struct, kind_flag, 128-bit int member", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 128, 16), /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 16), /* [3] */ - BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 0)), - BTF_END_RAW, - }, - BTF_STR_SEC("\0A"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, - -{ - .descr = "struct, kind_flag, 120-bit int member bitfield", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 128, 16), /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 16), /* [3] */ - BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(120, 0)), - BTF_END_RAW, - }, - BTF_STR_SEC("\0A"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "struct_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, -}, -/* - * typedef int arr_t[16]; - * struct s { - * arr_t *a; - * }; - */ -{ - .descr = "struct->ptr->typedef->array->int size resolution", - .raw_types = { - BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [1] */ - BTF_MEMBER_ENC(NAME_TBD, 2, 0), - BTF_PTR_ENC(3), /* [2] */ - BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ - BTF_TYPE_ARRAY_ENC(5, 5, 16), /* [4] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [5] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0s\0a\0arr_t"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "ptr_mod_chain_size_resolve_map", - .key_size = sizeof(int), - .value_size = sizeof(int) * 16, - .key_type_id = 5 /* int */, - .value_type_id = 3 /* arr_t */, - .max_entries = 4, -}, -/* - * typedef int arr_t[16][8][4]; - * struct s { - * arr_t *a; - * }; - */ -{ - .descr = "struct->ptr->typedef->multi-array->int size resolution", - .raw_types = { - BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [1] */ - BTF_MEMBER_ENC(NAME_TBD, 2, 0), - BTF_PTR_ENC(3), /* [2] */ - BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ - BTF_TYPE_ARRAY_ENC(5, 7, 16), /* [4] */ - BTF_TYPE_ARRAY_ENC(6, 7, 8), /* [5] */ - BTF_TYPE_ARRAY_ENC(7, 7, 4), /* [6] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [7] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0s\0a\0arr_t"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "multi_arr_size_resolve_map", - .key_size = sizeof(int), - .value_size = sizeof(int) * 16 * 8 * 4, - .key_type_id = 7 /* int */, - .value_type_id = 3 /* arr_t */, - .max_entries = 4, -}, -/* - * typedef int int_t; - * typedef int_t arr3_t[4]; - * typedef arr3_t arr2_t[8]; - * typedef arr2_t arr1_t[16]; - * struct s { - * arr1_t *a; - * }; - */ -{ - .descr = "typedef/multi-arr mix size resolution", - .raw_types = { - BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [1] */ - BTF_MEMBER_ENC(NAME_TBD, 2, 0), - BTF_PTR_ENC(3), /* [2] */ - BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ - BTF_TYPE_ARRAY_ENC(5, 10, 16), /* [4] */ - BTF_TYPEDEF_ENC(NAME_TBD, 6), /* [5] */ - BTF_TYPE_ARRAY_ENC(7, 10, 8), /* [6] */ - BTF_TYPEDEF_ENC(NAME_TBD, 8), /* [7] */ - BTF_TYPE_ARRAY_ENC(9, 10, 4), /* [8] */ - BTF_TYPEDEF_ENC(NAME_TBD, 10), /* [9] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [10] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0s\0a\0arr1_t\0arr2_t\0arr3_t\0int_t"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "typedef_arra_mix_size_resolve_map", - .key_size = sizeof(int), - .value_size = sizeof(int) * 16 * 8 * 4, - .key_type_id = 10 /* int */, - .value_type_id = 3 /* arr_t */, - .max_entries = 4, -}, - -}; /* struct btf_raw_test raw_tests[] */ - -static const char *get_next_str(const char *start, const char *end) -{ - return start < end - 1 ? start + 1 : NULL; -} - -static int get_raw_sec_size(const __u32 *raw_types) -{ - int i; - - for (i = MAX_NR_RAW_U32 - 1; - i >= 0 && raw_types[i] != BTF_END_RAW; - i--) - ; - - return i < 0 ? i : i * sizeof(raw_types[0]); -} - -static void *btf_raw_create(const struct btf_header *hdr, - const __u32 *raw_types, - const char *str, - unsigned int str_sec_size, - unsigned int *btf_size, - const char **ret_next_str) -{ - const char *next_str = str, *end_str = str + str_sec_size; - const char **strs_idx = NULL, **tmp_strs_idx; - int strs_cap = 0, strs_cnt = 0, next_str_idx = 0; - unsigned int size_needed, offset; - struct btf_header *ret_hdr; - int i, type_sec_size, err = 0; - uint32_t *ret_types; - void *raw_btf = NULL; - - type_sec_size = get_raw_sec_size(raw_types); - if (CHECK(type_sec_size < 0, "Cannot get nr_raw_types")) - return NULL; - - size_needed = sizeof(*hdr) + type_sec_size + str_sec_size; - raw_btf = malloc(size_needed); - if (CHECK(!raw_btf, "Cannot allocate memory for raw_btf")) - return NULL; - - /* Copy header */ - memcpy(raw_btf, hdr, sizeof(*hdr)); - offset = sizeof(*hdr); - - /* Index strings */ - while ((next_str = get_next_str(next_str, end_str))) { - if (strs_cnt == strs_cap) { - strs_cap += max(16, strs_cap / 2); - tmp_strs_idx = realloc(strs_idx, - sizeof(*strs_idx) * strs_cap); - if (CHECK(!tmp_strs_idx, - "Cannot allocate memory for strs_idx")) { - err = -1; - goto done; - } - strs_idx = tmp_strs_idx; - } - strs_idx[strs_cnt++] = next_str; - next_str += strlen(next_str); - } - - /* Copy type section */ - ret_types = raw_btf + offset; - for (i = 0; i < type_sec_size / sizeof(raw_types[0]); i++) { - if (raw_types[i] == NAME_TBD) { - if (CHECK(next_str_idx == strs_cnt, - "Error in getting next_str #%d", - next_str_idx)) { - err = -1; - goto done; - } - ret_types[i] = strs_idx[next_str_idx++] - str; - } else if (IS_NAME_NTH(raw_types[i])) { - int idx = GET_NAME_NTH_IDX(raw_types[i]); - - if (CHECK(idx <= 0 || idx > strs_cnt, - "Error getting string #%d, strs_cnt:%d", - idx, strs_cnt)) { - err = -1; - goto done; - } - ret_types[i] = strs_idx[idx-1] - str; - } else { - ret_types[i] = raw_types[i]; - } - } - offset += type_sec_size; - - /* Copy string section */ - memcpy(raw_btf + offset, str, str_sec_size); - - ret_hdr = (struct btf_header *)raw_btf; - ret_hdr->type_len = type_sec_size; - ret_hdr->str_off = type_sec_size; - ret_hdr->str_len = str_sec_size; - - *btf_size = size_needed; - if (ret_next_str) - *ret_next_str = - next_str_idx < strs_cnt ? strs_idx[next_str_idx] : NULL; - -done: - if (err) { - if (raw_btf) - free(raw_btf); - if (strs_idx) - free(strs_idx); - return NULL; - } - return raw_btf; -} - -static int do_test_raw(unsigned int test_num) -{ - struct btf_raw_test *test = &raw_tests[test_num - 1]; - struct bpf_create_map_attr create_attr = {}; - int map_fd = -1, btf_fd = -1; - unsigned int raw_btf_size; - struct btf_header *hdr; - void *raw_btf; - int err; - - fprintf(stderr, "BTF raw test[%u] (%s): ", test_num, test->descr); - raw_btf = btf_raw_create(&hdr_tmpl, - test->raw_types, - test->str_sec, - test->str_sec_size, - &raw_btf_size, NULL); - - if (!raw_btf) - return -1; - - hdr = raw_btf; - - hdr->hdr_len = (int)hdr->hdr_len + test->hdr_len_delta; - hdr->type_off = (int)hdr->type_off + test->type_off_delta; - hdr->str_off = (int)hdr->str_off + test->str_off_delta; - hdr->str_len = (int)hdr->str_len + test->str_len_delta; - - *btf_log_buf = '\0'; - btf_fd = bpf_load_btf(raw_btf, raw_btf_size, - btf_log_buf, BTF_LOG_BUF_SIZE, - args.always_log); - free(raw_btf); - - err = ((btf_fd == -1) != test->btf_load_err); - if (CHECK(err, "btf_fd:%d test->btf_load_err:%u", - btf_fd, test->btf_load_err) || - CHECK(test->err_str && !strstr(btf_log_buf, test->err_str), - "expected err_str:%s", test->err_str)) { - err = -1; - goto done; - } - - if (err || btf_fd == -1) - goto done; - - create_attr.name = test->map_name; - create_attr.map_type = test->map_type; - create_attr.key_size = test->key_size; - create_attr.value_size = test->value_size; - create_attr.max_entries = test->max_entries; - create_attr.btf_fd = btf_fd; - create_attr.btf_key_type_id = test->key_type_id; - create_attr.btf_value_type_id = test->value_type_id; - - map_fd = bpf_create_map_xattr(&create_attr); - - err = ((map_fd == -1) != test->map_create_err); - CHECK(err, "map_fd:%d test->map_create_err:%u", - map_fd, test->map_create_err); - -done: - if (!err) - fprintf(stderr, "OK"); - - if (*btf_log_buf && (err || args.always_log)) - fprintf(stderr, "\n%s", btf_log_buf); - - if (btf_fd != -1) - close(btf_fd); - if (map_fd != -1) - close(map_fd); - - return err; -} - -static int test_raw(void) -{ - unsigned int i; - int err = 0; - - if (args.raw_test_num) - return count_result(do_test_raw(args.raw_test_num)); - - for (i = 1; i <= ARRAY_SIZE(raw_tests); i++) - err |= count_result(do_test_raw(i)); - - return err; -} - -struct btf_get_info_test { - const char *descr; - const char *str_sec; - __u32 raw_types[MAX_NR_RAW_U32]; - __u32 str_sec_size; - int btf_size_delta; - int (*special_test)(unsigned int test_num); -}; - -static int test_big_btf_info(unsigned int test_num); -static int test_btf_id(unsigned int test_num); - -const struct btf_get_info_test get_info_tests[] = { -{ - .descr = "== raw_btf_size+1", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .btf_size_delta = 1, -}, -{ - .descr = "== raw_btf_size-3", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .btf_size_delta = -3, -}, -{ - .descr = "Large bpf_btf_info", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .special_test = test_big_btf_info, -}, -{ - .descr = "BTF ID", - .raw_types = { - /* int */ /* [1] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - /* unsigned int */ /* [2] */ - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), - BTF_END_RAW, - }, - .str_sec = "", - .str_sec_size = sizeof(""), - .special_test = test_btf_id, -}, -}; - -static inline __u64 ptr_to_u64(const void *ptr) -{ - return (__u64)(unsigned long)ptr; -} - -static int test_big_btf_info(unsigned int test_num) -{ - const struct btf_get_info_test *test = &get_info_tests[test_num - 1]; - uint8_t *raw_btf = NULL, *user_btf = NULL; - unsigned int raw_btf_size; - struct { - struct bpf_btf_info info; - uint64_t garbage; - } info_garbage; - struct bpf_btf_info *info; - int btf_fd = -1, err; - uint32_t info_len; - - raw_btf = btf_raw_create(&hdr_tmpl, - test->raw_types, - test->str_sec, - test->str_sec_size, - &raw_btf_size, NULL); - - if (!raw_btf) - return -1; - - *btf_log_buf = '\0'; - - user_btf = malloc(raw_btf_size); - if (CHECK(!user_btf, "!user_btf")) { - err = -1; - goto done; - } - - btf_fd = bpf_load_btf(raw_btf, raw_btf_size, - btf_log_buf, BTF_LOG_BUF_SIZE, - args.always_log); - if (CHECK(btf_fd == -1, "errno:%d", errno)) { - err = -1; - goto done; - } - - /* - * GET_INFO should error out if the userspace info - * has non zero tailing bytes. - */ - info = &info_garbage.info; - memset(info, 0, sizeof(*info)); - info_garbage.garbage = 0xdeadbeef; - info_len = sizeof(info_garbage); - info->btf = ptr_to_u64(user_btf); - info->btf_size = raw_btf_size; - - err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len); - if (CHECK(!err, "!err")) { - err = -1; - goto done; - } - - /* - * GET_INFO should succeed even info_len is larger than - * the kernel supported as long as tailing bytes are zero. - * The kernel supported info len should also be returned - * to userspace. - */ - info_garbage.garbage = 0; - err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len); - if (CHECK(err || info_len != sizeof(*info), - "err:%d errno:%d info_len:%u sizeof(*info):%zu", - err, errno, info_len, sizeof(*info))) { - err = -1; - goto done; - } - - fprintf(stderr, "OK"); - -done: - if (*btf_log_buf && (err || args.always_log)) - fprintf(stderr, "\n%s", btf_log_buf); - - free(raw_btf); - free(user_btf); - - if (btf_fd != -1) - close(btf_fd); - - return err; -} - -static int test_btf_id(unsigned int test_num) -{ - const struct btf_get_info_test *test = &get_info_tests[test_num - 1]; - struct bpf_create_map_attr create_attr = {}; - uint8_t *raw_btf = NULL, *user_btf[2] = {}; - int btf_fd[2] = {-1, -1}, map_fd = -1; - struct bpf_map_info map_info = {}; - struct bpf_btf_info info[2] = {}; - unsigned int raw_btf_size; - uint32_t info_len; - int err, i, ret; - - raw_btf = btf_raw_create(&hdr_tmpl, - test->raw_types, - test->str_sec, - test->str_sec_size, - &raw_btf_size, NULL); - - if (!raw_btf) - return -1; - - *btf_log_buf = '\0'; - - for (i = 0; i < 2; i++) { - user_btf[i] = malloc(raw_btf_size); - if (CHECK(!user_btf[i], "!user_btf[%d]", i)) { - err = -1; - goto done; - } - info[i].btf = ptr_to_u64(user_btf[i]); - info[i].btf_size = raw_btf_size; - } - - btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size, - btf_log_buf, BTF_LOG_BUF_SIZE, - args.always_log); - if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) { - err = -1; - goto done; - } - - /* Test BPF_OBJ_GET_INFO_BY_ID on btf_id */ - info_len = sizeof(info[0]); - err = bpf_obj_get_info_by_fd(btf_fd[0], &info[0], &info_len); - if (CHECK(err, "errno:%d", errno)) { - err = -1; - goto done; - } - - btf_fd[1] = bpf_btf_get_fd_by_id(info[0].id); - if (CHECK(btf_fd[1] == -1, "errno:%d", errno)) { - err = -1; - goto done; - } - - ret = 0; - err = bpf_obj_get_info_by_fd(btf_fd[1], &info[1], &info_len); - if (CHECK(err || info[0].id != info[1].id || - info[0].btf_size != info[1].btf_size || - (ret = memcmp(user_btf[0], user_btf[1], info[0].btf_size)), - "err:%d errno:%d id0:%u id1:%u btf_size0:%u btf_size1:%u memcmp:%d", - err, errno, info[0].id, info[1].id, - info[0].btf_size, info[1].btf_size, ret)) { - err = -1; - goto done; - } - - /* Test btf members in struct bpf_map_info */ - create_attr.name = "test_btf_id"; - create_attr.map_type = BPF_MAP_TYPE_ARRAY; - create_attr.key_size = sizeof(int); - create_attr.value_size = sizeof(unsigned int); - create_attr.max_entries = 4; - create_attr.btf_fd = btf_fd[0]; - create_attr.btf_key_type_id = 1; - create_attr.btf_value_type_id = 2; - - map_fd = bpf_create_map_xattr(&create_attr); - if (CHECK(map_fd == -1, "errno:%d", errno)) { - err = -1; - goto done; - } - - info_len = sizeof(map_info); - err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len); - if (CHECK(err || map_info.btf_id != info[0].id || - map_info.btf_key_type_id != 1 || map_info.btf_value_type_id != 2, - "err:%d errno:%d info.id:%u btf_id:%u btf_key_type_id:%u btf_value_type_id:%u", - err, errno, info[0].id, map_info.btf_id, map_info.btf_key_type_id, - map_info.btf_value_type_id)) { - err = -1; - goto done; - } - - for (i = 0; i < 2; i++) { - close(btf_fd[i]); - btf_fd[i] = -1; - } - - /* Test BTF ID is removed from the kernel */ - btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id); - if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) { - err = -1; - goto done; - } - close(btf_fd[0]); - btf_fd[0] = -1; - - /* The map holds the last ref to BTF and its btf_id */ - close(map_fd); - map_fd = -1; - btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id); - if (CHECK(btf_fd[0] != -1, "BTF lingers")) { - err = -1; - goto done; - } - - fprintf(stderr, "OK"); - -done: - if (*btf_log_buf && (err || args.always_log)) - fprintf(stderr, "\n%s", btf_log_buf); - - free(raw_btf); - if (map_fd != -1) - close(map_fd); - for (i = 0; i < 2; i++) { - free(user_btf[i]); - if (btf_fd[i] != -1) - close(btf_fd[i]); - } - - return err; -} - -static int do_test_get_info(unsigned int test_num) -{ - const struct btf_get_info_test *test = &get_info_tests[test_num - 1]; - unsigned int raw_btf_size, user_btf_size, expected_nbytes; - uint8_t *raw_btf = NULL, *user_btf = NULL; - struct bpf_btf_info info = {}; - int btf_fd = -1, err, ret; - uint32_t info_len; - - fprintf(stderr, "BTF GET_INFO test[%u] (%s): ", - test_num, test->descr); - - if (test->special_test) - return test->special_test(test_num); - - raw_btf = btf_raw_create(&hdr_tmpl, - test->raw_types, - test->str_sec, - test->str_sec_size, - &raw_btf_size, NULL); - - if (!raw_btf) - return -1; - - *btf_log_buf = '\0'; - - user_btf = malloc(raw_btf_size); - if (CHECK(!user_btf, "!user_btf")) { - err = -1; - goto done; - } - - btf_fd = bpf_load_btf(raw_btf, raw_btf_size, - btf_log_buf, BTF_LOG_BUF_SIZE, - args.always_log); - if (CHECK(btf_fd == -1, "errno:%d", errno)) { - err = -1; - goto done; - } - - user_btf_size = (int)raw_btf_size + test->btf_size_delta; - expected_nbytes = min(raw_btf_size, user_btf_size); - if (raw_btf_size > expected_nbytes) - memset(user_btf + expected_nbytes, 0xff, - raw_btf_size - expected_nbytes); - - info_len = sizeof(info); - info.btf = ptr_to_u64(user_btf); - info.btf_size = user_btf_size; - - ret = 0; - err = bpf_obj_get_info_by_fd(btf_fd, &info, &info_len); - if (CHECK(err || !info.id || info_len != sizeof(info) || - info.btf_size != raw_btf_size || - (ret = memcmp(raw_btf, user_btf, expected_nbytes)), - "err:%d errno:%d info.id:%u info_len:%u sizeof(info):%zu raw_btf_size:%u info.btf_size:%u expected_nbytes:%u memcmp:%d", - err, errno, info.id, info_len, sizeof(info), - raw_btf_size, info.btf_size, expected_nbytes, ret)) { - err = -1; - goto done; - } - - while (expected_nbytes < raw_btf_size) { - fprintf(stderr, "%u...", expected_nbytes); - if (CHECK(user_btf[expected_nbytes++] != 0xff, - "user_btf[%u]:%x != 0xff", expected_nbytes - 1, - user_btf[expected_nbytes - 1])) { - err = -1; - goto done; - } - } - - fprintf(stderr, "OK"); - -done: - if (*btf_log_buf && (err || args.always_log)) - fprintf(stderr, "\n%s", btf_log_buf); - - free(raw_btf); - free(user_btf); - - if (btf_fd != -1) - close(btf_fd); - - return err; -} - -static int test_get_info(void) -{ - unsigned int i; - int err = 0; - - if (args.get_info_test_num) - return count_result(do_test_get_info(args.get_info_test_num)); - - for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++) - err |= count_result(do_test_get_info(i)); - - return err; -} - -struct btf_file_test { - const char *file; - bool btf_kv_notfound; -}; - -static struct btf_file_test file_tests[] = { - { .file = "test_btf_haskv.o", }, - { .file = "test_btf_newkv.o", }, - { .file = "test_btf_nokv.o", .btf_kv_notfound = true, }, -}; - -static int do_test_file(unsigned int test_num) -{ - const struct btf_file_test *test = &file_tests[test_num - 1]; - const char *expected_fnames[] = {"_dummy_tracepoint", - "test_long_fname_1", - "test_long_fname_2"}; - struct btf_ext *btf_ext = NULL; - struct bpf_prog_info info = {}; - struct bpf_object *obj = NULL; - struct bpf_func_info *finfo; - struct bpf_program *prog; - __u32 info_len, rec_size; - bool has_btf_ext = false; - struct btf *btf = NULL; - void *func_info = NULL; - struct bpf_map *map; - int i, err, prog_fd; - - fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num, - test->file); - - btf = btf__parse_elf(test->file, &btf_ext); - if (IS_ERR(btf)) { - if (PTR_ERR(btf) == -ENOENT) { - fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC); - skip_cnt++; - return 0; - } - return PTR_ERR(btf); - } - btf__free(btf); - - has_btf_ext = btf_ext != NULL; - btf_ext__free(btf_ext); - - obj = bpf_object__open(test->file); - if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj))) - return PTR_ERR(obj); - - prog = bpf_program__next(NULL, obj); - if (CHECK(!prog, "Cannot find bpf_prog")) { - err = -1; - goto done; - } - - bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT); - err = bpf_object__load(obj); - if (CHECK(err < 0, "bpf_object__load: %d", err)) - goto done; - prog_fd = bpf_program__fd(prog); - - map = bpf_object__find_map_by_name(obj, "btf_map"); - if (CHECK(!map, "btf_map not found")) { - err = -1; - goto done; - } - - err = (bpf_map__btf_key_type_id(map) == 0 || bpf_map__btf_value_type_id(map) == 0) - != test->btf_kv_notfound; - if (CHECK(err, "btf_key_type_id:%u btf_value_type_id:%u test->btf_kv_notfound:%u", - bpf_map__btf_key_type_id(map), bpf_map__btf_value_type_id(map), - test->btf_kv_notfound)) - goto done; - - if (!has_btf_ext) - goto skip; - - /* get necessary program info */ - info_len = sizeof(struct bpf_prog_info); - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - - if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) { - fprintf(stderr, "%s\n", btf_log_buf); - err = -1; - goto done; - } - if (CHECK(info.nr_func_info != 3, - "incorrect info.nr_func_info (1st) %d", - info.nr_func_info)) { - err = -1; - goto done; - } - rec_size = info.func_info_rec_size; - if (CHECK(rec_size != sizeof(struct bpf_func_info), - "incorrect info.func_info_rec_size (1st) %d\n", rec_size)) { - err = -1; - goto done; - } - - func_info = malloc(info.nr_func_info * rec_size); - if (CHECK(!func_info, "out of memory")) { - err = -1; - goto done; - } - - /* reset info to only retrieve func_info related data */ - memset(&info, 0, sizeof(info)); - info.nr_func_info = 3; - info.func_info_rec_size = rec_size; - info.func_info = ptr_to_u64(func_info); - - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - - if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) { - fprintf(stderr, "%s\n", btf_log_buf); - err = -1; - goto done; - } - if (CHECK(info.nr_func_info != 3, - "incorrect info.nr_func_info (2nd) %d", - info.nr_func_info)) { - err = -1; - goto done; - } - if (CHECK(info.func_info_rec_size != rec_size, - "incorrect info.func_info_rec_size (2nd) %d", - info.func_info_rec_size)) { - err = -1; - goto done; - } - - err = btf__get_from_id(info.btf_id, &btf); - if (CHECK(err, "cannot get btf from kernel, err: %d", err)) - goto done; - - /* check three functions */ - finfo = func_info; - for (i = 0; i < 3; i++) { - const struct btf_type *t; - const char *fname; - - t = btf__type_by_id(btf, finfo->type_id); - if (CHECK(!t, "btf__type_by_id failure: id %u", - finfo->type_id)) { - err = -1; - goto done; - } - - fname = btf__name_by_offset(btf, t->name_off); - err = strcmp(fname, expected_fnames[i]); - /* for the second and third functions in .text section, - * the compiler may order them either way. - */ - if (i && err) - err = strcmp(fname, expected_fnames[3 - i]); - if (CHECK(err, "incorrect fname %s", fname ? : "")) { - err = -1; - goto done; - } - - finfo = (void *)finfo + rec_size; - } - -skip: - fprintf(stderr, "OK"); - -done: - free(func_info); - bpf_object__close(obj); - return err; -} - -static int test_file(void) -{ - unsigned int i; - int err = 0; - - if (args.file_test_num) - return count_result(do_test_file(args.file_test_num)); - - for (i = 1; i <= ARRAY_SIZE(file_tests); i++) - err |= count_result(do_test_file(i)); - - return err; -} - -const char *pprint_enum_str[] = { - "ENUM_ZERO", - "ENUM_ONE", - "ENUM_TWO", - "ENUM_THREE", -}; - -struct pprint_mapv { - uint32_t ui32; - uint16_t ui16; - /* 2 bytes hole */ - int32_t si32; - uint32_t unused_bits2a:2, - bits28:28, - unused_bits2b:2; - union { - uint64_t ui64; - uint8_t ui8a[8]; - }; - enum { - ENUM_ZERO, - ENUM_ONE, - ENUM_TWO, - ENUM_THREE, - } aenum; - uint32_t ui32b; - uint32_t bits2c:2; - uint8_t si8_4[2][2]; -}; - -#ifdef __SIZEOF_INT128__ -struct pprint_mapv_int128 { - __int128 si128a; - __int128 si128b; - unsigned __int128 bits3:3; - unsigned __int128 bits80:80; - unsigned __int128 ui128; -}; -#endif - -static struct btf_raw_test pprint_test_template[] = { -{ - .raw_types = { - /* unsighed char */ /* [1] */ - BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1), - /* unsigned short */ /* [2] */ - BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2), - /* unsigned int */ /* [3] */ - BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), - /* int */ /* [4] */ - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), - /* unsigned long long */ /* [5] */ - BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8), - /* 2 bits */ /* [6] */ - BTF_TYPE_INT_ENC(0, 0, 0, 2, 2), - /* 28 bits */ /* [7] */ - BTF_TYPE_INT_ENC(0, 0, 0, 28, 4), - /* uint8_t[8] */ /* [8] */ - BTF_TYPE_ARRAY_ENC(9, 1, 8), - /* typedef unsigned char uint8_t */ /* [9] */ - BTF_TYPEDEF_ENC(NAME_TBD, 1), - /* typedef unsigned short uint16_t */ /* [10] */ - BTF_TYPEDEF_ENC(NAME_TBD, 2), - /* typedef unsigned int uint32_t */ /* [11] */ - BTF_TYPEDEF_ENC(NAME_TBD, 3), - /* typedef int int32_t */ /* [12] */ - BTF_TYPEDEF_ENC(NAME_TBD, 4), - /* typedef unsigned long long uint64_t *//* [13] */ - BTF_TYPEDEF_ENC(NAME_TBD, 5), - /* union (anon) */ /* [14] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8), - BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */ - BTF_MEMBER_ENC(NAME_TBD, 8, 0), /* uint8_t ui8a[8]; */ - /* enum (anon) */ /* [15] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4), - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_ENUM_ENC(NAME_TBD, 1), - BTF_ENUM_ENC(NAME_TBD, 2), - BTF_ENUM_ENC(NAME_TBD, 3), - /* struct pprint_mapv */ /* [16] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 11), 40), - BTF_MEMBER_ENC(NAME_TBD, 11, 0), /* uint32_t ui32 */ - BTF_MEMBER_ENC(NAME_TBD, 10, 32), /* uint16_t ui16 */ - BTF_MEMBER_ENC(NAME_TBD, 12, 64), /* int32_t si32 */ - BTF_MEMBER_ENC(NAME_TBD, 6, 96), /* unused_bits2a */ - BTF_MEMBER_ENC(NAME_TBD, 7, 98), /* bits28 */ - BTF_MEMBER_ENC(NAME_TBD, 6, 126), /* unused_bits2b */ - BTF_MEMBER_ENC(0, 14, 128), /* union (anon) */ - BTF_MEMBER_ENC(NAME_TBD, 15, 192), /* aenum */ - BTF_MEMBER_ENC(NAME_TBD, 11, 224), /* uint32_t ui32b */ - BTF_MEMBER_ENC(NAME_TBD, 6, 256), /* bits2c */ - BTF_MEMBER_ENC(NAME_TBD, 17, 264), /* si8_4 */ - BTF_TYPE_ARRAY_ENC(18, 1, 2), /* [17] */ - BTF_TYPE_ARRAY_ENC(1, 1, 2), /* [18] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0si8_4"), - .key_size = sizeof(unsigned int), - .value_size = sizeof(struct pprint_mapv), - .key_type_id = 3, /* unsigned int */ - .value_type_id = 16, /* struct pprint_mapv */ - .max_entries = 128 * 1024, -}, - -{ - /* this type will have the same type as the - * first .raw_types definition, but struct type will - * be encoded with kind_flag set. - */ - .raw_types = { - /* unsighed char */ /* [1] */ - BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1), - /* unsigned short */ /* [2] */ - BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2), - /* unsigned int */ /* [3] */ - BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), - /* int */ /* [4] */ - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), - /* unsigned long long */ /* [5] */ - BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8), - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [6] */ - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [7] */ - /* uint8_t[8] */ /* [8] */ - BTF_TYPE_ARRAY_ENC(9, 1, 8), - /* typedef unsigned char uint8_t */ /* [9] */ - BTF_TYPEDEF_ENC(NAME_TBD, 1), - /* typedef unsigned short uint16_t */ /* [10] */ - BTF_TYPEDEF_ENC(NAME_TBD, 2), - /* typedef unsigned int uint32_t */ /* [11] */ - BTF_TYPEDEF_ENC(NAME_TBD, 3), - /* typedef int int32_t */ /* [12] */ - BTF_TYPEDEF_ENC(NAME_TBD, 4), - /* typedef unsigned long long uint64_t *//* [13] */ - BTF_TYPEDEF_ENC(NAME_TBD, 5), - /* union (anon) */ /* [14] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8), - BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */ - BTF_MEMBER_ENC(NAME_TBD, 8, 0), /* uint8_t ui8a[8]; */ - /* enum (anon) */ /* [15] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4), - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_ENUM_ENC(NAME_TBD, 1), - BTF_ENUM_ENC(NAME_TBD, 2), - BTF_ENUM_ENC(NAME_TBD, 3), - /* struct pprint_mapv */ /* [16] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 11), 40), - BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 0)), /* uint32_t ui32 */ - BTF_MEMBER_ENC(NAME_TBD, 10, BTF_MEMBER_OFFSET(0, 32)), /* uint16_t ui16 */ - BTF_MEMBER_ENC(NAME_TBD, 12, BTF_MEMBER_OFFSET(0, 64)), /* int32_t si32 */ - BTF_MEMBER_ENC(NAME_TBD, 6, BTF_MEMBER_OFFSET(2, 96)), /* unused_bits2a */ - BTF_MEMBER_ENC(NAME_TBD, 7, BTF_MEMBER_OFFSET(28, 98)), /* bits28 */ - BTF_MEMBER_ENC(NAME_TBD, 6, BTF_MEMBER_OFFSET(2, 126)), /* unused_bits2b */ - BTF_MEMBER_ENC(0, 14, BTF_MEMBER_OFFSET(0, 128)), /* union (anon) */ - BTF_MEMBER_ENC(NAME_TBD, 15, BTF_MEMBER_OFFSET(0, 192)), /* aenum */ - BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 224)), /* uint32_t ui32b */ - BTF_MEMBER_ENC(NAME_TBD, 6, BTF_MEMBER_OFFSET(2, 256)), /* bits2c */ - BTF_MEMBER_ENC(NAME_TBD, 17, 264), /* si8_4 */ - BTF_TYPE_ARRAY_ENC(18, 1, 2), /* [17] */ - BTF_TYPE_ARRAY_ENC(1, 1, 2), /* [18] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0si8_4"), - .key_size = sizeof(unsigned int), - .value_size = sizeof(struct pprint_mapv), - .key_type_id = 3, /* unsigned int */ - .value_type_id = 16, /* struct pprint_mapv */ - .max_entries = 128 * 1024, -}, - -{ - /* this type will have the same layout as the - * first .raw_types definition. The struct type will - * be encoded with kind_flag set, bitfield members - * are added typedef/const/volatile, and bitfield members - * will have both int and enum types. - */ - .raw_types = { - /* unsighed char */ /* [1] */ - BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1), - /* unsigned short */ /* [2] */ - BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2), - /* unsigned int */ /* [3] */ - BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), - /* int */ /* [4] */ - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), - /* unsigned long long */ /* [5] */ - BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8), - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [6] */ - BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [7] */ - /* uint8_t[8] */ /* [8] */ - BTF_TYPE_ARRAY_ENC(9, 1, 8), - /* typedef unsigned char uint8_t */ /* [9] */ - BTF_TYPEDEF_ENC(NAME_TBD, 1), - /* typedef unsigned short uint16_t */ /* [10] */ - BTF_TYPEDEF_ENC(NAME_TBD, 2), - /* typedef unsigned int uint32_t */ /* [11] */ - BTF_TYPEDEF_ENC(NAME_TBD, 3), - /* typedef int int32_t */ /* [12] */ - BTF_TYPEDEF_ENC(NAME_TBD, 4), - /* typedef unsigned long long uint64_t *//* [13] */ - BTF_TYPEDEF_ENC(NAME_TBD, 5), - /* union (anon) */ /* [14] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8), - BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */ - BTF_MEMBER_ENC(NAME_TBD, 8, 0), /* uint8_t ui8a[8]; */ - /* enum (anon) */ /* [15] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4), - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_ENUM_ENC(NAME_TBD, 1), - BTF_ENUM_ENC(NAME_TBD, 2), - BTF_ENUM_ENC(NAME_TBD, 3), - /* struct pprint_mapv */ /* [16] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 11), 40), - BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 0)), /* uint32_t ui32 */ - BTF_MEMBER_ENC(NAME_TBD, 10, BTF_MEMBER_OFFSET(0, 32)), /* uint16_t ui16 */ - BTF_MEMBER_ENC(NAME_TBD, 12, BTF_MEMBER_OFFSET(0, 64)), /* int32_t si32 */ - BTF_MEMBER_ENC(NAME_TBD, 17, BTF_MEMBER_OFFSET(2, 96)), /* unused_bits2a */ - BTF_MEMBER_ENC(NAME_TBD, 7, BTF_MEMBER_OFFSET(28, 98)), /* bits28 */ - BTF_MEMBER_ENC(NAME_TBD, 19, BTF_MEMBER_OFFSET(2, 126)),/* unused_bits2b */ - BTF_MEMBER_ENC(0, 14, BTF_MEMBER_OFFSET(0, 128)), /* union (anon) */ - BTF_MEMBER_ENC(NAME_TBD, 15, BTF_MEMBER_OFFSET(0, 192)), /* aenum */ - BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 224)), /* uint32_t ui32b */ - BTF_MEMBER_ENC(NAME_TBD, 17, BTF_MEMBER_OFFSET(2, 256)), /* bits2c */ - BTF_MEMBER_ENC(NAME_TBD, 20, BTF_MEMBER_OFFSET(0, 264)), /* si8_4 */ - /* typedef unsigned int ___int */ /* [17] */ - BTF_TYPEDEF_ENC(NAME_TBD, 18), - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 6), /* [18] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 15), /* [19] */ - BTF_TYPE_ARRAY_ENC(21, 1, 2), /* [20] */ - BTF_TYPE_ARRAY_ENC(1, 1, 2), /* [21] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0___int\0si8_4"), - .key_size = sizeof(unsigned int), - .value_size = sizeof(struct pprint_mapv), - .key_type_id = 3, /* unsigned int */ - .value_type_id = 16, /* struct pprint_mapv */ - .max_entries = 128 * 1024, -}, - -#ifdef __SIZEOF_INT128__ -{ - /* test int128 */ - .raw_types = { - /* unsigned int */ /* [1] */ - BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), - /* __int128 */ /* [2] */ - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 128, 16), - /* unsigned __int128 */ /* [3] */ - BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 128, 16), - /* struct pprint_mapv_int128 */ /* [4] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 5), 64), - BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 0)), /* si128a */ - BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 128)), /* si128b */ - BTF_MEMBER_ENC(NAME_TBD, 3, BTF_MEMBER_OFFSET(3, 256)), /* bits3 */ - BTF_MEMBER_ENC(NAME_TBD, 3, BTF_MEMBER_OFFSET(80, 259)), /* bits80 */ - BTF_MEMBER_ENC(NAME_TBD, 3, BTF_MEMBER_OFFSET(0, 384)), /* ui128 */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0unsigned int\0__int128\0unsigned __int128\0pprint_mapv_int128\0si128a\0si128b\0bits3\0bits80\0ui128"), - .key_size = sizeof(unsigned int), - .value_size = sizeof(struct pprint_mapv_int128), - .key_type_id = 1, - .value_type_id = 4, - .max_entries = 128 * 1024, - .mapv_kind = PPRINT_MAPV_KIND_INT128, -}, -#endif - -}; - -static struct btf_pprint_test_meta { - const char *descr; - enum bpf_map_type map_type; - const char *map_name; - bool ordered_map; - bool lossless_map; - bool percpu_map; -} pprint_tests_meta[] = { -{ - .descr = "BTF pretty print array", - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "pprint_test_array", - .ordered_map = true, - .lossless_map = true, - .percpu_map = false, -}, - -{ - .descr = "BTF pretty print hash", - .map_type = BPF_MAP_TYPE_HASH, - .map_name = "pprint_test_hash", - .ordered_map = false, - .lossless_map = true, - .percpu_map = false, -}, - -{ - .descr = "BTF pretty print lru hash", - .map_type = BPF_MAP_TYPE_LRU_HASH, - .map_name = "pprint_test_lru_hash", - .ordered_map = false, - .lossless_map = false, - .percpu_map = false, -}, - -{ - .descr = "BTF pretty print percpu array", - .map_type = BPF_MAP_TYPE_PERCPU_ARRAY, - .map_name = "pprint_test_percpu_array", - .ordered_map = true, - .lossless_map = true, - .percpu_map = true, -}, - -{ - .descr = "BTF pretty print percpu hash", - .map_type = BPF_MAP_TYPE_PERCPU_HASH, - .map_name = "pprint_test_percpu_hash", - .ordered_map = false, - .lossless_map = true, - .percpu_map = true, -}, - -{ - .descr = "BTF pretty print lru percpu hash", - .map_type = BPF_MAP_TYPE_LRU_PERCPU_HASH, - .map_name = "pprint_test_lru_percpu_hash", - .ordered_map = false, - .lossless_map = false, - .percpu_map = true, -}, - -}; - -static size_t get_pprint_mapv_size(enum pprint_mapv_kind_t mapv_kind) -{ - if (mapv_kind == PPRINT_MAPV_KIND_BASIC) - return sizeof(struct pprint_mapv); - -#ifdef __SIZEOF_INT128__ - if (mapv_kind == PPRINT_MAPV_KIND_INT128) - return sizeof(struct pprint_mapv_int128); -#endif - - assert(0); -} - -static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind, - void *mapv, uint32_t i, - int num_cpus, int rounded_value_size) -{ - int cpu; - - if (mapv_kind == PPRINT_MAPV_KIND_BASIC) { - struct pprint_mapv *v = mapv; - - for (cpu = 0; cpu < num_cpus; cpu++) { - v->ui32 = i + cpu; - v->si32 = -i; - v->unused_bits2a = 3; - v->bits28 = i; - v->unused_bits2b = 3; - v->ui64 = i; - v->aenum = i & 0x03; - v->ui32b = 4; - v->bits2c = 1; - v->si8_4[0][0] = (cpu + i) & 0xff; - v->si8_4[0][1] = (cpu + i + 1) & 0xff; - v->si8_4[1][0] = (cpu + i + 2) & 0xff; - v->si8_4[1][1] = (cpu + i + 3) & 0xff; - v = (void *)v + rounded_value_size; - } - } - -#ifdef __SIZEOF_INT128__ - if (mapv_kind == PPRINT_MAPV_KIND_INT128) { - struct pprint_mapv_int128 *v = mapv; - - for (cpu = 0; cpu < num_cpus; cpu++) { - v->si128a = i; - v->si128b = -i; - v->bits3 = i & 0x07; - v->bits80 = (((unsigned __int128)1) << 64) + i; - v->ui128 = (((unsigned __int128)2) << 64) + i; - v = (void *)v + rounded_value_size; - } - } -#endif -} - -ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind, - char *expected_line, ssize_t line_size, - bool percpu_map, unsigned int next_key, - int cpu, void *mapv) -{ - ssize_t nexpected_line = -1; - - if (mapv_kind == PPRINT_MAPV_KIND_BASIC) { - struct pprint_mapv *v = mapv; - - nexpected_line = snprintf(expected_line, line_size, - "%s%u: {%u,0,%d,0x%x,0x%x,0x%x," - "{%llu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s," - "%u,0x%x,[[%d,%d],[%d,%d]]}\n", - percpu_map ? "\tcpu" : "", - percpu_map ? cpu : next_key, - v->ui32, v->si32, - v->unused_bits2a, - v->bits28, - v->unused_bits2b, - (__u64)v->ui64, - v->ui8a[0], v->ui8a[1], - v->ui8a[2], v->ui8a[3], - v->ui8a[4], v->ui8a[5], - v->ui8a[6], v->ui8a[7], - pprint_enum_str[v->aenum], - v->ui32b, - v->bits2c, - v->si8_4[0][0], v->si8_4[0][1], - v->si8_4[1][0], v->si8_4[1][1]); - } - -#ifdef __SIZEOF_INT128__ - if (mapv_kind == PPRINT_MAPV_KIND_INT128) { - struct pprint_mapv_int128 *v = mapv; - - nexpected_line = snprintf(expected_line, line_size, - "%s%u: {0x%lx,0x%lx,0x%lx," - "0x%lx%016lx,0x%lx%016lx}\n", - percpu_map ? "\tcpu" : "", - percpu_map ? cpu : next_key, - (uint64_t)v->si128a, - (uint64_t)v->si128b, - (uint64_t)v->bits3, - (uint64_t)(v->bits80 >> 64), - (uint64_t)v->bits80, - (uint64_t)(v->ui128 >> 64), - (uint64_t)v->ui128); - } -#endif - - return nexpected_line; -} - -static int check_line(const char *expected_line, int nexpected_line, - int expected_line_len, const char *line) -{ - if (CHECK(nexpected_line == expected_line_len, - "expected_line is too long")) - return -1; - - if (strcmp(expected_line, line)) { - fprintf(stderr, "unexpected pprint output\n"); - fprintf(stderr, "expected: %s", expected_line); - fprintf(stderr, " read: %s", line); - return -1; - } - - return 0; -} - - -static int do_test_pprint(int test_num) -{ - const struct btf_raw_test *test = &pprint_test_template[test_num]; - enum pprint_mapv_kind_t mapv_kind = test->mapv_kind; - struct bpf_create_map_attr create_attr = {}; - bool ordered_map, lossless_map, percpu_map; - int err, ret, num_cpus, rounded_value_size; - unsigned int key, nr_read_elems; - int map_fd = -1, btf_fd = -1; - unsigned int raw_btf_size; - char expected_line[255]; - FILE *pin_file = NULL; - char pin_path[255]; - size_t line_len = 0; - char *line = NULL; - void *mapv = NULL; - uint8_t *raw_btf; - ssize_t nread; - - fprintf(stderr, "%s(#%d)......", test->descr, test_num); - raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, - test->str_sec, test->str_sec_size, - &raw_btf_size, NULL); - - if (!raw_btf) - return -1; - - *btf_log_buf = '\0'; - btf_fd = bpf_load_btf(raw_btf, raw_btf_size, - btf_log_buf, BTF_LOG_BUF_SIZE, - args.always_log); - free(raw_btf); - - if (CHECK(btf_fd == -1, "errno:%d", errno)) { - err = -1; - goto done; - } - - create_attr.name = test->map_name; - create_attr.map_type = test->map_type; - create_attr.key_size = test->key_size; - create_attr.value_size = test->value_size; - create_attr.max_entries = test->max_entries; - create_attr.btf_fd = btf_fd; - create_attr.btf_key_type_id = test->key_type_id; - create_attr.btf_value_type_id = test->value_type_id; - - map_fd = bpf_create_map_xattr(&create_attr); - if (CHECK(map_fd == -1, "errno:%d", errno)) { - err = -1; - goto done; - } - - ret = snprintf(pin_path, sizeof(pin_path), "%s/%s", - "/sys/fs/bpf", test->map_name); - - if (CHECK(ret == sizeof(pin_path), "pin_path %s/%s is too long", - "/sys/fs/bpf", test->map_name)) { - err = -1; - goto done; - } - - err = bpf_obj_pin(map_fd, pin_path); - if (CHECK(err, "bpf_obj_pin(%s): errno:%d.", pin_path, errno)) - goto done; - - percpu_map = test->percpu_map; - num_cpus = percpu_map ? bpf_num_possible_cpus() : 1; - rounded_value_size = round_up(get_pprint_mapv_size(mapv_kind), 8); - mapv = calloc(num_cpus, rounded_value_size); - if (CHECK(!mapv, "mapv allocation failure")) { - err = -1; - goto done; - } - - for (key = 0; key < test->max_entries; key++) { - set_pprint_mapv(mapv_kind, mapv, key, num_cpus, rounded_value_size); - bpf_map_update_elem(map_fd, &key, mapv, 0); - } - - pin_file = fopen(pin_path, "r"); - if (CHECK(!pin_file, "fopen(%s): errno:%d", pin_path, errno)) { - err = -1; - goto done; - } - - /* Skip lines start with '#' */ - while ((nread = getline(&line, &line_len, pin_file)) > 0 && - *line == '#') - ; - - if (CHECK(nread <= 0, "Unexpected EOF")) { - err = -1; - goto done; - } - - nr_read_elems = 0; - ordered_map = test->ordered_map; - lossless_map = test->lossless_map; - do { - ssize_t nexpected_line; - unsigned int next_key; - void *cmapv; - int cpu; - - next_key = ordered_map ? nr_read_elems : atoi(line); - set_pprint_mapv(mapv_kind, mapv, next_key, num_cpus, rounded_value_size); - cmapv = mapv; - - for (cpu = 0; cpu < num_cpus; cpu++) { - if (percpu_map) { - /* for percpu map, the format looks like: - * : { - * cpu0: - * cpu1: - * ... - * cpun: - * } - * - * let us verify the line containing the key here. - */ - if (cpu == 0) { - nexpected_line = snprintf(expected_line, - sizeof(expected_line), - "%u: {\n", - next_key); - - err = check_line(expected_line, nexpected_line, - sizeof(expected_line), line); - if (err == -1) - goto done; - } - - /* read value@cpu */ - nread = getline(&line, &line_len, pin_file); - if (nread < 0) - break; - } - - nexpected_line = get_pprint_expected_line(mapv_kind, expected_line, - sizeof(expected_line), - percpu_map, next_key, - cpu, cmapv); - err = check_line(expected_line, nexpected_line, - sizeof(expected_line), line); - if (err == -1) - goto done; - - cmapv = cmapv + rounded_value_size; - } - - if (percpu_map) { - /* skip the last bracket for the percpu map */ - nread = getline(&line, &line_len, pin_file); - if (nread < 0) - break; - } - - nread = getline(&line, &line_len, pin_file); - } while (++nr_read_elems < test->max_entries && nread > 0); - - if (lossless_map && - CHECK(nr_read_elems < test->max_entries, - "Unexpected EOF. nr_read_elems:%u test->max_entries:%u", - nr_read_elems, test->max_entries)) { - err = -1; - goto done; - } - - if (CHECK(nread > 0, "Unexpected extra pprint output: %s", line)) { - err = -1; - goto done; - } - - err = 0; - -done: - if (mapv) - free(mapv); - if (!err) - fprintf(stderr, "OK"); - if (*btf_log_buf && (err || args.always_log)) - fprintf(stderr, "\n%s", btf_log_buf); - if (btf_fd != -1) - close(btf_fd); - if (map_fd != -1) - close(map_fd); - if (pin_file) - fclose(pin_file); - unlink(pin_path); - free(line); - - return err; -} - -static int test_pprint(void) -{ - unsigned int i; - int err = 0; - - /* test various maps with the first test template */ - for (i = 0; i < ARRAY_SIZE(pprint_tests_meta); i++) { - pprint_test_template[0].descr = pprint_tests_meta[i].descr; - pprint_test_template[0].map_type = pprint_tests_meta[i].map_type; - pprint_test_template[0].map_name = pprint_tests_meta[i].map_name; - pprint_test_template[0].ordered_map = pprint_tests_meta[i].ordered_map; - pprint_test_template[0].lossless_map = pprint_tests_meta[i].lossless_map; - pprint_test_template[0].percpu_map = pprint_tests_meta[i].percpu_map; - - err |= count_result(do_test_pprint(0)); - } - - /* test rest test templates with the first map */ - for (i = 1; i < ARRAY_SIZE(pprint_test_template); i++) { - pprint_test_template[i].descr = pprint_tests_meta[0].descr; - pprint_test_template[i].map_type = pprint_tests_meta[0].map_type; - pprint_test_template[i].map_name = pprint_tests_meta[0].map_name; - pprint_test_template[i].ordered_map = pprint_tests_meta[0].ordered_map; - pprint_test_template[i].lossless_map = pprint_tests_meta[0].lossless_map; - pprint_test_template[i].percpu_map = pprint_tests_meta[0].percpu_map; - err |= count_result(do_test_pprint(i)); - } - - return err; -} - -#define BPF_LINE_INFO_ENC(insn_off, file_off, line_off, line_num, line_col) \ - (insn_off), (file_off), (line_off), ((line_num) << 10 | ((line_col) & 0x3ff)) - -static struct prog_info_raw_test { - const char *descr; - const char *str_sec; - const char *err_str; - __u32 raw_types[MAX_NR_RAW_U32]; - __u32 str_sec_size; - struct bpf_insn insns[MAX_INSNS]; - __u32 prog_type; - __u32 func_info[MAX_SUBPROGS][2]; - __u32 func_info_rec_size; - __u32 func_info_cnt; - __u32 line_info[MAX_NR_RAW_U32]; - __u32 line_info_rec_size; - __u32 nr_jited_ksyms; - bool expected_prog_load_failure; - __u32 dead_code_cnt; - __u32 dead_code_mask; - __u32 dead_func_cnt; - __u32 dead_func_mask; -} info_raw_tests[] = { -{ - .descr = "func_type (main func + one sub)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ - BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), - BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ - BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ - BTF_END_RAW, - }, - .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", - .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info = { {0, 5}, {3, 6} }, - .func_info_rec_size = 8, - .func_info_cnt = 2, - .line_info = { BTF_END_RAW }, -}, - -{ - .descr = "func_type (Incorrect func_info_rec_size)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ - BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), - BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ - BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ - BTF_END_RAW, - }, - .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", - .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info = { {0, 5}, {3, 6} }, - .func_info_rec_size = 4, - .func_info_cnt = 2, - .line_info = { BTF_END_RAW }, - .expected_prog_load_failure = true, -}, - -{ - .descr = "func_type (Incorrect func_info_cnt)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ - BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), - BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ - BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ - BTF_END_RAW, - }, - .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", - .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info = { {0, 5}, {3, 6} }, - .func_info_rec_size = 8, - .func_info_cnt = 1, - .line_info = { BTF_END_RAW }, - .expected_prog_load_failure = true, -}, - -{ - .descr = "func_type (Incorrect bpf_func_info.insn_off)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ - BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), - BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ - BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ - BTF_END_RAW, - }, - .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", - .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info = { {0, 5}, {2, 6} }, - .func_info_rec_size = 8, - .func_info_cnt = 2, - .line_info = { BTF_END_RAW }, - .expected_prog_load_failure = true, -}, - -{ - .descr = "line_info (No subprog)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_MOV64_IMM(BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info_cnt = 0, - .line_info = { - BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), - BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), - BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), - BTF_END_RAW, - }, - .line_info_rec_size = sizeof(struct bpf_line_info), - .nr_jited_ksyms = 1, -}, - -{ - .descr = "line_info (No subprog. insn_off >= prog->len)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_MOV64_IMM(BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info_cnt = 0, - .line_info = { - BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), - BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), - BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), - BPF_LINE_INFO_ENC(4, 0, 0, 5, 6), - BTF_END_RAW, - }, - .line_info_rec_size = sizeof(struct bpf_line_info), - .nr_jited_ksyms = 1, - .err_str = "line_info[4].insn_off", - .expected_prog_load_failure = true, -}, - -{ - .descr = "line_info (Zero bpf insn code)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8), /* [2] */ - BTF_TYPEDEF_ENC(NAME_TBD, 2), /* [3] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0unsigned long\0u64\0u64 a=1;\0return a;"), - .insns = { - BPF_LD_IMM64(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info_cnt = 0, - .line_info = { - BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(1, 0, 0, 2, 9), - BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), - BTF_END_RAW, - }, - .line_info_rec_size = sizeof(struct bpf_line_info), - .nr_jited_ksyms = 1, - .err_str = "Invalid insn code at line_info[1]", - .expected_prog_load_failure = true, -}, - -{ - .descr = "line_info (No subprog. zero tailing line_info", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_MOV64_IMM(BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info_cnt = 0, - .line_info = { - BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), 0, - BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), 0, - BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), 0, - BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), 0, - BTF_END_RAW, - }, - .line_info_rec_size = sizeof(struct bpf_line_info) + sizeof(__u32), - .nr_jited_ksyms = 1, -}, - -{ - .descr = "line_info (No subprog. nonzero tailing line_info)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_MOV64_IMM(BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info_cnt = 0, - .line_info = { - BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), 0, - BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), 0, - BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), 0, - BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), 1, - BTF_END_RAW, - }, - .line_info_rec_size = sizeof(struct bpf_line_info) + sizeof(__u32), - .nr_jited_ksyms = 1, - .err_str = "nonzero tailing record in line_info", - .expected_prog_load_failure = true, -}, - -{ - .descr = "line_info (subprog)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_CALL_REL(1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info_cnt = 0, - .line_info = { - BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), - BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 3, 8), - BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), - BTF_END_RAW, - }, - .line_info_rec_size = sizeof(struct bpf_line_info), - .nr_jited_ksyms = 2, -}, - -{ - .descr = "line_info (subprog + func_info)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ - BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0x\0sub\0main\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_CALL_REL(1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info_cnt = 2, - .func_info_rec_size = 8, - .func_info = { {0, 4}, {5, 3} }, - .line_info = { - BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), - BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 3, 8), - BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), - BTF_END_RAW, - }, - .line_info_rec_size = sizeof(struct bpf_line_info), - .nr_jited_ksyms = 2, -}, - -{ - .descr = "line_info (subprog. missing 1st func line info)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_CALL_REL(1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info_cnt = 0, - .line_info = { - BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), - BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 3, 8), - BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), - BTF_END_RAW, - }, - .line_info_rec_size = sizeof(struct bpf_line_info), - .nr_jited_ksyms = 2, - .err_str = "missing bpf_line_info for func#0", - .expected_prog_load_failure = true, -}, - -{ - .descr = "line_info (subprog. missing 2nd func line info)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_CALL_REL(1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info_cnt = 0, - .line_info = { - BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), - BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 3, 8), - BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), - BTF_END_RAW, - }, - .line_info_rec_size = sizeof(struct bpf_line_info), - .nr_jited_ksyms = 2, - .err_str = "missing bpf_line_info for func#1", - .expected_prog_load_failure = true, -}, - -{ - .descr = "line_info (subprog. unordered insn offset)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_CALL_REL(1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info_cnt = 0, - .line_info = { - BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 2, 9), - BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), - BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), - BTF_END_RAW, - }, - .line_info_rec_size = sizeof(struct bpf_line_info), - .nr_jited_ksyms = 2, - .err_str = "Invalid line_info[2].insn_off", - .expected_prog_load_failure = true, -}, - -{ - .descr = "line_info (dead start)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0/* dead jmp */\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), - .insns = { - BPF_JMP_IMM(BPF_JA, 0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_MOV64_IMM(BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info_cnt = 0, - .line_info = { - BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), - BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), - BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), - BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 5, 6), - BTF_END_RAW, - }, - .line_info_rec_size = sizeof(struct bpf_line_info), - .nr_jited_ksyms = 1, - .dead_code_cnt = 1, - .dead_code_mask = 0x01, -}, - -{ - .descr = "line_info (dead end)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0/* dead jmp */\0return a + b;\0/* dead exit */"), - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_MOV64_IMM(BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, 1), - BPF_EXIT_INSN(), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info_cnt = 0, - .line_info = { - BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 12), - BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 11), - BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 10), - BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 9), - BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 5, 8), - BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 6, 7), - BTF_END_RAW, - }, - .line_info_rec_size = sizeof(struct bpf_line_info), - .nr_jited_ksyms = 1, - .dead_code_cnt = 2, - .dead_code_mask = 0x28, -}, - -{ - .descr = "line_info (dead code + subprog + func_info)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ - BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0x\0sub\0main\0int a=1+1;\0/* dead jmp */" - "\0/* dead */\0/* dead */\0/* dead */\0/* dead */" - "\0/* dead */\0/* dead */\0/* dead */\0/* dead */" - "\0return func(a);\0b+=1;\0return b;"), - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 8), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_CALL_REL(1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info_cnt = 2, - .func_info_rec_size = 8, - .func_info = { {0, 4}, {14, 3} }, - .line_info = { - BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(8, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(9, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(10, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(11, 0, NAME_TBD, 2, 9), - BPF_LINE_INFO_ENC(12, 0, NAME_TBD, 2, 9), - BPF_LINE_INFO_ENC(14, 0, NAME_TBD, 3, 8), - BPF_LINE_INFO_ENC(16, 0, NAME_TBD, 4, 7), - BTF_END_RAW, - }, - .line_info_rec_size = sizeof(struct bpf_line_info), - .nr_jited_ksyms = 2, - .dead_code_cnt = 9, - .dead_code_mask = 0x3fe, -}, - -{ - .descr = "line_info (dead subprog)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ - BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ - BTF_FUNC_ENC(NAME_TBD, 2), /* [5] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0x\0dead\0main\0func\0int a=1+1;\0/* live call */" - "\0return 0;\0return 0;\0/* dead */\0/* dead */" - "\0/* dead */\0return bla + 1;\0return bla + 1;" - "\0return bla + 1;\0return func(a);\0b+=1;\0return b;"), - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), - BPF_CALL_REL(3), - BPF_CALL_REL(5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_CALL_REL(1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_0, 2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info_cnt = 3, - .func_info_rec_size = 8, - .func_info = { {0, 4}, {6, 3}, {9, 5} }, - .line_info = { - BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(8, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(9, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(10, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(11, 0, NAME_TBD, 2, 9), - BTF_END_RAW, - }, - .line_info_rec_size = sizeof(struct bpf_line_info), - .nr_jited_ksyms = 2, - .dead_code_cnt = 3, - .dead_code_mask = 0x70, - .dead_func_cnt = 1, - .dead_func_mask = 0x2, -}, - -{ - .descr = "line_info (dead last subprog)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ - BTF_FUNC_ENC(NAME_TBD, 2), /* [5] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0x\0dead\0main\0int a=1+1;\0/* live call */" - "\0return 0;\0/* dead */\0/* dead */"), - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), - BPF_CALL_REL(2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info_cnt = 2, - .func_info_rec_size = 8, - .func_info = { {0, 4}, {5, 3} }, - .line_info = { - BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), - BTF_END_RAW, - }, - .line_info_rec_size = sizeof(struct bpf_line_info), - .nr_jited_ksyms = 1, - .dead_code_cnt = 2, - .dead_code_mask = 0x18, - .dead_func_cnt = 1, - .dead_func_mask = 0x2, -}, - -{ - .descr = "line_info (dead subprog + dead start)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ - BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ - BTF_FUNC_ENC(NAME_TBD, 2), /* [5] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0x\0dead\0main\0func\0int a=1+1;\0/* dead */" - "\0return 0;\0return 0;\0return 0;" - "\0/* dead */\0/* dead */\0/* dead */\0/* dead */" - "\0return b + 1;\0return b + 1;\0return b + 1;"), - .insns = { - BPF_JMP_IMM(BPF_JA, 0, 0, 0), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), - BPF_CALL_REL(3), - BPF_CALL_REL(5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_CALL_REL(1), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JA, 0, 0, 0), - BPF_MOV64_REG(BPF_REG_0, 2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info_cnt = 3, - .func_info_rec_size = 8, - .func_info = { {0, 4}, {7, 3}, {10, 5} }, - .line_info = { - BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(8, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(9, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(10, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(11, 0, NAME_TBD, 2, 9), - BPF_LINE_INFO_ENC(12, 0, NAME_TBD, 2, 9), - BPF_LINE_INFO_ENC(13, 0, NAME_TBD, 2, 9), - BTF_END_RAW, - }, - .line_info_rec_size = sizeof(struct bpf_line_info), - .nr_jited_ksyms = 2, - .dead_code_cnt = 5, - .dead_code_mask = 0x1e2, - .dead_func_cnt = 1, - .dead_func_mask = 0x2, -}, - -{ - .descr = "line_info (dead subprog + dead start w/ move)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ - BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ - BTF_FUNC_ENC(NAME_TBD, 2), /* [5] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0x\0dead\0main\0func\0int a=1+1;\0/* live call */" - "\0return 0;\0return 0;\0/* dead */\0/* dead */" - "\0/* dead */\0return bla + 1;\0return bla + 1;" - "\0return bla + 1;\0return func(a);\0b+=1;\0return b;"), - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), - BPF_CALL_REL(3), - BPF_CALL_REL(5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_CALL_REL(1), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JA, 0, 0, 0), - BPF_MOV64_REG(BPF_REG_0, 2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info_cnt = 3, - .func_info_rec_size = 8, - .func_info = { {0, 4}, {6, 3}, {9, 5} }, - .line_info = { - BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(8, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(9, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(11, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(12, 0, NAME_TBD, 2, 9), - BTF_END_RAW, - }, - .line_info_rec_size = sizeof(struct bpf_line_info), - .nr_jited_ksyms = 2, - .dead_code_cnt = 3, - .dead_code_mask = 0x70, - .dead_func_cnt = 1, - .dead_func_mask = 0x2, -}, - -{ - .descr = "line_info (dead end + subprog start w/ no linfo)", - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ - BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0x\0main\0func\0/* main linfo */\0/* func linfo */"), - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 1, 3), - BPF_CALL_REL(3), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JA, 0, 0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .func_info_cnt = 2, - .func_info_rec_size = 8, - .func_info = { {0, 3}, {6, 4}, }, - .line_info = { - BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), - BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), - BTF_END_RAW, - }, - .line_info_rec_size = sizeof(struct bpf_line_info), - .nr_jited_ksyms = 2, -}, - -}; - -static size_t probe_prog_length(const struct bpf_insn *fp) -{ - size_t len; - - for (len = MAX_INSNS - 1; len > 0; --len) - if (fp[len].code != 0 || fp[len].imm != 0) - break; - return len + 1; -} - -static __u32 *patch_name_tbd(const __u32 *raw_u32, - const char *str, __u32 str_off, - unsigned int str_sec_size, - unsigned int *ret_size) -{ - int i, raw_u32_size = get_raw_sec_size(raw_u32); - const char *end_str = str + str_sec_size; - const char *next_str = str + str_off; - __u32 *new_u32 = NULL; - - if (raw_u32_size == -1) - return ERR_PTR(-EINVAL); - - if (!raw_u32_size) { - *ret_size = 0; - return NULL; - } - - new_u32 = malloc(raw_u32_size); - if (!new_u32) - return ERR_PTR(-ENOMEM); - - for (i = 0; i < raw_u32_size / sizeof(raw_u32[0]); i++) { - if (raw_u32[i] == NAME_TBD) { - next_str = get_next_str(next_str, end_str); - if (CHECK(!next_str, "Error in getting next_str\n")) { - free(new_u32); - return ERR_PTR(-EINVAL); - } - new_u32[i] = next_str - str; - next_str += strlen(next_str); - } else { - new_u32[i] = raw_u32[i]; - } - } - - *ret_size = raw_u32_size; - return new_u32; -} - -static int test_get_finfo(const struct prog_info_raw_test *test, - int prog_fd) -{ - struct bpf_prog_info info = {}; - struct bpf_func_info *finfo; - __u32 info_len, rec_size, i; - void *func_info = NULL; - __u32 nr_func_info; - int err; - - /* get necessary lens */ - info_len = sizeof(struct bpf_prog_info); - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) { - fprintf(stderr, "%s\n", btf_log_buf); - return -1; - } - nr_func_info = test->func_info_cnt - test->dead_func_cnt; - if (CHECK(info.nr_func_info != nr_func_info, - "incorrect info.nr_func_info (1st) %d", - info.nr_func_info)) { - return -1; - } - - rec_size = info.func_info_rec_size; - if (CHECK(rec_size != sizeof(struct bpf_func_info), - "incorrect info.func_info_rec_size (1st) %d", rec_size)) { - return -1; - } - - if (!info.nr_func_info) - return 0; - - func_info = malloc(info.nr_func_info * rec_size); - if (CHECK(!func_info, "out of memory")) - return -1; - - /* reset info to only retrieve func_info related data */ - memset(&info, 0, sizeof(info)); - info.nr_func_info = nr_func_info; - info.func_info_rec_size = rec_size; - info.func_info = ptr_to_u64(func_info); - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) { - fprintf(stderr, "%s\n", btf_log_buf); - err = -1; - goto done; - } - if (CHECK(info.nr_func_info != nr_func_info, - "incorrect info.nr_func_info (2nd) %d", - info.nr_func_info)) { - err = -1; - goto done; - } - if (CHECK(info.func_info_rec_size != rec_size, - "incorrect info.func_info_rec_size (2nd) %d", - info.func_info_rec_size)) { - err = -1; - goto done; - } - - finfo = func_info; - for (i = 0; i < nr_func_info; i++) { - if (test->dead_func_mask & (1 << i)) - continue; - if (CHECK(finfo->type_id != test->func_info[i][1], - "incorrect func_type %u expected %u", - finfo->type_id, test->func_info[i][1])) { - err = -1; - goto done; - } - finfo = (void *)finfo + rec_size; - } - - err = 0; - -done: - free(func_info); - return err; -} - -static int test_get_linfo(const struct prog_info_raw_test *test, - const void *patched_linfo, - __u32 cnt, int prog_fd) -{ - __u32 i, info_len, nr_jited_ksyms, nr_jited_func_lens; - __u64 *jited_linfo = NULL, *jited_ksyms = NULL; - __u32 rec_size, jited_rec_size, jited_cnt; - struct bpf_line_info *linfo = NULL; - __u32 cur_func_len, ksyms_found; - struct bpf_prog_info info = {}; - __u32 *jited_func_lens = NULL; - __u64 cur_func_ksyms; - __u32 dead_insns; - int err; - - jited_cnt = cnt; - rec_size = sizeof(*linfo); - jited_rec_size = sizeof(*jited_linfo); - if (test->nr_jited_ksyms) - nr_jited_ksyms = test->nr_jited_ksyms; - else - nr_jited_ksyms = test->func_info_cnt - test->dead_func_cnt; - nr_jited_func_lens = nr_jited_ksyms; - - info_len = sizeof(struct bpf_prog_info); - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (CHECK(err == -1, "err:%d errno:%d", err, errno)) { - err = -1; - goto done; - } - - if (!info.jited_prog_len) { - /* prog is not jited */ - jited_cnt = 0; - nr_jited_ksyms = 1; - nr_jited_func_lens = 1; - } - - if (CHECK(info.nr_line_info != cnt || - info.nr_jited_line_info != jited_cnt || - info.nr_jited_ksyms != nr_jited_ksyms || - info.nr_jited_func_lens != nr_jited_func_lens || - (!info.nr_line_info && info.nr_jited_line_info), - "info: nr_line_info:%u(expected:%u) nr_jited_line_info:%u(expected:%u) nr_jited_ksyms:%u(expected:%u) nr_jited_func_lens:%u(expected:%u)", - info.nr_line_info, cnt, - info.nr_jited_line_info, jited_cnt, - info.nr_jited_ksyms, nr_jited_ksyms, - info.nr_jited_func_lens, nr_jited_func_lens)) { - err = -1; - goto done; - } - - if (CHECK(info.line_info_rec_size != sizeof(struct bpf_line_info) || - info.jited_line_info_rec_size != sizeof(__u64), - "info: line_info_rec_size:%u(userspace expected:%u) jited_line_info_rec_size:%u(userspace expected:%u)", - info.line_info_rec_size, rec_size, - info.jited_line_info_rec_size, jited_rec_size)) { - err = -1; - goto done; - } - - if (!cnt) - return 0; - - rec_size = info.line_info_rec_size; - jited_rec_size = info.jited_line_info_rec_size; - - memset(&info, 0, sizeof(info)); - - linfo = calloc(cnt, rec_size); - if (CHECK(!linfo, "!linfo")) { - err = -1; - goto done; - } - info.nr_line_info = cnt; - info.line_info_rec_size = rec_size; - info.line_info = ptr_to_u64(linfo); - - if (jited_cnt) { - jited_linfo = calloc(jited_cnt, jited_rec_size); - jited_ksyms = calloc(nr_jited_ksyms, sizeof(*jited_ksyms)); - jited_func_lens = calloc(nr_jited_func_lens, - sizeof(*jited_func_lens)); - if (CHECK(!jited_linfo || !jited_ksyms || !jited_func_lens, - "jited_linfo:%p jited_ksyms:%p jited_func_lens:%p", - jited_linfo, jited_ksyms, jited_func_lens)) { - err = -1; - goto done; - } - - info.nr_jited_line_info = jited_cnt; - info.jited_line_info_rec_size = jited_rec_size; - info.jited_line_info = ptr_to_u64(jited_linfo); - info.nr_jited_ksyms = nr_jited_ksyms; - info.jited_ksyms = ptr_to_u64(jited_ksyms); - info.nr_jited_func_lens = nr_jited_func_lens; - info.jited_func_lens = ptr_to_u64(jited_func_lens); - } - - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - - /* - * Only recheck the info.*line_info* fields. - * Other fields are not the concern of this test. - */ - if (CHECK(err == -1 || - info.nr_line_info != cnt || - (jited_cnt && !info.jited_line_info) || - info.nr_jited_line_info != jited_cnt || - info.line_info_rec_size != rec_size || - info.jited_line_info_rec_size != jited_rec_size, - "err:%d errno:%d info: nr_line_info:%u(expected:%u) nr_jited_line_info:%u(expected:%u) line_info_rec_size:%u(expected:%u) jited_linfo_rec_size:%u(expected:%u) line_info:%p jited_line_info:%p", - err, errno, - info.nr_line_info, cnt, - info.nr_jited_line_info, jited_cnt, - info.line_info_rec_size, rec_size, - info.jited_line_info_rec_size, jited_rec_size, - (void *)(long)info.line_info, - (void *)(long)info.jited_line_info)) { - err = -1; - goto done; - } - - dead_insns = 0; - while (test->dead_code_mask & (1 << dead_insns)) - dead_insns++; - - CHECK(linfo[0].insn_off, "linfo[0].insn_off:%u", - linfo[0].insn_off); - for (i = 1; i < cnt; i++) { - const struct bpf_line_info *expected_linfo; - - while (test->dead_code_mask & (1 << (i + dead_insns))) - dead_insns++; - - expected_linfo = patched_linfo + - ((i + dead_insns) * test->line_info_rec_size); - if (CHECK(linfo[i].insn_off <= linfo[i - 1].insn_off, - "linfo[%u].insn_off:%u <= linfo[%u].insn_off:%u", - i, linfo[i].insn_off, - i - 1, linfo[i - 1].insn_off)) { - err = -1; - goto done; - } - if (CHECK(linfo[i].file_name_off != expected_linfo->file_name_off || - linfo[i].line_off != expected_linfo->line_off || - linfo[i].line_col != expected_linfo->line_col, - "linfo[%u] (%u, %u, %u) != (%u, %u, %u)", i, - linfo[i].file_name_off, - linfo[i].line_off, - linfo[i].line_col, - expected_linfo->file_name_off, - expected_linfo->line_off, - expected_linfo->line_col)) { - err = -1; - goto done; - } - } - - if (!jited_cnt) { - fprintf(stderr, "not jited. skipping jited_line_info check. "); - err = 0; - goto done; - } - - if (CHECK(jited_linfo[0] != jited_ksyms[0], - "jited_linfo[0]:%lx != jited_ksyms[0]:%lx", - (long)(jited_linfo[0]), (long)(jited_ksyms[0]))) { - err = -1; - goto done; - } - - ksyms_found = 1; - cur_func_len = jited_func_lens[0]; - cur_func_ksyms = jited_ksyms[0]; - for (i = 1; i < jited_cnt; i++) { - if (ksyms_found < nr_jited_ksyms && - jited_linfo[i] == jited_ksyms[ksyms_found]) { - cur_func_ksyms = jited_ksyms[ksyms_found]; - cur_func_len = jited_ksyms[ksyms_found]; - ksyms_found++; - continue; - } - - if (CHECK(jited_linfo[i] <= jited_linfo[i - 1], - "jited_linfo[%u]:%lx <= jited_linfo[%u]:%lx", - i, (long)jited_linfo[i], - i - 1, (long)(jited_linfo[i - 1]))) { - err = -1; - goto done; - } - - if (CHECK(jited_linfo[i] - cur_func_ksyms > cur_func_len, - "jited_linfo[%u]:%lx - %lx > %u", - i, (long)jited_linfo[i], (long)cur_func_ksyms, - cur_func_len)) { - err = -1; - goto done; - } - } - - if (CHECK(ksyms_found != nr_jited_ksyms, - "ksyms_found:%u != nr_jited_ksyms:%u", - ksyms_found, nr_jited_ksyms)) { - err = -1; - goto done; - } - - err = 0; - -done: - free(linfo); - free(jited_linfo); - free(jited_ksyms); - free(jited_func_lens); - return err; -} - -static int do_test_info_raw(unsigned int test_num) -{ - const struct prog_info_raw_test *test = &info_raw_tests[test_num - 1]; - unsigned int raw_btf_size, linfo_str_off, linfo_size; - int btf_fd = -1, prog_fd = -1, err = 0; - void *raw_btf, *patched_linfo = NULL; - const char *ret_next_str; - union bpf_attr attr = {}; - - fprintf(stderr, "BTF prog info raw test[%u] (%s): ", test_num, test->descr); - raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, - test->str_sec, test->str_sec_size, - &raw_btf_size, &ret_next_str); - - if (!raw_btf) - return -1; - - *btf_log_buf = '\0'; - btf_fd = bpf_load_btf(raw_btf, raw_btf_size, - btf_log_buf, BTF_LOG_BUF_SIZE, - args.always_log); - free(raw_btf); - - if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) { - err = -1; - goto done; - } - - if (*btf_log_buf && args.always_log) - fprintf(stderr, "\n%s", btf_log_buf); - *btf_log_buf = '\0'; - - linfo_str_off = ret_next_str - test->str_sec; - patched_linfo = patch_name_tbd(test->line_info, - test->str_sec, linfo_str_off, - test->str_sec_size, &linfo_size); - if (IS_ERR(patched_linfo)) { - fprintf(stderr, "error in creating raw bpf_line_info"); - err = -1; - goto done; - } - - attr.prog_type = test->prog_type; - attr.insns = ptr_to_u64(test->insns); - attr.insn_cnt = probe_prog_length(test->insns); - attr.license = ptr_to_u64("GPL"); - attr.prog_btf_fd = btf_fd; - attr.func_info_rec_size = test->func_info_rec_size; - attr.func_info_cnt = test->func_info_cnt; - attr.func_info = ptr_to_u64(test->func_info); - attr.log_buf = ptr_to_u64(btf_log_buf); - attr.log_size = BTF_LOG_BUF_SIZE; - attr.log_level = 1; - if (linfo_size) { - attr.line_info_rec_size = test->line_info_rec_size; - attr.line_info = ptr_to_u64(patched_linfo); - attr.line_info_cnt = linfo_size / attr.line_info_rec_size; - } - - prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); - err = ((prog_fd == -1) != test->expected_prog_load_failure); - if (CHECK(err, "prog_fd:%d expected_prog_load_failure:%u errno:%d", - prog_fd, test->expected_prog_load_failure, errno) || - CHECK(test->err_str && !strstr(btf_log_buf, test->err_str), - "expected err_str:%s", test->err_str)) { - err = -1; - goto done; - } - - if (prog_fd == -1) - goto done; - - err = test_get_finfo(test, prog_fd); - if (err) - goto done; - - err = test_get_linfo(test, patched_linfo, - attr.line_info_cnt - test->dead_code_cnt, - prog_fd); - if (err) - goto done; - -done: - if (!err) - fprintf(stderr, "OK"); - - if (*btf_log_buf && (err || args.always_log)) - fprintf(stderr, "\n%s", btf_log_buf); - - if (btf_fd != -1) - close(btf_fd); - if (prog_fd != -1) - close(prog_fd); - - if (!IS_ERR(patched_linfo)) - free(patched_linfo); - - return err; -} - -static int test_info_raw(void) -{ - unsigned int i; - int err = 0; - - if (args.info_raw_test_num) - return count_result(do_test_info_raw(args.info_raw_test_num)); - - for (i = 1; i <= ARRAY_SIZE(info_raw_tests); i++) - err |= count_result(do_test_info_raw(i)); - - return err; -} - -struct btf_raw_data { - __u32 raw_types[MAX_NR_RAW_U32]; - const char *str_sec; - __u32 str_sec_size; -}; - -struct btf_dedup_test { - const char *descr; - struct btf_raw_data input; - struct btf_raw_data expect; - struct btf_dedup_opts opts; -}; - -const struct btf_dedup_test dedup_tests[] = { - -{ - .descr = "dedup: unused strings filtering", - .input = { - .raw_types = { - BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 32, 4), - BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 64, 8), - BTF_END_RAW, - }, - BTF_STR_SEC("\0unused\0int\0foo\0bar\0long"), - }, - .expect = { - .raw_types = { - BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), - BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 64, 8), - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0long"), - }, - .opts = { - .dont_resolve_fwds = false, - }, -}, -{ - .descr = "dedup: strings deduplication", - .input = { - .raw_types = { - BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), - BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 64, 8), - BTF_TYPE_INT_ENC(NAME_NTH(3), BTF_INT_SIGNED, 0, 32, 4), - BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 64, 8), - BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 32, 4), - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0long int\0int\0long int\0int"), - }, - .expect = { - .raw_types = { - BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), - BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 64, 8), - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0long int"), - }, - .opts = { - .dont_resolve_fwds = false, - }, -}, -{ - .descr = "dedup: struct example #1", - /* - * struct s { - * struct s *next; - * const int *a; - * int b[16]; - * int c; - * } - */ - .input = { - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* int[16] */ - BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */ - /* struct s { */ - BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [3] */ - BTF_MEMBER_ENC(NAME_NTH(3), 4, 0), /* struct s *next; */ - BTF_MEMBER_ENC(NAME_NTH(4), 5, 64), /* const int *a; */ - BTF_MEMBER_ENC(NAME_NTH(5), 2, 128), /* int b[16]; */ - BTF_MEMBER_ENC(NAME_NTH(6), 1, 640), /* int c; */ - /* ptr -> [3] struct s */ - BTF_PTR_ENC(3), /* [4] */ - /* ptr -> [6] const int */ - BTF_PTR_ENC(6), /* [5] */ - /* const -> [1] int */ - BTF_CONST_ENC(1), /* [6] */ - - /* full copy of the above */ - BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [7] */ - BTF_TYPE_ARRAY_ENC(7, 7, 16), /* [8] */ - BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [9] */ - BTF_MEMBER_ENC(NAME_NTH(3), 10, 0), - BTF_MEMBER_ENC(NAME_NTH(4), 11, 64), - BTF_MEMBER_ENC(NAME_NTH(5), 8, 128), - BTF_MEMBER_ENC(NAME_NTH(6), 7, 640), - BTF_PTR_ENC(9), /* [10] */ - BTF_PTR_ENC(12), /* [11] */ - BTF_CONST_ENC(7), /* [12] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0"), - }, - .expect = { - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* int[16] */ - BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */ - /* struct s { */ - BTF_STRUCT_ENC(NAME_NTH(6), 4, 84), /* [3] */ - BTF_MEMBER_ENC(NAME_NTH(5), 4, 0), /* struct s *next; */ - BTF_MEMBER_ENC(NAME_NTH(1), 5, 64), /* const int *a; */ - BTF_MEMBER_ENC(NAME_NTH(2), 2, 128), /* int b[16]; */ - BTF_MEMBER_ENC(NAME_NTH(3), 1, 640), /* int c; */ - /* ptr -> [3] struct s */ - BTF_PTR_ENC(3), /* [4] */ - /* ptr -> [6] const int */ - BTF_PTR_ENC(6), /* [5] */ - /* const -> [1] int */ - BTF_CONST_ENC(1), /* [6] */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0a\0b\0c\0int\0next\0s"), - }, - .opts = { - .dont_resolve_fwds = false, - }, -}, -{ - .descr = "dedup: struct <-> fwd resolution w/ hash collision", - /* - * // CU 1: - * struct x; - * struct s { - * struct x *x; - * }; - * // CU 2: - * struct x {}; - * struct s { - * struct x *x; - * }; - */ - .input = { - .raw_types = { - /* CU 1 */ - BTF_FWD_ENC(NAME_TBD, 0 /* struct fwd */), /* [1] fwd x */ - BTF_PTR_ENC(1), /* [2] ptr -> [1] */ - BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [3] struct s */ - BTF_MEMBER_ENC(NAME_TBD, 2, 0), - /* CU 2 */ - BTF_STRUCT_ENC(NAME_TBD, 0, 0), /* [4] struct x */ - BTF_PTR_ENC(4), /* [5] ptr -> [4] */ - BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [6] struct s */ - BTF_MEMBER_ENC(NAME_TBD, 5, 0), - BTF_END_RAW, - }, - BTF_STR_SEC("\0x\0s\0x\0x\0s\0x\0"), - }, - .expect = { - .raw_types = { - BTF_PTR_ENC(3), /* [1] ptr -> [3] */ - BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [2] struct s */ - BTF_MEMBER_ENC(NAME_TBD, 1, 0), - BTF_STRUCT_ENC(NAME_NTH(2), 0, 0), /* [3] struct x */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0s\0x"), - }, - .opts = { - .dont_resolve_fwds = false, - .dedup_table_size = 1, /* force hash collisions */ - }, -}, -{ - .descr = "dedup: void equiv check", - /* - * // CU 1: - * struct s { - * struct {} *x; - * }; - * // CU 2: - * struct s { - * int *x; - * }; - */ - .input = { - .raw_types = { - /* CU 1 */ - BTF_STRUCT_ENC(0, 0, 1), /* [1] struct {} */ - BTF_PTR_ENC(1), /* [2] ptr -> [1] */ - BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [3] struct s */ - BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), - /* CU 2 */ - BTF_PTR_ENC(0), /* [4] ptr -> void */ - BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [5] struct s */ - BTF_MEMBER_ENC(NAME_NTH(2), 4, 0), - BTF_END_RAW, - }, - BTF_STR_SEC("\0s\0x"), - }, - .expect = { - .raw_types = { - /* CU 1 */ - BTF_STRUCT_ENC(0, 0, 1), /* [1] struct {} */ - BTF_PTR_ENC(1), /* [2] ptr -> [1] */ - BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [3] struct s */ - BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), - /* CU 2 */ - BTF_PTR_ENC(0), /* [4] ptr -> void */ - BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [5] struct s */ - BTF_MEMBER_ENC(NAME_NTH(2), 4, 0), - BTF_END_RAW, - }, - BTF_STR_SEC("\0s\0x"), - }, - .opts = { - .dont_resolve_fwds = false, - .dedup_table_size = 1, /* force hash collisions */ - }, -}, -{ - .descr = "dedup: all possible kinds (no duplicates)", - .input = { - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 8), /* [1] int */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), 4), /* [2] enum */ - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_ENUM_ENC(NAME_TBD, 1), - BTF_FWD_ENC(NAME_TBD, 1 /* union kind_flag */), /* [3] fwd */ - BTF_TYPE_ARRAY_ENC(2, 1, 7), /* [4] array */ - BTF_STRUCT_ENC(NAME_TBD, 1, 4), /* [5] struct */ - BTF_MEMBER_ENC(NAME_TBD, 1, 0), - BTF_UNION_ENC(NAME_TBD, 1, 4), /* [6] union */ - BTF_MEMBER_ENC(NAME_TBD, 1, 0), - BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [7] typedef */ - BTF_PTR_ENC(0), /* [8] ptr */ - BTF_CONST_ENC(8), /* [9] const */ - BTF_VOLATILE_ENC(8), /* [10] volatile */ - BTF_RESTRICT_ENC(8), /* [11] restrict */ - BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), - BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"), - }, - .expect = { - .raw_types = { - BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 8), /* [1] int */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), 4), /* [2] enum */ - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_ENUM_ENC(NAME_TBD, 1), - BTF_FWD_ENC(NAME_TBD, 1 /* union kind_flag */), /* [3] fwd */ - BTF_TYPE_ARRAY_ENC(2, 1, 7), /* [4] array */ - BTF_STRUCT_ENC(NAME_TBD, 1, 4), /* [5] struct */ - BTF_MEMBER_ENC(NAME_TBD, 1, 0), - BTF_UNION_ENC(NAME_TBD, 1, 4), /* [6] union */ - BTF_MEMBER_ENC(NAME_TBD, 1, 0), - BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [7] typedef */ - BTF_PTR_ENC(0), /* [8] ptr */ - BTF_CONST_ENC(8), /* [9] const */ - BTF_VOLATILE_ENC(8), /* [10] volatile */ - BTF_RESTRICT_ENC(8), /* [11] restrict */ - BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */ - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), - BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ - BTF_END_RAW, - }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"), - }, - .opts = { - .dont_resolve_fwds = false, - }, -}, -{ - .descr = "dedup: no int duplicates", - .input = { - .raw_types = { - BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8), - /* different name */ - BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 32, 8), - /* different encoding */ - BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_CHAR, 0, 32, 8), - BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_BOOL, 0, 32, 8), - /* different bit offset */ - BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 8, 32, 8), - /* different bit size */ - BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8), - /* different byte size */ - BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0some other int"), - }, - .expect = { - .raw_types = { - BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8), - /* different name */ - BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 32, 8), - /* different encoding */ - BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_CHAR, 0, 32, 8), - BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_BOOL, 0, 32, 8), - /* different bit offset */ - BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 8, 32, 8), - /* different bit size */ - BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8), - /* different byte size */ - BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), - BTF_END_RAW, - }, - BTF_STR_SEC("\0int\0some other int"), - }, - .opts = { - .dont_resolve_fwds = false, - }, -}, -{ - .descr = "dedup: enum fwd resolution", - .input = { - .raw_types = { - /* [1] fwd enum 'e1' before full enum */ - BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4), - /* [2] full enum 'e1' after fwd */ - BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), - BTF_ENUM_ENC(NAME_NTH(2), 123), - /* [3] full enum 'e2' before fwd */ - BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), - BTF_ENUM_ENC(NAME_NTH(4), 456), - /* [4] fwd enum 'e2' after full enum */ - BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4), - /* [5] incompatible fwd enum with different size */ - BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1), - /* [6] incompatible full enum with different value */ - BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), - BTF_ENUM_ENC(NAME_NTH(2), 321), - BTF_END_RAW, - }, - BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), - }, - .expect = { - .raw_types = { - /* [1] full enum 'e1' */ - BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), - BTF_ENUM_ENC(NAME_NTH(2), 123), - /* [2] full enum 'e2' */ - BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), - BTF_ENUM_ENC(NAME_NTH(4), 456), - /* [3] incompatible fwd enum with different size */ - BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1), - /* [4] incompatible full enum with different value */ - BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), - BTF_ENUM_ENC(NAME_NTH(2), 321), - BTF_END_RAW, - }, - BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), - }, - .opts = { - .dont_resolve_fwds = false, - }, -}, -{ - .descr = "dedup: datasec and vars pass-through", - .input = { - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* static int t */ - BTF_VAR_ENC(NAME_NTH(2), 1, 0), /* [2] */ - /* .bss section */ /* [3] */ - BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(2, 0, 4), - /* int, referenced from [5] */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [4] */ - /* another static int t */ - BTF_VAR_ENC(NAME_NTH(2), 4, 0), /* [5] */ - /* another .bss section */ /* [6] */ - BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(5, 0, 4), - BTF_END_RAW, - }, - BTF_STR_SEC("\0.bss\0t"), - }, - .expect = { - .raw_types = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* static int t */ - BTF_VAR_ENC(NAME_NTH(2), 1, 0), /* [2] */ - /* .bss section */ /* [3] */ - BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(2, 0, 4), - /* another static int t */ - BTF_VAR_ENC(NAME_NTH(2), 1, 0), /* [4] */ - /* another .bss section */ /* [5] */ - BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(4, 0, 4), - BTF_END_RAW, - }, - BTF_STR_SEC("\0.bss\0t"), - }, - .opts = { - .dont_resolve_fwds = false, - .dedup_table_size = 1 - }, -}, - -}; - -static int btf_type_size(const struct btf_type *t) -{ - int base_size = sizeof(struct btf_type); - __u16 vlen = BTF_INFO_VLEN(t->info); - __u16 kind = BTF_INFO_KIND(t->info); - - switch (kind) { - case BTF_KIND_FWD: - case BTF_KIND_CONST: - case BTF_KIND_VOLATILE: - case BTF_KIND_RESTRICT: - case BTF_KIND_PTR: - case BTF_KIND_TYPEDEF: - case BTF_KIND_FUNC: - return base_size; - case BTF_KIND_INT: - return base_size + sizeof(__u32); - case BTF_KIND_ENUM: - return base_size + vlen * sizeof(struct btf_enum); - case BTF_KIND_ARRAY: - return base_size + sizeof(struct btf_array); - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - return base_size + vlen * sizeof(struct btf_member); - case BTF_KIND_FUNC_PROTO: - return base_size + vlen * sizeof(struct btf_param); - case BTF_KIND_VAR: - return base_size + sizeof(struct btf_var); - case BTF_KIND_DATASEC: - return base_size + vlen * sizeof(struct btf_var_secinfo); - default: - fprintf(stderr, "Unsupported BTF_KIND:%u\n", kind); - return -EINVAL; - } -} - -static void dump_btf_strings(const char *strs, __u32 len) -{ - const char *cur = strs; - int i = 0; - - while (cur < strs + len) { - fprintf(stderr, "string #%d: '%s'\n", i, cur); - cur += strlen(cur) + 1; - i++; - } -} - -static int do_test_dedup(unsigned int test_num) -{ - const struct btf_dedup_test *test = &dedup_tests[test_num - 1]; - __u32 test_nr_types, expect_nr_types, test_btf_size, expect_btf_size; - const struct btf_header *test_hdr, *expect_hdr; - struct btf *test_btf = NULL, *expect_btf = NULL; - const void *test_btf_data, *expect_btf_data; - const char *ret_test_next_str, *ret_expect_next_str; - const char *test_strs, *expect_strs; - const char *test_str_cur, *test_str_end; - const char *expect_str_cur, *expect_str_end; - unsigned int raw_btf_size; - void *raw_btf; - int err = 0, i; - - fprintf(stderr, "BTF dedup test[%u] (%s):", test_num, test->descr); - - raw_btf = btf_raw_create(&hdr_tmpl, test->input.raw_types, - test->input.str_sec, test->input.str_sec_size, - &raw_btf_size, &ret_test_next_str); - if (!raw_btf) - return -1; - test_btf = btf__new((__u8 *)raw_btf, raw_btf_size); - free(raw_btf); - if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld", - PTR_ERR(test_btf))) { - err = -1; - goto done; - } - - raw_btf = btf_raw_create(&hdr_tmpl, test->expect.raw_types, - test->expect.str_sec, - test->expect.str_sec_size, - &raw_btf_size, &ret_expect_next_str); - if (!raw_btf) - return -1; - expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size); - free(raw_btf); - if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld", - PTR_ERR(expect_btf))) { - err = -1; - goto done; - } - - err = btf__dedup(test_btf, NULL, &test->opts); - if (CHECK(err, "btf_dedup failed errno:%d", err)) { - err = -1; - goto done; - } - - test_btf_data = btf__get_raw_data(test_btf, &test_btf_size); - expect_btf_data = btf__get_raw_data(expect_btf, &expect_btf_size); - if (CHECK(test_btf_size != expect_btf_size, - "test_btf_size:%u != expect_btf_size:%u", - test_btf_size, expect_btf_size)) { - err = -1; - goto done; - } - - test_hdr = test_btf_data; - test_strs = test_btf_data + sizeof(*test_hdr) + test_hdr->str_off; - expect_hdr = expect_btf_data; - expect_strs = expect_btf_data + sizeof(*test_hdr) + expect_hdr->str_off; - if (CHECK(test_hdr->str_len != expect_hdr->str_len, - "test_hdr->str_len:%u != expect_hdr->str_len:%u", - test_hdr->str_len, expect_hdr->str_len)) { - fprintf(stderr, "\ntest strings:\n"); - dump_btf_strings(test_strs, test_hdr->str_len); - fprintf(stderr, "\nexpected strings:\n"); - dump_btf_strings(expect_strs, expect_hdr->str_len); - err = -1; - goto done; - } - - test_str_cur = test_strs; - test_str_end = test_strs + test_hdr->str_len; - expect_str_cur = expect_strs; - expect_str_end = expect_strs + expect_hdr->str_len; - while (test_str_cur < test_str_end && expect_str_cur < expect_str_end) { - size_t test_len, expect_len; - - test_len = strlen(test_str_cur); - expect_len = strlen(expect_str_cur); - if (CHECK(test_len != expect_len, - "test_len:%zu != expect_len:%zu " - "(test_str:%s, expect_str:%s)", - test_len, expect_len, test_str_cur, expect_str_cur)) { - err = -1; - goto done; - } - if (CHECK(strcmp(test_str_cur, expect_str_cur), - "test_str:%s != expect_str:%s", - test_str_cur, expect_str_cur)) { - err = -1; - goto done; - } - test_str_cur += test_len + 1; - expect_str_cur += expect_len + 1; - } - if (CHECK(test_str_cur != test_str_end, - "test_str_cur:%p != test_str_end:%p", - test_str_cur, test_str_end)) { - err = -1; - goto done; - } - - test_nr_types = btf__get_nr_types(test_btf); - expect_nr_types = btf__get_nr_types(expect_btf); - if (CHECK(test_nr_types != expect_nr_types, - "test_nr_types:%u != expect_nr_types:%u", - test_nr_types, expect_nr_types)) { - err = -1; - goto done; - } - - for (i = 1; i <= test_nr_types; i++) { - const struct btf_type *test_type, *expect_type; - int test_size, expect_size; - - test_type = btf__type_by_id(test_btf, i); - expect_type = btf__type_by_id(expect_btf, i); - test_size = btf_type_size(test_type); - expect_size = btf_type_size(expect_type); - - if (CHECK(test_size != expect_size, - "type #%d: test_size:%d != expect_size:%u", - i, test_size, expect_size)) { - err = -1; - goto done; - } - if (CHECK(memcmp((void *)test_type, - (void *)expect_type, - test_size), - "type #%d: contents differ", i)) { - err = -1; - goto done; - } - } - -done: - if (!err) - fprintf(stderr, "OK"); - if (!IS_ERR(test_btf)) - btf__free(test_btf); - if (!IS_ERR(expect_btf)) - btf__free(expect_btf); - - return err; -} - -static int test_dedup(void) -{ - unsigned int i; - int err = 0; - - if (args.dedup_test_num) - return count_result(do_test_dedup(args.dedup_test_num)); - - for (i = 1; i <= ARRAY_SIZE(dedup_tests); i++) - err |= count_result(do_test_dedup(i)); - - return err; -} - -static void usage(const char *cmd) -{ - fprintf(stderr, "Usage: %s [-l] [[-r btf_raw_test_num (1 - %zu)] |\n" - "\t[-g btf_get_info_test_num (1 - %zu)] |\n" - "\t[-f btf_file_test_num (1 - %zu)] |\n" - "\t[-k btf_prog_info_raw_test_num (1 - %zu)] |\n" - "\t[-p (pretty print test)] |\n" - "\t[-d btf_dedup_test_num (1 - %zu)]]\n", - cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests), - ARRAY_SIZE(file_tests), ARRAY_SIZE(info_raw_tests), - ARRAY_SIZE(dedup_tests)); -} - -static int parse_args(int argc, char **argv) -{ - const char *optstr = "hlpk:f:r:g:d:"; - int opt; - - while ((opt = getopt(argc, argv, optstr)) != -1) { - switch (opt) { - case 'l': - args.always_log = true; - break; - case 'f': - args.file_test_num = atoi(optarg); - args.file_test = true; - break; - case 'r': - args.raw_test_num = atoi(optarg); - args.raw_test = true; - break; - case 'g': - args.get_info_test_num = atoi(optarg); - args.get_info_test = true; - break; - case 'p': - args.pprint_test = true; - break; - case 'k': - args.info_raw_test_num = atoi(optarg); - args.info_raw_test = true; - break; - case 'd': - args.dedup_test_num = atoi(optarg); - args.dedup_test = true; - break; - case 'h': - usage(argv[0]); - exit(0); - default: - usage(argv[0]); - return -1; - } - } - - if (args.raw_test_num && - (args.raw_test_num < 1 || - args.raw_test_num > ARRAY_SIZE(raw_tests))) { - fprintf(stderr, "BTF raw test number must be [1 - %zu]\n", - ARRAY_SIZE(raw_tests)); - return -1; - } - - if (args.file_test_num && - (args.file_test_num < 1 || - args.file_test_num > ARRAY_SIZE(file_tests))) { - fprintf(stderr, "BTF file test number must be [1 - %zu]\n", - ARRAY_SIZE(file_tests)); - return -1; - } - - if (args.get_info_test_num && - (args.get_info_test_num < 1 || - args.get_info_test_num > ARRAY_SIZE(get_info_tests))) { - fprintf(stderr, "BTF get info test number must be [1 - %zu]\n", - ARRAY_SIZE(get_info_tests)); - return -1; - } - - if (args.info_raw_test_num && - (args.info_raw_test_num < 1 || - args.info_raw_test_num > ARRAY_SIZE(info_raw_tests))) { - fprintf(stderr, "BTF prog info raw test number must be [1 - %zu]\n", - ARRAY_SIZE(info_raw_tests)); - return -1; - } - - if (args.dedup_test_num && - (args.dedup_test_num < 1 || - args.dedup_test_num > ARRAY_SIZE(dedup_tests))) { - fprintf(stderr, "BTF dedup test number must be [1 - %zu]\n", - ARRAY_SIZE(dedup_tests)); - return -1; - } - - return 0; -} - -static void print_summary(void) -{ - fprintf(stderr, "PASS:%u SKIP:%u FAIL:%u\n", - pass_cnt - skip_cnt, skip_cnt, error_cnt); -} - -int main(int argc, char **argv) -{ - int err = 0; - - err = parse_args(argc, argv); - if (err) - return err; - - if (args.always_log) - libbpf_set_print(__base_pr); - - if (args.raw_test) - err |= test_raw(); - - if (args.get_info_test) - err |= test_get_info(); - - if (args.file_test) - err |= test_file(); - - if (args.pprint_test) - err |= test_pprint(); - - if (args.info_raw_test) - err |= test_info_raw(); - - if (args.dedup_test) - err |= test_dedup(); - - if (args.raw_test || args.get_info_test || args.file_test || - args.pprint_test || args.info_raw_test || args.dedup_test) - goto done; - - err |= test_raw(); - err |= test_get_info(); - err |= test_file(); - err |= test_info_raw(); - err |= test_dedup(); - -done: - print_summary(); - return err; -} -- cgit v1.3.1 From 5d680be3b014d5836ad6c2a8bdda70cce89dbfc8 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 17 Sep 2020 15:02:19 +0900 Subject: perf parse-event: Fix cpu map refcounting Like evlist cpu map, evsel's cpu map should have a proper refcount. As it's created with a refcount, we don't need to get an extra count. Thanks to Arnaldo for the simpler suggestion. This, together with the following patch, fixes the following ASAN report: Direct leak of 840 byte(s) in 70 object(s) allocated from: #0 0x7fe36703f628 in malloc (/lib/x86_64-linux-gnu/libasan.so.5+0x107628) #1 0x559fbbf611ca in cpu_map__trim_new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:79 #2 0x559fbbf6229c in perf_cpu_map__new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:237 #3 0x559fbbcc6c6d in __add_event util/parse-events.c:357 #4 0x559fbbcc6c6d in add_event_tool util/parse-events.c:408 #5 0x559fbbcc6c6d in parse_events_add_tool util/parse-events.c:1414 #6 0x559fbbd8474d in parse_events_parse util/parse-events.y:439 #7 0x559fbbcc95da in parse_events__scanner util/parse-events.c:2096 #8 0x559fbbcc95da in __parse_events util/parse-events.c:2141 #9 0x559fbbc2788b in check_parse_id tests/pmu-events.c:406 #10 0x559fbbc2788b in check_parse_id tests/pmu-events.c:393 #11 0x559fbbc2788b in check_parse_fake tests/pmu-events.c:436 #12 0x559fbbc2788b in metric_parse_fake tests/pmu-events.c:553 #13 0x559fbbc27e2d in test_parsing_fake tests/pmu-events.c:599 #14 0x559fbbc27e2d in test_parsing_fake tests/pmu-events.c:574 #15 0x559fbbc0109b in run_test tests/builtin-test.c:410 #16 0x559fbbc0109b in test_and_print tests/builtin-test.c:440 #17 0x559fbbc03e69 in __cmd_test tests/builtin-test.c:695 #18 0x559fbbc03e69 in cmd_test tests/builtin-test.c:807 #19 0x559fbbc691f4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:312 #20 0x559fbbb071a8 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:364 #21 0x559fbbb071a8 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:408 #22 0x559fbbb071a8 in main /home/namhyung/project/linux/tools/perf/perf.c:538 #23 0x7fe366b68cc9 in __libc_start_main ../csu/libc-start.c:308 And I've failed which commit introduced this bug as the code was heavily changed since then. ;-/ Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200917060219.1287863-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 667cbca1547a..4c6b3d287d54 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -353,7 +353,7 @@ __add_event(struct list_head *list, int *idx, const char *cpu_list) { struct evsel *evsel; - struct perf_cpu_map *cpus = pmu ? pmu->cpus : + struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) : cpu_list ? perf_cpu_map__new(cpu_list) : NULL; if (init_attr) @@ -364,7 +364,7 @@ __add_event(struct list_head *list, int *idx, return NULL; (*idx)++; - evsel->core.cpus = perf_cpu_map__get(cpus); + evsel->core.cpus = cpus; evsel->core.own_cpus = perf_cpu_map__get(cpus); evsel->core.system_wide = pmu ? pmu->is_uncore : false; evsel->auto_merge_stats = auto_merge_stats; -- cgit v1.3.1 From 0f1b550e29c1f9522f0a7a11ee2cf95d33a98d79 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 17 Sep 2020 15:02:18 +0900 Subject: perf parse-event: Release cpu_map refcount if evsel alloc failed Signed-off-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200917060219.1287863-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4c6b3d287d54..fbe0d3143353 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -360,8 +360,10 @@ __add_event(struct list_head *list, int *idx, event_attr_init(attr); evsel = evsel__new_idx(attr, *idx); - if (!evsel) + if (!evsel) { + perf_cpu_map__put(cpus); return NULL; + } (*idx)++; evsel->core.cpus = cpus; -- cgit v1.3.1 From c57f5eaa094ee93b4e27f6624349bc90d41a4e68 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 13 Sep 2020 23:03:08 +0200 Subject: perf machine: Add machine__for_each_dso() function Add the machine__for_each_dso() to iterate over all dso objects defined for the within a machine object. It will be used in the MMAP3 patch series. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Frank Ch. Eigler Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200913210313.1985612-22-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 12 ++++++++++++ tools/perf/util/machine.h | 4 ++++ 2 files changed, 16 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 85587de027a5..7d4194ffc5b0 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -3100,3 +3100,15 @@ char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, ch *addrp = map->unmap_ip(map, sym->start); return sym->name; } + +int machine__for_each_dso(struct machine *machine, machine__dso_t fn, void *priv) +{ + struct dso *pos; + int err = 0; + + list_for_each_entry(pos, &machine->dsos.head, node) { + if (fn(pos, machine, priv)) + err = -1; + } + return err; +} diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 062c36a8433c..26368d3c1754 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -250,6 +250,10 @@ void machines__destroy_kernel_maps(struct machines *machines); size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp); +typedef int (*machine__dso_t)(struct dso *dso, struct machine *machine, void *priv); + +int machine__for_each_dso(struct machine *machine, machine__dso_t fn, + void *priv); int machine__for_each_thread(struct machine *machine, int (*fn)(struct thread *thread, void *p), void *priv); -- cgit v1.3.1 From 687986bbeb668068d39a47658edf4d7a6414eefb Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 11 Sep 2020 07:48:05 -0700 Subject: perf tools: Rename group to topdown The group.h/c only include TopDown group related functions. The name "group" is too generic and inaccurate. Use the name "topdown" to replace it. Move topdown related functions to a dedicated file, topdown.c. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200911144808.27603-2-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/Build | 2 +- tools/perf/arch/x86/util/group.c | 28 -------------------- tools/perf/arch/x86/util/topdown.c | 28 ++++++++++++++++++++ tools/perf/builtin-stat.c | 51 +----------------------------------- tools/perf/util/Build | 1 + tools/perf/util/group.h | 8 ------ tools/perf/util/topdown.c | 53 ++++++++++++++++++++++++++++++++++++++ tools/perf/util/topdown.h | 10 +++++++ 8 files changed, 94 insertions(+), 87 deletions(-) delete mode 100644 tools/perf/arch/x86/util/group.c create mode 100644 tools/perf/arch/x86/util/topdown.c delete mode 100644 tools/perf/util/group.h create mode 100644 tools/perf/util/topdown.c create mode 100644 tools/perf/util/topdown.h (limited to 'tools') diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 47f9c56e744f..347c39b960eb 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -3,7 +3,7 @@ perf-y += tsc.o perf-y += pmu.o perf-y += kvm-stat.o perf-y += perf_regs.o -perf-y += group.o +perf-y += topdown.o perf-y += machine.o perf-y += event.o diff --git a/tools/perf/arch/x86/util/group.c b/tools/perf/arch/x86/util/group.c deleted file mode 100644 index e2f8034b8973..000000000000 --- a/tools/perf/arch/x86/util/group.c +++ /dev/null @@ -1,28 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include "api/fs/fs.h" -#include "util/group.h" - -/* - * Check whether we can use a group for top down. - * Without a group may get bad results due to multiplexing. - */ -bool arch_topdown_check_group(bool *warn) -{ - int n; - - if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0) - return false; - if (n > 0) { - *warn = true; - return false; - } - return true; -} - -void arch_topdown_group_warn(void) -{ - fprintf(stderr, - "nmi_watchdog enabled with topdown. May give wrong results.\n" - "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n"); -} diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c new file mode 100644 index 000000000000..597e963fb3e7 --- /dev/null +++ b/tools/perf/arch/x86/util/topdown.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "api/fs/fs.h" +#include "util/topdown.h" + +/* + * Check whether we can use a group for top down. + * Without a group may get bad results due to multiplexing. + */ +bool arch_topdown_check_group(bool *warn) +{ + int n; + + if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0) + return false; + if (n > 0) { + *warn = true; + return false; + } + return true; +} + +void arch_topdown_group_warn(void) +{ + fprintf(stderr, + "nmi_watchdog enabled with topdown. May give wrong results.\n" + "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n"); +} diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7f8d756d9408..a39bb28b44f9 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -56,7 +56,7 @@ #include "util/cpumap.h" #include "util/thread_map.h" #include "util/counts.h" -#include "util/group.h" +#include "util/topdown.h" #include "util/session.h" #include "util/tool.h" #include "util/string2.h" @@ -1481,55 +1481,6 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) return 0; } -static int topdown_filter_events(const char **attr, char **str, bool use_group) -{ - int off = 0; - int i; - int len = 0; - char *s; - - for (i = 0; attr[i]; i++) { - if (pmu_have_event("cpu", attr[i])) { - len += strlen(attr[i]) + 1; - attr[i - off] = attr[i]; - } else - off++; - } - attr[i - off] = NULL; - - *str = malloc(len + 1 + 2); - if (!*str) - return -1; - s = *str; - if (i - off == 0) { - *s = 0; - return 0; - } - if (use_group) - *s++ = '{'; - for (i = 0; attr[i]; i++) { - strcpy(s, attr[i]); - s += strlen(s); - *s++ = ','; - } - if (use_group) { - s[-1] = '}'; - *s = 0; - } else - s[-1] = 0; - return 0; -} - -__weak bool arch_topdown_check_group(bool *warn) -{ - *warn = false; - return false; -} - -__weak void arch_topdown_group_warn(void) -{ -} - /* * Add default attributes, if there were no attributes specified or * if -d/--detailed, -d -d or -d -d -d is used: diff --git a/tools/perf/util/Build b/tools/perf/util/Build index cd5e41960e64..eebbd5223cae 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -101,6 +101,7 @@ perf-y += call-path.o perf-y += rwsem.o perf-y += thread-stack.o perf-y += spark.o +perf-y += topdown.o perf-$(CONFIG_AUXTRACE) += auxtrace.o perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ perf-$(CONFIG_AUXTRACE) += intel-pt.o diff --git a/tools/perf/util/group.h b/tools/perf/util/group.h deleted file mode 100644 index f36c7e31780a..000000000000 --- a/tools/perf/util/group.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef GROUP_H -#define GROUP_H 1 - -bool arch_topdown_check_group(bool *warn); -void arch_topdown_group_warn(void); - -#endif diff --git a/tools/perf/util/topdown.c b/tools/perf/util/topdown.c new file mode 100644 index 000000000000..a085b3c77c27 --- /dev/null +++ b/tools/perf/util/topdown.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "pmu.h" +#include "topdown.h" + +int topdown_filter_events(const char **attr, char **str, bool use_group) +{ + int off = 0; + int i; + int len = 0; + char *s; + + for (i = 0; attr[i]; i++) { + if (pmu_have_event("cpu", attr[i])) { + len += strlen(attr[i]) + 1; + attr[i - off] = attr[i]; + } else + off++; + } + attr[i - off] = NULL; + + *str = malloc(len + 1 + 2); + if (!*str) + return -1; + s = *str; + if (i - off == 0) { + *s = 0; + return 0; + } + if (use_group) + *s++ = '{'; + for (i = 0; attr[i]; i++) { + strcpy(s, attr[i]); + s += strlen(s); + *s++ = ','; + } + if (use_group) { + s[-1] = '}'; + *s = 0; + } else + s[-1] = 0; + return 0; +} + +__weak bool arch_topdown_check_group(bool *warn) +{ + *warn = false; + return false; +} + +__weak void arch_topdown_group_warn(void) +{ +} diff --git a/tools/perf/util/topdown.h b/tools/perf/util/topdown.h new file mode 100644 index 000000000000..e3d70e95f4f1 --- /dev/null +++ b/tools/perf/util/topdown.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef TOPDOWN_H +#define TOPDOWN_H 1 + +bool arch_topdown_check_group(bool *warn); +void arch_topdown_group_warn(void); + +int topdown_filter_events(const char **attr, char **str, bool use_group); + +#endif -- cgit v1.3.1 From acb65150a47c2baea5ea02fd7cb66460f7733fcd Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 11 Sep 2020 07:48:06 -0700 Subject: perf record: Support sample-read topdown metric group With the hardware TopDown metrics feature, sample-read feature should be supported for a topdown group, e.g., sample a non-topdown event and read a topdown metric group. But the current perf record code errors out. For a topdown metric group, the slots event must be the leader of the group, but the leader slots event doesn't support sampling. To support sample-read the topdown metric group, use the 2nd event of the group as the "leader" for the purposes of sampling. Only the platform with Topdown metic feature supports sample-read the topdown group. Add arch_topdown_sample_read() to indicate whether the topdown group supports sample-read. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200911144808.27603-3-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/topdown.c | 35 +++++++++++++++++++++++++++++++++++ tools/perf/util/record.c | 3 ++- tools/perf/util/topdown.c | 5 +++++ tools/perf/util/topdown.h | 2 ++ 4 files changed, 44 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c index 597e963fb3e7..2f3d96aa92a5 100644 --- a/tools/perf/arch/x86/util/topdown.c +++ b/tools/perf/arch/x86/util/topdown.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include "api/fs/fs.h" +#include "util/pmu.h" #include "util/topdown.h" /* @@ -26,3 +27,37 @@ void arch_topdown_group_warn(void) "nmi_watchdog enabled with topdown. May give wrong results.\n" "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n"); } + +#define TOPDOWN_SLOTS 0x0400 + +static bool is_topdown_slots_event(struct evsel *counter) +{ + if (!counter->pmu_name) + return false; + + if (strcmp(counter->pmu_name, "cpu")) + return false; + + if (counter->core.attr.config == TOPDOWN_SLOTS) + return true; + + return false; +} + +/* + * Check whether a topdown group supports sample-read. + * + * Only Topdown metic supports sample-read. The slots + * event must be the leader of the topdown group. + */ + +bool arch_topdown_sample_read(struct evsel *leader) +{ + if (!pmu_have_event("cpu", "slots")) + return false; + + if (is_topdown_slots_event(leader)) + return true; + + return false; +} diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index ea9aa1d7cf50..07e4b96a6625 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -14,6 +14,7 @@ #include "util/perf_api_probe.h" #include "record.h" #include "../perf-sys.h" +#include "topdown.h" /* * evsel__config_leader_sampling() uses special rules for leader sampling. @@ -24,7 +25,7 @@ static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evl { struct evsel *leader = evsel->leader; - if (evsel__is_aux_event(leader)) { + if (evsel__is_aux_event(leader) || arch_topdown_sample_read(leader)) { evlist__for_each_entry(evlist, evsel) { if (evsel->leader == leader && evsel != evsel->leader) return evsel; diff --git a/tools/perf/util/topdown.c b/tools/perf/util/topdown.c index a085b3c77c27..1081b20f9891 100644 --- a/tools/perf/util/topdown.c +++ b/tools/perf/util/topdown.c @@ -51,3 +51,8 @@ __weak bool arch_topdown_check_group(bool *warn) __weak void arch_topdown_group_warn(void) { } + +__weak bool arch_topdown_sample_read(struct evsel *leader __maybe_unused) +{ + return false; +} diff --git a/tools/perf/util/topdown.h b/tools/perf/util/topdown.h index e3d70e95f4f1..2f0d0b887639 100644 --- a/tools/perf/util/topdown.h +++ b/tools/perf/util/topdown.h @@ -1,9 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef TOPDOWN_H #define TOPDOWN_H 1 +#include "evsel.h" bool arch_topdown_check_group(bool *warn); void arch_topdown_group_warn(void); +bool arch_topdown_sample_read(struct evsel *leader); int topdown_filter_events(const char **attr, char **str, bool use_group); -- cgit v1.3.1 From 55c36a9fc2aaac6a0c77eeaeefd8d8f691c98e19 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 11 Sep 2020 07:48:07 -0700 Subject: perf stat: Support new per thread TopDown metrics Icelake has support for reporting per thread TopDown metrics. These are reported differently than the previous TopDown support, each metric is standalone, but scaled to pipeline "slots". We don't need to do anything special for HyperThreading anymore. Teach perf stat --topdown to handle these new metrics and print them in the same way as the previous TopDown metrics. The restrictions of only being able to report information per core is gone. Signed-off-by: Andi Kleen Co-developed-by: Kan Liang Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200911144808.27603-4-kan.liang@linux.intel.com Signed-off-by: Kan Liang Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 7 ++- tools/perf/builtin-stat.c | 30 +++++++++++- tools/perf/util/stat-shadow.c | 89 ++++++++++++++++++++++++++++++++++ tools/perf/util/stat.c | 4 ++ tools/perf/util/stat.h | 8 +++ 5 files changed, 134 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 7d18694e592a..5bf3d7ae4660 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -363,6 +363,11 @@ if the workload is actually bound by the CPU and not by something else. For best results it is usually a good idea to use it with interval mode like -I 1000, as the bottleneck of workloads can change often. +This enables --metric-only, unless overridden with --no-metric-only. + +The following restrictions only apply to older Intel CPUs and Atom, +on newer CPUs (IceLake and later) TopDown can be collected for any thread: + The top down metrics are collected per core instead of per CPU thread. Per core mode is automatically enabled and -a (global monitoring) is needed, requiring root rights or @@ -374,8 +379,6 @@ echo 0 > /proc/sys/kernel/nmi_watchdog for best results. Otherwise the bottlenecks may be inconsistent on workload with changing phases. -This enables --metric-only, unless overridden with --no-metric-only. - To interpret the results it is usually needed to know on which CPUs the workload runs on. If needed the CPUs can be forced using taskset. diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a39bb28b44f9..17fbbd03e154 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -128,6 +128,15 @@ static const char * topdown_attrs[] = { NULL, }; +static const char *topdown_metric_attrs[] = { + "slots", + "topdown-retiring", + "topdown-bad-spec", + "topdown-fe-bound", + "topdown-be-bound", + NULL, +}; + static const char *smi_cost_attrs = { "{" "msr/aperf/," @@ -1677,6 +1686,24 @@ static int add_default_attributes(void) char *str = NULL; bool warn = false; + if (!force_metric_only) + stat_config.metric_only = true; + + if (topdown_filter_events(topdown_metric_attrs, &str, 1) < 0) { + pr_err("Out of memory\n"); + return -1; + } + if (topdown_metric_attrs[0] && str) { + if (!stat_config.interval && !stat_config.metric_only) { + fprintf(stat_config.output, + "Topdown accuracy may decrease when measuring long periods.\n" + "Please print the result regularly, e.g. -I1000\n"); + } + goto setup_metrics; + } + + zfree(&str); + if (stat_config.aggr_mode != AGGR_GLOBAL && stat_config.aggr_mode != AGGR_CORE) { pr_err("top down event configuration requires --per-core mode\n"); @@ -1688,8 +1715,6 @@ static int add_default_attributes(void) return -1; } - if (!force_metric_only) - stat_config.metric_only = true; if (topdown_filter_events(topdown_attrs, &str, arch_topdown_check_group(&warn)) < 0) { pr_err("Out of memory\n"); @@ -1698,6 +1723,7 @@ static int add_default_attributes(void) if (topdown_attrs[0] && str) { if (warn) arch_topdown_group_warn(); +setup_metrics: err = parse_events(evsel_list, str, &errinfo); if (err) { fprintf(stderr, diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 924b54d15d54..901265127e36 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -241,6 +241,18 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu, count); + else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING)) + update_runtime_stat(st, STAT_TOPDOWN_RETIRING, + ctx, cpu, count); + else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC)) + update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC, + ctx, cpu, count); + else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND)) + update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND, + ctx, cpu, count); + else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND)) + update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND, + ctx, cpu, count); else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, ctx, cpu, count); @@ -705,6 +717,47 @@ static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) return sanitize_val(1.0 - sum); } +/* + * Kernel reports metrics multiplied with slots. To get back + * the ratios we need to recreate the sum. + */ + +static double td_metric_ratio(int ctx, int cpu, + enum stat_type type, + struct runtime_stat *stat) +{ + double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) + + runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) + + runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) + + runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu); + double d = runtime_stat_avg(stat, type, ctx, cpu); + + if (sum) + return d / sum; + return 0; +} + +/* + * ... but only if most of the values are actually available. + * We allow two missing. + */ + +static bool full_td(int ctx, int cpu, + struct runtime_stat *stat) +{ + int c = 0; + + if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) > 0) + c++; + if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) > 0) + c++; + if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) > 0) + c++; + if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu) > 0) + c++; + return c >= 2; +} + static void print_smi_cost(struct perf_stat_config *config, int cpu, struct evsel *evsel, struct perf_stat_output_ctx *out, @@ -1073,6 +1126,42 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, be_bound * 100.); else print_metric(config, ctxp, NULL, NULL, name, 0); + } else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) && + full_td(ctx, cpu, st)) { + double retiring = td_metric_ratio(ctx, cpu, + STAT_TOPDOWN_RETIRING, st); + + if (retiring > 0.7) + color = PERF_COLOR_GREEN; + print_metric(config, ctxp, color, "%8.1f%%", "retiring", + retiring * 100.); + } else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) && + full_td(ctx, cpu, st)) { + double fe_bound = td_metric_ratio(ctx, cpu, + STAT_TOPDOWN_FE_BOUND, st); + + if (fe_bound > 0.2) + color = PERF_COLOR_RED; + print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", + fe_bound * 100.); + } else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) && + full_td(ctx, cpu, st)) { + double be_bound = td_metric_ratio(ctx, cpu, + STAT_TOPDOWN_BE_BOUND, st); + + if (be_bound > 0.2) + color = PERF_COLOR_RED; + print_metric(config, ctxp, color, "%8.1f%%", "backend bound", + be_bound * 100.); + } else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) && + full_td(ctx, cpu, st)) { + double bad_spec = td_metric_ratio(ctx, cpu, + STAT_TOPDOWN_BAD_SPEC, st); + + if (bad_spec > 0.1) + color = PERF_COLOR_RED; + print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", + bad_spec * 100.); } else if (evsel->metric_expr) { generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, evsel->name, evsel->metric_name, NULL, 1, cpu, out, st); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index cdb154381a87..bd0decd6d753 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -95,6 +95,10 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired), ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles), ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles), + ID(TOPDOWN_RETIRING, topdown-retiring), + ID(TOPDOWN_BAD_SPEC, topdown-bad-spec), + ID(TOPDOWN_FE_BOUND, topdown-fe-bound), + ID(TOPDOWN_BE_BOUND, topdown-be-bound), ID(SMI_NUM, msr/smi/), ID(APERF, msr/aperf/), }; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 9911fc6adbfd..f36c8c92efa0 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -28,6 +28,10 @@ enum perf_stat_evsel_id { PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED, PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES, PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES, + PERF_STAT_EVSEL_ID__TOPDOWN_RETIRING, + PERF_STAT_EVSEL_ID__TOPDOWN_BAD_SPEC, + PERF_STAT_EVSEL_ID__TOPDOWN_FE_BOUND, + PERF_STAT_EVSEL_ID__TOPDOWN_BE_BOUND, PERF_STAT_EVSEL_ID__SMI_NUM, PERF_STAT_EVSEL_ID__APERF, PERF_STAT_EVSEL_ID__MAX, @@ -82,6 +86,10 @@ enum stat_type { STAT_TOPDOWN_SLOTS_RETIRED, STAT_TOPDOWN_FETCH_BUBBLES, STAT_TOPDOWN_RECOVERY_BUBBLES, + STAT_TOPDOWN_RETIRING, + STAT_TOPDOWN_BAD_SPEC, + STAT_TOPDOWN_FE_BOUND, + STAT_TOPDOWN_BE_BOUND, STAT_SMI_NUM, STAT_APERF, STAT_MAX -- cgit v1.3.1 From 328781df86fa8f9299219c74ffae876bd625c6d1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 11 Sep 2020 07:48:08 -0700 Subject: perf tools: Add documentation for topdown metrics Add some documentation how to use the topdown metrics in ring 3. Signed-off-by: Andi Kleen Co-developed-by: Kan Liang Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200911144808.27603-5-kan.liang@linux.intel.com Signed-off-by: Kan Liang Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/topdown.txt | 256 +++++++++++++++++++++++++++++++++++ 1 file changed, 256 insertions(+) create mode 100644 tools/perf/Documentation/topdown.txt (limited to 'tools') diff --git a/tools/perf/Documentation/topdown.txt b/tools/perf/Documentation/topdown.txt new file mode 100644 index 000000000000..3c39bb3dc5fa --- /dev/null +++ b/tools/perf/Documentation/topdown.txt @@ -0,0 +1,256 @@ +Using TopDown metrics in user space +----------------------------------- + +Intel CPUs (since Sandy Bridge and Silvermont) support a TopDown +methology to break down CPU pipeline execution into 4 bottlenecks: +frontend bound, backend bound, bad speculation, retiring. + +For more details on Topdown see [1][5] + +Traditionally this was implemented by events in generic counters +and specific formulas to compute the bottlenecks. + +perf stat --topdown implements this. + +Full Top Down includes more levels that can break down the +bottlenecks further. This is not directly implemented in perf, +but available in other tools that can run on top of perf, +such as toplev[2] or vtune[3] + +New Topdown features in Ice Lake +=============================== + +With Ice Lake CPUs the TopDown metrics are directly available as +fixed counters and do not require generic counters. This allows +to collect TopDown always in addition to other events. + +% perf stat -a --topdown -I1000 +# time retiring bad speculation frontend bound backend bound + 1.001281330 23.0% 15.3% 29.6% 32.1% + 2.003009005 5.0% 6.8% 46.6% 41.6% + 3.004646182 6.7% 6.7% 46.0% 40.6% + 4.006326375 5.0% 6.4% 47.6% 41.0% + 5.007991804 5.1% 6.3% 46.3% 42.3% + 6.009626773 6.2% 7.1% 47.3% 39.3% + 7.011296356 4.7% 6.7% 46.2% 42.4% + 8.012951831 4.7% 6.7% 47.5% 41.1% +... + +This also enables measuring TopDown per thread/process instead +of only per core. + +Using TopDown through RDPMC in applications on Ice Lake +====================================================== + +For more fine grained measurements it can be useful to +access the new directly from user space. This is more complicated, +but drastically lowers overhead. + +On Ice Lake, there is a new fixed counter 3: SLOTS, which reports +"pipeline SLOTS" (cycles multiplied by core issue width) and a +metric register that reports slots ratios for the different bottleneck +categories. + +The metrics counter is CPU model specific and is not available on older +CPUs. + +Example code +============ + +Library functions to do the functionality described below +is also available in libjevents [4] + +The application opens a group with fixed counter 3 (SLOTS) and any +metric event, and allow user programs to read the performance counters. + +Fixed counter 3 is mapped to a pseudo event event=0x00, umask=04, +so the perf_event_attr structure should be initialized with +{ .config = 0x0400, .type = PERF_TYPE_RAW } +The metric events are mapped to the pseudo event event=0x00, umask=0x8X. +For example, the perf_event_attr structure can be initialized with +{ .config = 0x8000, .type = PERF_TYPE_RAW } for Retiring metric event +The Fixed counter 3 must be the leader of the group. + +#include +#include +#include + +/* Provide own perf_event_open stub because glibc doesn't */ +__attribute__((weak)) +int perf_event_open(struct perf_event_attr *attr, pid_t pid, + int cpu, int group_fd, unsigned long flags) +{ + return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); +} + +/* Open slots counter file descriptor for current task. */ +struct perf_event_attr slots = { + .type = PERF_TYPE_RAW, + .size = sizeof(struct perf_event_attr), + .config = 0x400, + .exclude_kernel = 1, +}; + +int slots_fd = perf_event_open(&slots, 0, -1, -1, 0); +if (slots_fd < 0) + ... error ... + +/* + * Open metrics event file descriptor for current task. + * Set slots event as the leader of the group. + */ +struct perf_event_attr metrics = { + .type = PERF_TYPE_RAW, + .size = sizeof(struct perf_event_attr), + .config = 0x8000, + .exclude_kernel = 1, +}; + +int metrics_fd = perf_event_open(&metrics, 0, -1, slots_fd, 0); +if (metrics_fd < 0) + ... error ... + + +The RDPMC instruction (or _rdpmc compiler intrinsic) can now be used +to read slots and the topdown metrics at different points of the program: + +#include +#include + +#define RDPMC_FIXED (1 << 30) /* return fixed counters */ +#define RDPMC_METRIC (1 << 29) /* return metric counters */ + +#define FIXED_COUNTER_SLOTS 3 +#define METRIC_COUNTER_TOPDOWN_L1 0 + +static inline uint64_t read_slots(void) +{ + return _rdpmc(RDPMC_FIXED | FIXED_COUNTER_SLOTS); +} + +static inline uint64_t read_metrics(void) +{ + return _rdpmc(RDPMC_METRIC | METRIC_COUNTER_TOPDOWN_L1); +} + +Then the program can be instrumented to read these metrics at different +points. + +It's not a good idea to do this with too short code regions, +as the parallelism and overlap in the CPU program execution will +cause too much measurement inaccuracy. For example instrumenting +individual basic blocks is definitely too fine grained. + +Decoding metrics values +======================= + +The value reported by read_metrics() contains four 8 bit fields +that represent a scaled ratio that represent the Level 1 bottleneck. +All four fields add up to 0xff (= 100%) + +The binary ratios in the metric value can be converted to float ratios: + +#define GET_METRIC(m, i) (((m) >> (i*8)) & 0xff) + +#define TOPDOWN_RETIRING(val) ((float)GET_METRIC(val, 0) / 0xff) +#define TOPDOWN_BAD_SPEC(val) ((float)GET_METRIC(val, 1) / 0xff) +#define TOPDOWN_FE_BOUND(val) ((float)GET_METRIC(val, 2) / 0xff) +#define TOPDOWN_BE_BOUND(val) ((float)GET_METRIC(val, 3) / 0xff) + +and then converted to percent for printing. + +The ratios in the metric accumulate for the time when the counter +is enabled. For measuring programs it is often useful to measure +specific sections. For this it is needed to deltas on metrics. + +This can be done by scaling the metrics with the slots counter +read at the same time. + +Then it's possible to take deltas of these slots counts +measured at different points, and determine the metrics +for that time period. + + slots_a = read_slots(); + metric_a = read_metrics(); + + ... larger code region ... + + slots_b = read_slots() + metric_b = read_metrics() + + # compute scaled metrics for measurement a + retiring_slots_a = GET_METRIC(metric_a, 0) * slots_a + bad_spec_slots_a = GET_METRIC(metric_a, 1) * slots_a + fe_bound_slots_a = GET_METRIC(metric_a, 2) * slots_a + be_bound_slots_a = GET_METRIC(metric_a, 3) * slots_a + + # compute delta scaled metrics between b and a + retiring_slots = GET_METRIC(metric_b, 0) * slots_b - retiring_slots_a + bad_spec_slots = GET_METRIC(metric_b, 1) * slots_b - bad_spec_slots_a + fe_bound_slots = GET_METRIC(metric_b, 2) * slots_b - fe_bound_slots_a + be_bound_slots = GET_METRIC(metric_b, 3) * slots_b - be_bound_slots_a + +Later the individual ratios for the measurement period can be recreated +from these counts. + + slots_delta = slots_b - slots_a + retiring_ratio = (float)retiring_slots / slots_delta + bad_spec_ratio = (float)bad_spec_slots / slots_delta + fe_bound_ratio = (float)fe_bound_slots / slots_delta + be_bound_ratio = (float)be_bound_slots / slota_delta + + printf("Retiring %.2f%% Bad Speculation %.2f%% FE Bound %.2f%% BE Bound %.2f%%\n", + retiring_ratio * 100., + bad_spec_ratio * 100., + fe_bound_ratio * 100., + be_bound_ratio * 100.); + +Resetting metrics counters +========================== + +Since the individual metrics are only 8bit they lose precision for +short regions over time because the number of cycles covered by each +fraction bit shrinks. So the counters need to be reset regularly. + +When using the kernel perf API the kernel resets on every read. +So as long as the reading is at reasonable intervals (every few +seconds) the precision is good. + +When using perf stat it is recommended to always use the -I option, +with no longer interval than a few seconds + + perf stat -I 1000 --topdown ... + +For user programs using RDPMC directly the counter can +be reset explicitly using ioctl: + + ioctl(perf_fd, PERF_EVENT_IOC_RESET, 0); + +This "opens" a new measurement period. + +A program using RDPMC for TopDown should schedule such a reset +regularly, as in every few seconds. + +Limits on Ice Lake +================== + +Four pseudo TopDown metric events are exposed for the end-users, +topdown-retiring, topdown-bad-spec, topdown-fe-bound and topdown-be-bound. +They can be used to collect the TopDown value under the following +rules: +- All the TopDown metric events must be in a group with the SLOTS event. +- The SLOTS event must be the leader of the group. +- The PERF_FORMAT_GROUP flag must be applied for each TopDown metric + events + +The SLOTS event and the TopDown metric events can be counting members of +a sampling read group. Since the SLOTS event must be the leader of a TopDown +group, the second event of the group is the sampling event. +For example, perf record -e '{slots, $sampling_event, topdown-retiring}:S' + + +[1] https://software.intel.com/en-us/top-down-microarchitecture-analysis-method-win +[2] https://github.com/andikleen/pmu-tools/wiki/toplev-manual +[3] https://software.intel.com/en-us/intel-vtune-amplifier-xe +[4] https://github.com/andikleen/pmu-tools/tree/master/jevents +[5] https://sites.google.com/site/analysismethods/yasin-pubs -- cgit v1.3.1 From a803fbe61d973ca3d65d2892a62d5fcbe6d195ff Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 15 Sep 2020 16:15:41 +0800 Subject: perf metric: Remove duplicate include Remove duplicate header which is included twice. Signed-off-by: YueHaibing Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200915081541.41004-1-yuehaibing@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-metric.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index cd7331aac3bd..aea4f970fccc 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -11,8 +11,6 @@ #include "debug.h" #include "expr.h" #include "stat.h" -#include -#include static struct pmu_event pme_test[] = { { -- cgit v1.3.1 From 99f638173e186dea7c7f106f566bbca59e923c24 Mon Sep 17 00:00:00 2001 From: Zejiang Tang Date: Wed, 9 Sep 2020 17:53:14 +0800 Subject: perf docs: Improve help information in perf.txt perf has many undocumented options, such as:-vv, --exec-path, --html-path, -p, --paginate,--no-pager, --debugfs-dir, --buildid-dir, --list-cmds, --list-opts. Add entris for these options in perf.txt. Signed-off-by: Zejiang Tang Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/1599645194-8438-1-git-send-email-tangzejiang@loongson.cn Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf.txt | 69 ++++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index 3f37ded13f8c..c130a3c46a90 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -12,32 +12,57 @@ SYNOPSIS OPTIONS ------- ---debug:: - Setup debug variable (see list below) in value - range (0, 10). Use like: - --debug verbose # sets verbose = 1 - --debug verbose=2 # sets verbose = 2 - - List of debug variables allowed to set: - verbose - general debug messages - ordered-events - ordered events object debug messages - data-convert - data convert command debug messages - stderr - write debug output (option -v) to stderr - in browser mode - perf-event-open - Print perf_event_open() arguments and - return value - ---buildid-dir:: - Setup buildid cache directory. It has higher priority than - buildid.dir config file option. +-h:: +--help:: + Run perf help command. -v:: --version:: - Display perf version. + Display perf version. --h:: ---help:: - Run perf help command. +-vv:: + Print the compiled-in status of libraries. + +--exec-path:: + Display or set exec path. + +--html-path:: + Display html documentation path. + +-p:: +--paginate:: + Set up pager. + +--no-pager:: + Do not set pager. + +--buildid-dir:: + Setup buildid cache directory. It has higher priority + than buildid.dir config file option. + +--list-cmds:: + List the most commonly used perf commands. + +--list-opts:: + List available perf options. + +--debugfs-dir:: + Set debugfs directory or set environment variable PERF_DEBUGFS_DIR. + +--debug:: + Setup debug variable (see list below) in value + range (0, 10). Use like: + --debug verbose # sets verbose = 1 + --debug verbose=2 # sets verbose = 2 + + List of debug variables allowed to set: + verbose - general debug messages + ordered-events - ordered events object debug messages + data-convert - data convert command debug messages + stderr - write debug output (option -v) to stderr + in browser mode + perf-event-open - Print perf_event_open() arguments and + return value DESCRIPTION ----------- -- cgit v1.3.1 From fc18380fb9d2f2f0f16b04e3fbaa1fa286776b01 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 9 Sep 2020 11:49:22 +0300 Subject: perf script: Display negative tid in non-sample events The kernel can release tasks while they are still running. This can result in a task having no tid, in which case perf records a tid of -1. Improve the perf script output in that case. Example: Before: # cat ./autoreap.c #include #include #include #include struct sigaction act = { .sa_handler = SIG_IGN, }; int main() { pid_t child; int status = 0; sigaction(SIGCHLD, &act, NULL); child = fork(); if (child == 0) return 123; wait(&status); return 0; } # gcc -o autoreap autoreap.c # ./perf record -a -e dummy --switch-events ./autoreap [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.948 MB perf.data ] # ./perf script --show-task-events --show-switch-events | grep -C2 'autoreap\|4294967295\|-1' swapper 0 [004] 18462.673613: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 25189/25189 perf 25189 [004] 18462.673614: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 0/0 autoreap 25189 [004] 18462.673800: PERF_RECORD_COMM exec: autoreap:25189/25189 autoreap 25189 [004] 18462.674042: PERF_RECORD_FORK(25191:25191):(25189:25189) autoreap 25189 [004] 18462.674050: PERF_RECORD_SWITCH_CPU_WIDE OUT next pid/tid: 0/0 swapper 0 [004] 18462.674051: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 25189/25189 swapper 0 [005] 18462.674083: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 25191/25191 autoreap 25191 [005] 18462.674084: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 0/0 swapper 0 [003] 18462.674121: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 11/11 rcu_preempt 11 [003] 18462.674121: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 0/0 rcu_preempt 11 [003] 18462.674124: PERF_RECORD_SWITCH_CPU_WIDE OUT next pid/tid: 0/0 swapper 0 [003] 18462.674124: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 11/11 autoreap 25191 [005] 18462.674138: PERF_RECORD_EXIT(25191:25191):(25189:25189) PERF_RECORD_SWITCH_CPU_WIDE OUT next pid/tid: 0/0 swapper 0 [005] 18462.674149: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 4294967295/4294967295 swapper 0 [004] 18462.674182: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 25189/25189 autoreap 25189 [004] 18462.674183: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 0/0 autoreap 25189 [004] 18462.674218: PERF_RECORD_EXIT(25189:25189):(25188:25188) autoreap 25189 [004] 18462.674225: PERF_RECORD_SWITCH_CPU_WIDE OUT next pid/tid: 0/0 swapper 0 [004] 18462.674226: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 25189/25189 swapper 0 [007] 18462.674257: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 25188/25188 After: # ./perf script --show-task-events --show-switch-events | grep -C2 'autoreap\|4294967295\|-1' swapper 0 [004] 18462.673613: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 25189/25189 perf 25189 [004] 18462.673614: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 0/0 autoreap 25189 [004] 18462.673800: PERF_RECORD_COMM exec: autoreap:25189/25189 autoreap 25189 [004] 18462.674042: PERF_RECORD_FORK(25191:25191):(25189:25189) autoreap 25189 [004] 18462.674050: PERF_RECORD_SWITCH_CPU_WIDE OUT next pid/tid: 0/0 swapper 0 [004] 18462.674051: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 25189/25189 swapper 0 [005] 18462.674083: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 25191/25191 autoreap 25191 [005] 18462.674084: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 0/0 swapper 0 [003] 18462.674121: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 11/11 rcu_preempt 11 [003] 18462.674121: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 0/0 rcu_preempt 11 [003] 18462.674124: PERF_RECORD_SWITCH_CPU_WIDE OUT next pid/tid: 0/0 swapper 0 [003] 18462.674124: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 11/11 autoreap 25191 [005] 18462.674138: PERF_RECORD_EXIT(25191:25191):(25189:25189) :-1 -1 [005] 18462.674149: PERF_RECORD_SWITCH_CPU_WIDE OUT next pid/tid: 0/0 swapper 0 [005] 18462.674149: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: -1/-1 swapper 0 [004] 18462.674182: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 25189/25189 autoreap 25189 [004] 18462.674183: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 0/0 autoreap 25189 [004] 18462.674218: PERF_RECORD_EXIT(25189:25189):(25188:25188) autoreap 25189 [004] 18462.674225: PERF_RECORD_SWITCH_CPU_WIDE OUT next pid/tid: 0/0 swapper 0 [004] 18462.674226: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 25189/25189 swapper 0 [007] 18462.674257: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 25188/25188 Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Cc: Yu-cheng Yu Link: http://lore.kernel.org/lkml/20200909084923.9096-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 10 ++++++---- tools/perf/util/event.c | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 484ce6067d23..48588ccf902e 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -702,12 +702,14 @@ static int perf_sample__fprintf_start(struct perf_script *script, char tstr[128]; if (PRINT_FIELD(COMM)) { + const char *comm = thread ? thread__comm_str(thread) : ":-1"; + if (latency_format) - printed += fprintf(fp, "%8.8s ", thread__comm_str(thread)); + printed += fprintf(fp, "%8.8s ", comm); else if (PRINT_FIELD(IP) && evsel__has_callchain(evsel) && symbol_conf.use_callchain) - printed += fprintf(fp, "%s ", thread__comm_str(thread)); + printed += fprintf(fp, "%s ", comm); else - printed += fprintf(fp, "%16s ", thread__comm_str(thread)); + printed += fprintf(fp, "%16s ", comm); } if (PRINT_FIELD(PID) && PRINT_FIELD(TID)) @@ -2238,7 +2240,7 @@ static int print_event_with_time(struct perf_tool *tool, if (tid != -1) thread = machine__findnew_thread(machine, pid, tid); - if (thread && evsel) { + if (evsel) { perf_sample__fprintf_start(script, sample, thread, evsel, event->header.type, stdout); } diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 317a26571845..05616d4138a9 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -398,7 +398,7 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp) if (event->header.type == PERF_RECORD_SWITCH) return fprintf(fp, " %s\n", in_out); - return fprintf(fp, " %s %s pid/tid: %5u/%-5u\n", + return fprintf(fp, " %s %s pid/tid: %5d/%-5d\n", in_out, out ? "next" : "prev", event->context_switch.next_prev_pid, event->context_switch.next_prev_tid); -- cgit v1.3.1 From 7d537a8d2e76bc4fc71e34545ceaa463ac2cd928 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 9 Sep 2020 11:49:23 +0300 Subject: perf intel-pt: Fix "context_switch event has no tid" error A context_switch event can have no tid because pids can be detached from a task while the task is still running (in do_exit()). Note this won't happen with per-task contexts because then tracing stops at perf_event_exit_task() If a task with no tid gets preempted, or a dying task gets preempted and its parent releases it, when it subsequently gets switched back in, Intel PT will not be able to determine what task is running and prints an error "context_switch event has no tid". However, it is not really an error because the task is in kernel space and the decoder can continue to decode successfully. Fix by changing the error to be only a logged message, and make allowance for tid == -1. Example: Using 5.9-rc4 with Preemptible Kernel (Low-Latency Desktop) e.g. $ uname -r 5.9.0-rc4 $ grep PREEMPT .config # CONFIG_PREEMPT_NONE is not set # CONFIG_PREEMPT_VOLUNTARY is not set CONFIG_PREEMPT=y CONFIG_PREEMPT_COUNT=y CONFIG_PREEMPTION=y CONFIG_PREEMPT_RCU=y CONFIG_PREEMPT_NOTIFIERS=y CONFIG_DRM_I915_PREEMPT_TIMEOUT=640 CONFIG_DEBUG_PREEMPT=y # CONFIG_PREEMPT_TRACER is not set # CONFIG_PREEMPTIRQ_DELAY_TEST is not set Before: $ cat forkit.c #include #include #include int main() { pid_t child; int status = 0; child = fork(); if (child == 0) return 123; wait(&status); return 0; } $ gcc -o forkit forkit.c $ sudo ~/bin/perf record --kcore -a -m,64M -e intel_pt/cyc/k & [1] 11016 $ taskset 2 ./forkit $ sudo pkill perf $ [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 17.262 MB perf.data ] [1]+ Terminated sudo ~/bin/perf record --kcore -a -m,64M -e intel_pt/cyc/k $ sudo ~/bin/perf script --show-task-events --show-switch-events --itrace=iqqe-o -C 1 --ns | grep -C 2 forkit context_switch event has no tid taskset 11019 [001] 66663.270045029: 1 instructions:k: ffffffffb1d9f844 strnlen_user+0xb4 ([kernel.kallsyms]) taskset 11019 [001] 66663.270201816: 1 instructions:k: ffffffffb1a83121 unmap_page_range+0x561 ([kernel.kallsyms]) forkit 11019 [001] 66663.270327553: PERF_RECORD_COMM exec: forkit:11019/11019 forkit 11019 [001] 66663.270420028: 1 instructions:k: ffffffffb1db9537 __clear_user+0x27 ([kernel.kallsyms]) forkit 11019 [001] 66663.270648704: 1 instructions:k: ffffffffb18829e6 do_user_addr_fault+0xf6 ([kernel.kallsyms]) forkit 11019 [001] 66663.270833163: 1 instructions:k: ffffffffb230a825 irqentry_exit_to_user_mode+0x15 ([kernel.kallsyms]) forkit 11019 [001] 66663.271092359: 1 instructions:k: ffffffffb1aea3d9 lock_page_memcg+0x9 ([kernel.kallsyms]) forkit 11019 [001] 66663.271207092: PERF_RECORD_FORK(11020:11020):(11019:11019) forkit 11019 [001] 66663.271234775: PERF_RECORD_SWITCH_CPU_WIDE OUT next pid/tid: 11020/11020 forkit 11020 [001] 66663.271238407: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 11019/11019 forkit 11020 [001] 66663.271312066: 1 instructions:k: ffffffffb1a88140 handle_mm_fault+0x10 ([kernel.kallsyms]) forkit 11020 [001] 66663.271476225: PERF_RECORD_EXIT(11020:11020):(11019:11019) forkit 11020 [001] 66663.271497488: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 11019/11019 forkit 11019 [001] 66663.271500523: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 11020/11020 forkit 11019 [001] 66663.271517241: 1 instructions:k: ffffffffb24012cd error_entry+0x6d ([kernel.kallsyms]) forkit 11019 [001] 66663.271664080: PERF_RECORD_EXIT(11019:11019):(1386:1386) After: $ sudo ~/bin/perf script --show-task-events --show-switch-events --itrace=iqqe-o -C 1 --ns | grep -C 2 forkit taskset 11019 [001] 66663.270045029: 1 instructions:k: ffffffffb1d9f844 strnlen_user+0xb4 ([kernel.kallsyms]) taskset 11019 [001] 66663.270201816: 1 instructions:k: ffffffffb1a83121 unmap_page_range+0x561 ([kernel.kallsyms]) forkit 11019 [001] 66663.270327553: PERF_RECORD_COMM exec: forkit:11019/11019 forkit 11019 [001] 66663.270420028: 1 instructions:k: ffffffffb1db9537 __clear_user+0x27 ([kernel.kallsyms]) forkit 11019 [001] 66663.270648704: 1 instructions:k: ffffffffb18829e6 do_user_addr_fault+0xf6 ([kernel.kallsyms]) forkit 11019 [001] 66663.270833163: 1 instructions:k: ffffffffb230a825 irqentry_exit_to_user_mode+0x15 ([kernel.kallsyms]) forkit 11019 [001] 66663.271092359: 1 instructions:k: ffffffffb1aea3d9 lock_page_memcg+0x9 ([kernel.kallsyms]) forkit 11019 [001] 66663.271207092: PERF_RECORD_FORK(11020:11020):(11019:11019) forkit 11019 [001] 66663.271234775: PERF_RECORD_SWITCH_CPU_WIDE OUT next pid/tid: 11020/11020 forkit 11020 [001] 66663.271238407: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 11019/11019 forkit 11020 [001] 66663.271312066: 1 instructions:k: ffffffffb1a88140 handle_mm_fault+0x10 ([kernel.kallsyms]) forkit 11020 [001] 66663.271476225: PERF_RECORD_EXIT(11020:11020):(11019:11019) forkit 11020 [001] 66663.271497488: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 11019/11019 forkit 11019 [001] 66663.271500523: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 11020/11020 forkit 11019 [001] 66663.271517241: 1 instructions:k: ffffffffb24012cd error_entry+0x6d ([kernel.kallsyms]) forkit 11019 [001] 66663.271664080: PERF_RECORD_EXIT(11019:11019):(1386:1386) forkit 11019 [001] 66663.271688752: PERF_RECORD_SWITCH_CPU_WIDE OUT next pid/tid: -1/-1 :-1 -1 [001] 66663.271692086: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 11019/11019 :-1 -1 [001] 66663.271707466: 1 instructions:k: ffffffffb18eb096 update_load_avg+0x306 ([kernel.kallsyms]) Fixes: 86c2786994bd7c ("perf intel-pt: Add support for PERF_RECORD_SWITCH") Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Yu-cheng Yu Link: http://lore.kernel.org/lkml/20200909084923.9096-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 0af4e81c46e2..3a0348caec7d 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1101,6 +1101,8 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, if (queue->tid == -1 || pt->have_sched_switch) { ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu); + if (ptq->tid == -1) + ptq->pid = -1; thread__zput(ptq->thread); } @@ -2603,10 +2605,8 @@ static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, tid = sample->tid; } - if (tid == -1) { - pr_err("context_switch event has no tid\n"); - return -EINVAL; - } + if (tid == -1) + intel_pt_log("context_switch event has no tid\n"); ret = intel_pt_sync_switch(pt, cpu, tid, sample->time); if (ret <= 0) -- cgit v1.3.1 From dcc81be0fc4e66943041e6e19a5faf8f8704a27e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 17 Sep 2020 13:18:07 -0700 Subject: perf metricgroup: Fix uncore metric expressions A metric like DRAM_BW_Use has on SkylakeX events uncore_imc/cas_count_read/ and uncore_imc/case_count_write/. These events open 6 events per socket with pmu names of uncore_imc_[0-5]. The current metric setup code in find_evsel_group assumes one ID will map to 1 event to be recorded in metric_events. For events with multiple matches, the first event is recorded in metric_events (avoiding matching >1 event with the same name) and the evlist_used updated so that duplicate events aren't removed when the evlist has unused events removed. Before this change: $ /tmp/perf/perf stat -M DRAM_BW_Use -a -- sleep 1 Performance counter stats for 'system wide': 41.14 MiB uncore_imc/cas_count_read/ 1,002,614,251 ns duration_time 1.002614251 seconds time elapsed After this change: $ /tmp/perf/perf stat -M DRAM_BW_Use -a -- sleep 1 Performance counter stats for 'system wide': 157.47 MiB uncore_imc/cas_count_read/ # 0.00 DRAM_BW_Use 126.97 MiB uncore_imc/cas_count_write/ 1,003,019,728 ns duration_time Erroneous duplication introduced in: commit 2440689d62e9 ("perf metricgroup: Remove duped metric group events"). Fixes: ded80bda8bc9 ("perf expr: Migrate expr ids table to a hashmap"). Reported-by: Jin Yao Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Andrii Nakryiko Cc: Athira Jajeev Cc: Daniel Borkmann Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Cc: Yonghong Song Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Link: http://lore.kernel.org/lkml/20200917201807.4090224-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 75 ++++++++++++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index ab5030fcfed4..d948a7f910cf 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -150,6 +150,18 @@ static void expr_ids__exit(struct expr_ids *ids) free(ids->id[i].id); } +static bool contains_event(struct evsel **metric_events, int num_events, + const char *event_name) +{ + int i; + + for (i = 0; i < num_events; i++) { + if (!strcmp(metric_events[i]->name, event_name)) + return true; + } + return false; +} + /** * Find a group of events in perf_evlist that correpond to those from a parsed * metric expression. Note, as find_evsel_group is called in the same order as @@ -180,7 +192,11 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, int i = 0, matched_events = 0, events_to_match; const int idnum = (int)hashmap__size(&pctx->ids); - /* duration_time is grouped separately. */ + /* + * duration_time is always grouped separately, when events are grouped + * (ie has_constraint is false) then ignore it in the matching loop and + * add it to metric_events at the end. + */ if (!has_constraint && hashmap__find(&pctx->ids, "duration_time", (void **)&val_ptr)) events_to_match = idnum - 1; @@ -207,23 +223,20 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, sizeof(struct evsel *) * idnum); current_leader = ev->leader; } - if (hashmap__find(&pctx->ids, ev->name, (void **)&val_ptr)) { - if (has_constraint) { - /* - * Events aren't grouped, ensure the same event - * isn't matched from two groups. - */ - for (i = 0; i < matched_events; i++) { - if (!strcmp(ev->name, - metric_events[i]->name)) { - break; - } - } - if (i != matched_events) - continue; - } + /* + * Check for duplicate events with the same name. For example, + * uncore_imc/cas_count_read/ will turn into 6 events per socket + * on skylakex. Only the first such event is placed in + * metric_events. If events aren't grouped then this also + * ensures that the same event in different sibling groups + * aren't both added to metric_events. + */ + if (contains_event(metric_events, matched_events, ev->name)) + continue; + /* Does this event belong to the parse context? */ + if (hashmap__find(&pctx->ids, ev->name, (void **)&val_ptr)) metric_events[matched_events++] = ev; - } + if (matched_events == events_to_match) break; } @@ -239,7 +252,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, } if (matched_events != idnum) { - /* Not whole match */ + /* Not a whole match */ return NULL; } @@ -247,8 +260,32 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, for (i = 0; i < idnum; i++) { ev = metric_events[i]; - ev->metric_leader = ev; + /* Don't free the used events. */ set_bit(ev->idx, evlist_used); + /* + * The metric leader points to the identically named event in + * metric_events. + */ + ev->metric_leader = ev; + /* + * Mark two events with identical names in the same group (or + * globally) as being in use as uncore events may be duplicated + * for each pmu. Set the metric leader of such events to be the + * event that appears in metric_events. + */ + evlist__for_each_entry_continue(perf_evlist, ev) { + /* + * If events are grouped then the search can terminate + * when then group is left. + */ + if (!has_constraint && + ev->leader != metric_events[i]->leader) + break; + if (!strcmp(metric_events[i]->name, ev->name)) { + set_bit(ev->idx, evlist_used); + ev->metric_leader = metric_events[i]; + } + } } return metric_events[0]; -- cgit v1.3.1 From 8b974778f998ab1be23eca7436fc13d2d8c6bd59 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 16 Sep 2020 15:13:51 +0200 Subject: selftests: mptcp: interpret \n as a new line In case of errors, this message was printed: (...) # read: Resource temporarily unavailable # client exit code 0, server 3 # \nnetns ns1-0-BJlt5D socket stat for 10003: (...) Obviously, the idea was to add a new line before the socket stat and not print "\nnetns". Fixes: b08fbf241064 ("selftests: add test-cases for MPTCP MP_JOIN") Fixes: 048d19d444be ("mptcp: add basic kselftest for mptcp") Signed-off-by: Matthieu Baerts Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 4 ++-- tools/testing/selftests/net/mptcp/mptcp_join.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index e4df9ba64824..2cfd87d94db8 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -443,9 +443,9 @@ do_transfer() duration=$(printf "(duration %05sms)" $duration) if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then echo "$duration [ FAIL ] client exit code $retc, server $rets" 1>&2 - echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2 + echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port" - echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2 + echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port" cat "$capout" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index f39c1129ce5f..c2943e4dfcfe 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -176,9 +176,9 @@ do_transfer() if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then echo " client exit code $retc, server $rets" 1>&2 - echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2 + echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port" - echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2 + echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port" cat "$capout" -- cgit v1.3.1 From c8bd596f9388bceda6cf008d554cbb1a4f2244e7 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 17 Sep 2020 14:15:19 +1000 Subject: selftests/harness: Flush stdout before forking The test harness forks() a child to run each test. Both the parent and the child print to stdout using libc functions. That can lead to duplicated (or more) output if the libc buffers are not flushed before forking. It's generally not seen when running programs directly, because stdout will usually be line buffered when it's pointing to a terminal. This was noticed when running the seccomp_bpf test, eg: $ ./seccomp_bpf | tee test.log $ grep -c "TAP version 13" test.log 2 But we only expect the TAP header to appear once. It can be exacerbated using stdbuf to increase the buffer size: $ stdbuf -o 1MB ./seccomp_bpf > test.log $ grep -c "TAP version 13" test.log 13 The fix is simple, we just flush stdout & stderr before fork. Usually stderr is unbuffered, but that can be changed, so flush it as well just to be safe. Signed-off-by: Michael Ellerman Tested-by: Max Filippov Acked-by: Shuah Khan Acked-by: Kees Cook Signed-off-by: Shuah Khan --- tools/testing/selftests/kselftest_harness.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 4f78e4805633..f19804df244c 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -971,6 +971,11 @@ void __run_test(struct __fixture_metadata *f, ksft_print_msg(" RUN %s%s%s.%s ...\n", f->name, variant->name[0] ? "." : "", variant->name, t->name); + + /* Make sure output buffers are flushed before fork */ + fflush(stdout); + fflush(stderr); + t->pid = fork(); if (t->pid < 0) { ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); -- cgit v1.3.1 From 897217b9a00426a037215c5539f467733a3823fe Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 17 Sep 2020 09:13:33 -0600 Subject: selftests: Set default protocol for raw sockets in nettest IPPROTO_IP (0) is not valid for raw sockets. Default the protocol for raw sockets to IPPROTO_RAW if the protocol has not been set via the -P option. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/nettest.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 93208caacbe6..f75c53ce0a2d 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -1667,6 +1667,8 @@ int main(int argc, char *argv[]) case 'R': args.type = SOCK_RAW; args.port = 0; + if (!args.protocol) + args.protocol = IPPROTO_RAW; break; case 'P': pe = getprotobyname(optarg); -- cgit v1.3.1 From 09b28d76eac48e922dc293da1aa2b2b85c32aeee Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 17 Sep 2020 19:09:18 -0700 Subject: bpf: Add abnormal return checks. LD_[ABS|IND] instructions may return from the function early. bpf_tail_call pseudo instruction is either fallthrough or return. Allow them in the subprograms only when subprograms are BTF annotated and have scalar return types. Allow ld_abs and tail_call in the main program even if it calls into subprograms. In the past that was not ok to do for ld_abs, since it was JITed with special exit sequence. Since bpf_gen_ld_abs() was introduced the ld_abs looks like normal exit insn from JIT point of view, so it's safe to allow them in the main program. Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 67 ++++++++++++++++++++-------- tools/testing/selftests/bpf/verifier/calls.c | 6 +-- 3 files changed, 52 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index fbc964526ba3..2bb48a2c4d08 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -360,6 +360,7 @@ struct bpf_subprog_info { u16 stack_depth; /* max. stack depth used by this function */ bool has_tail_call; bool tail_call_reachable; + bool has_ld_abs; }; /* single container for all structs diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d1c009e8c57f..4161b6c406bc 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1494,6 +1494,9 @@ static int check_subprogs(struct bpf_verifier_env *env) insn[i].imm == BPF_FUNC_tail_call && insn[i].src_reg != BPF_PSEUDO_CALL) subprog[cur_subprog].has_tail_call = true; + if (BPF_CLASS(code) == BPF_LD && + (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND)) + subprog[cur_subprog].has_ld_abs = true; if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) goto next; if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL) @@ -7514,18 +7517,6 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EINVAL; } - if (env->subprog_cnt > 1) { - /* when program has LD_ABS insn JITs and interpreter assume - * that r1 == ctx == skb which is not the case for callees - * that can have arbitrary arguments. It's problematic - * for main prog as well since JITs would need to analyze - * all functions in order to make proper register save/restore - * decisions in the main prog. Hence disallow LD_ABS with calls - */ - verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n"); - return -EINVAL; - } - if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || BPF_SIZE(insn->code) == BPF_DW || (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) { @@ -7936,6 +7927,23 @@ err_free: return ret; } +static int check_abnormal_return(struct bpf_verifier_env *env) +{ + int i; + + for (i = 1; i < env->subprog_cnt; i++) { + if (env->subprog_info[i].has_ld_abs) { + verbose(env, "LD_ABS is not allowed in subprogs without BTF\n"); + return -EINVAL; + } + if (env->subprog_info[i].has_tail_call) { + verbose(env, "tail_call is not allowed in subprogs without BTF\n"); + return -EINVAL; + } + } + return 0; +} + /* The minimum supported BTF func info size */ #define MIN_BPF_FUNCINFO_SIZE 8 #define MAX_FUNCINFO_REC_SIZE 252 @@ -7944,20 +7952,24 @@ static int check_btf_func(struct bpf_verifier_env *env, const union bpf_attr *attr, union bpf_attr __user *uattr) { + const struct btf_type *type, *func_proto, *ret_type; u32 i, nfuncs, urec_size, min_size; u32 krec_size = sizeof(struct bpf_func_info); struct bpf_func_info *krecord; struct bpf_func_info_aux *info_aux = NULL; - const struct btf_type *type; struct bpf_prog *prog; const struct btf *btf; void __user *urecord; u32 prev_offset = 0; + bool scalar_return; int ret = -ENOMEM; nfuncs = attr->func_info_cnt; - if (!nfuncs) + if (!nfuncs) { + if (check_abnormal_return(env)) + return -EINVAL; return 0; + } if (nfuncs != env->subprog_cnt) { verbose(env, "number of funcs in func_info doesn't match number of subprogs\n"); @@ -8005,25 +8017,23 @@ static int check_btf_func(struct bpf_verifier_env *env, } /* check insn_off */ + ret = -EINVAL; if (i == 0) { if (krecord[i].insn_off) { verbose(env, "nonzero insn_off %u for the first func info record", krecord[i].insn_off); - ret = -EINVAL; goto err_free; } } else if (krecord[i].insn_off <= prev_offset) { verbose(env, "same or smaller insn offset (%u) than previous func info record (%u)", krecord[i].insn_off, prev_offset); - ret = -EINVAL; goto err_free; } if (env->subprog_info[i].start != krecord[i].insn_off) { verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n"); - ret = -EINVAL; goto err_free; } @@ -8032,10 +8042,26 @@ static int check_btf_func(struct bpf_verifier_env *env, if (!type || !btf_type_is_func(type)) { verbose(env, "invalid type id %d in func info", krecord[i].type_id); - ret = -EINVAL; goto err_free; } info_aux[i].linkage = BTF_INFO_VLEN(type->info); + + func_proto = btf_type_by_id(btf, type->type); + if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto))) + /* btf_func_check() already verified it during BTF load */ + goto err_free; + ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL); + scalar_return = + btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type); + if (i && !scalar_return && env->subprog_info[i].has_ld_abs) { + verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n"); + goto err_free; + } + if (i && !scalar_return && env->subprog_info[i].has_tail_call) { + verbose(env, "tail_call is only allowed in functions that return 'int'.\n"); + goto err_free; + } + prev_offset = krecord[i].insn_off; urecord += urec_size; } @@ -8196,8 +8222,11 @@ static int check_btf_info(struct bpf_verifier_env *env, struct btf *btf; int err; - if (!attr->func_info_cnt && !attr->line_info_cnt) + if (!attr->func_info_cnt && !attr->line_info_cnt) { + if (check_abnormal_return(env)) + return -EINVAL; return 0; + } btf = btf_get_by_fd(attr->prog_btf_fd); if (IS_ERR(btf)) diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 94258c6b5235..c4f5d909e58a 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -647,13 +647,14 @@ .result = REJECT, }, { - "calls: ld_abs with changing ctx data in callee", + "calls: subprog call with ld_abs in main prog", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), BPF_LD_ABS(BPF_B, 0), BPF_LD_ABS(BPF_H, 0), BPF_LD_ABS(BPF_W, 0), BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), BPF_LD_ABS(BPF_B, 0), @@ -666,8 +667,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed", - .result = REJECT, + .result = ACCEPT, }, { "calls: two calls with bad fallthrough", -- cgit v1.3.1 From 3b0379111197fb97f4f46f93946fe30b22a15223 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 16 Sep 2020 23:10:10 +0200 Subject: selftests/bpf: Add tailcall_bpf2bpf tests Add four tests to tailcalls selftest explicitly named "tailcall_bpf2bpf_X" as their purpose is to validate that combination of tailcalls with bpf2bpf calls are working properly. These tests also validate LD_ABS from subprograms. Signed-off-by: Maciej Fijalkowski Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/tailcalls.c | 332 +++++++++++++++++++++ .../selftests/bpf/progs/tailcall_bpf2bpf1.c | 38 +++ .../selftests/bpf/progs/tailcall_bpf2bpf2.c | 41 +++ .../selftests/bpf/progs/tailcall_bpf2bpf3.c | 61 ++++ .../selftests/bpf/progs/tailcall_bpf2bpf4.c | 61 ++++ 5 files changed, 533 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c create mode 100644 tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c create mode 100644 tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c create mode 100644 tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index bb8fe646dd9f..ee27d68d2a1c 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include /* test_tailcall_1 checks basic functionality by patching multiple locations * in a single program for a single tail call slot with nop->jmp, jmp->nop @@ -472,6 +473,329 @@ out: bpf_object__close(obj); } +/* test_tailcall_bpf2bpf_1 purpose is to make sure that tailcalls are working + * correctly in correlation with BPF subprograms + */ +static void test_tailcall_bpf2bpf_1(void) +{ + int err, map_fd, prog_fd, main_fd, i; + struct bpf_map *prog_array; + struct bpf_program *prog; + struct bpf_object *obj; + __u32 retval, duration; + char prog_name[32]; + + err = bpf_prog_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + + prog = bpf_object__find_program_by_title(obj, "classifier"); + if (CHECK_FAIL(!prog)) + goto out; + + main_fd = bpf_program__fd(prog); + if (CHECK_FAIL(main_fd < 0)) + goto out; + + prog_array = bpf_object__find_map_by_name(obj, "jmp_table"); + if (CHECK_FAIL(!prog_array)) + goto out; + + map_fd = bpf_map__fd(prog_array); + if (CHECK_FAIL(map_fd < 0)) + goto out; + + /* nop -> jmp */ + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK_FAIL(!prog)) + goto out; + + prog_fd = bpf_program__fd(prog); + if (CHECK_FAIL(prog_fd < 0)) + goto out; + + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + } + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + 0, &retval, &duration); + CHECK(err || retval != 1, "tailcall", + "err %d errno %d retval %d\n", err, errno, retval); + + /* jmp -> nop, call subprog that will do tailcall */ + i = 1; + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + 0, &retval, &duration); + CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + /* make sure that subprog can access ctx and entry prog that + * called this subprog can properly return + */ + i = 0; + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + 0, &retval, &duration); + CHECK(err || retval != sizeof(pkt_v4) * 2, + "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); +out: + bpf_object__close(obj); +} + +/* test_tailcall_bpf2bpf_2 checks that the count value of the tail call limit + * enforcement matches with expectations when tailcall is preceded with + * bpf2bpf call. + */ +static void test_tailcall_bpf2bpf_2(void) +{ + int err, map_fd, prog_fd, main_fd, data_fd, i, val; + struct bpf_map *prog_array, *data_map; + struct bpf_program *prog; + struct bpf_object *obj; + __u32 retval, duration; + char buff[128] = {}; + + err = bpf_prog_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + + prog = bpf_object__find_program_by_title(obj, "classifier"); + if (CHECK_FAIL(!prog)) + goto out; + + main_fd = bpf_program__fd(prog); + if (CHECK_FAIL(main_fd < 0)) + goto out; + + prog_array = bpf_object__find_map_by_name(obj, "jmp_table"); + if (CHECK_FAIL(!prog_array)) + goto out; + + map_fd = bpf_map__fd(prog_array); + if (CHECK_FAIL(map_fd < 0)) + goto out; + + prog = bpf_object__find_program_by_title(obj, "classifier/0"); + if (CHECK_FAIL(!prog)) + goto out; + + prog_fd = bpf_program__fd(prog); + if (CHECK_FAIL(prog_fd < 0)) + goto out; + + i = 0; + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); + if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) + return; + + data_fd = bpf_map__fd(data_map); + if (CHECK_FAIL(map_fd < 0)) + return; + + i = 0; + err = bpf_map_lookup_elem(data_fd, &i, &val); + CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n", + err, errno, val); + + i = 0; + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); +out: + bpf_object__close(obj); +} + +/* test_tailcall_bpf2bpf_3 checks that non-trivial amount of stack (up to + * 256 bytes) can be used within bpf subprograms that have the tailcalls + * in them + */ +static void test_tailcall_bpf2bpf_3(void) +{ + int err, map_fd, prog_fd, main_fd, i; + struct bpf_map *prog_array; + struct bpf_program *prog; + struct bpf_object *obj; + __u32 retval, duration; + char prog_name[32]; + + err = bpf_prog_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + + prog = bpf_object__find_program_by_title(obj, "classifier"); + if (CHECK_FAIL(!prog)) + goto out; + + main_fd = bpf_program__fd(prog); + if (CHECK_FAIL(main_fd < 0)) + goto out; + + prog_array = bpf_object__find_map_by_name(obj, "jmp_table"); + if (CHECK_FAIL(!prog_array)) + goto out; + + map_fd = bpf_map__fd(prog_array); + if (CHECK_FAIL(map_fd < 0)) + goto out; + + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK_FAIL(!prog)) + goto out; + + prog_fd = bpf_program__fd(prog); + if (CHECK_FAIL(prog_fd < 0)) + goto out; + + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + } + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + &duration, &retval, NULL); + CHECK(err || retval != sizeof(pkt_v4) * 3, + "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + i = 1; + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + &duration, &retval, NULL); + CHECK(err || retval != sizeof(pkt_v4), + "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + i = 0; + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + &duration, &retval, NULL); + CHECK(err || retval != sizeof(pkt_v4) * 2, + "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); +out: + bpf_object__close(obj); +} + +/* test_tailcall_bpf2bpf_4 checks that tailcall counter is correctly preserved + * across tailcalls combined with bpf2bpf calls. for making sure that tailcall + * counter behaves correctly, bpf program will go through following flow: + * + * entry -> entry_subprog -> tailcall0 -> bpf_func0 -> subprog0 -> + * -> tailcall1 -> bpf_func1 -> subprog1 -> tailcall2 -> bpf_func2 -> + * subprog2 [here bump global counter] --------^ + * + * We go through first two tailcalls and start counting from the subprog2 where + * the loop begins. At the end of the test make sure that the global counter is + * equal to 31, because tailcall counter includes the first two tailcalls + * whereas global counter is incremented only on loop presented on flow above. + */ +static void test_tailcall_bpf2bpf_4(void) +{ + int err, map_fd, prog_fd, main_fd, data_fd, i, val; + struct bpf_map *prog_array, *data_map; + struct bpf_program *prog; + struct bpf_object *obj; + __u32 retval, duration; + char prog_name[32]; + + err = bpf_prog_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + + prog = bpf_object__find_program_by_title(obj, "classifier"); + if (CHECK_FAIL(!prog)) + goto out; + + main_fd = bpf_program__fd(prog); + if (CHECK_FAIL(main_fd < 0)) + goto out; + + prog_array = bpf_object__find_map_by_name(obj, "jmp_table"); + if (CHECK_FAIL(!prog_array)) + goto out; + + map_fd = bpf_map__fd(prog_array); + if (CHECK_FAIL(map_fd < 0)) + goto out; + + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK_FAIL(!prog)) + goto out; + + prog_fd = bpf_program__fd(prog); + if (CHECK_FAIL(prog_fd < 0)) + goto out; + + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + } + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + &duration, &retval, NULL); + CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); + if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) + return; + + data_fd = bpf_map__fd(data_map); + if (CHECK_FAIL(map_fd < 0)) + return; + + i = 0; + err = bpf_map_lookup_elem(data_fd, &i, &val); + CHECK(err || val != 31, "tailcall count", "err %d errno %d count %d\n", + err, errno, val); + +out: + bpf_object__close(obj); +} + void test_tailcalls(void) { if (test__start_subtest("tailcall_1")) @@ -484,4 +808,12 @@ void test_tailcalls(void) test_tailcall_4(); if (test__start_subtest("tailcall_5")) test_tailcall_5(); + if (test__start_subtest("tailcall_bpf2bpf_1")) + test_tailcall_bpf2bpf_1(); + if (test__start_subtest("tailcall_bpf2bpf_2")) + test_tailcall_bpf2bpf_2(); + if (test__start_subtest("tailcall_bpf2bpf_3")) + test_tailcall_bpf2bpf_3(); + if (test__start_subtest("tailcall_bpf2bpf_4")) + test_tailcall_bpf2bpf_4(); } diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c new file mode 100644 index 000000000000..b5d9c8e778ae --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 2); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +#define TAIL_FUNC(x) \ + SEC("classifier/" #x) \ + int bpf_func_##x(struct __sk_buff *skb) \ + { \ + return x; \ + } +TAIL_FUNC(0) +TAIL_FUNC(1) + +static __noinline +int subprog_tail(struct __sk_buff *skb) +{ + bpf_tail_call(skb, &jmp_table, 0); + + return skb->len * 2; +} + +SEC("classifier") +int entry(struct __sk_buff *skb) +{ + bpf_tail_call(skb, &jmp_table, 1); + + return subprog_tail(skb); +} + +char __license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c new file mode 100644 index 000000000000..a004ab28ce28 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include "bpf_legacy.h" + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +static __noinline +int subprog_tail(struct __sk_buff *skb) +{ + if (load_byte(skb, 0)) + bpf_tail_call(skb, &jmp_table, 1); + else + bpf_tail_call(skb, &jmp_table, 0); + return 1; +} + +static volatile int count; + +SEC("classifier/0") +int bpf_func_0(struct __sk_buff *skb) +{ + count++; + return subprog_tail(skb); +} + +SEC("classifier") +int entry(struct __sk_buff *skb) +{ + bpf_tail_call(skb, &jmp_table, 0); + + return 0; +} + +char __license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c new file mode 100644 index 000000000000..96dbef2b6b7c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include "bpf_legacy.h" + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 2); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +__noinline +int subprog_tail2(struct __sk_buff *skb) +{ + volatile char arr[64] = {}; + + if (load_word(skb, 0) || load_half(skb, 0)) + bpf_tail_call(skb, &jmp_table, 10); + else + bpf_tail_call(skb, &jmp_table, 1); + + return skb->len; +} + +static __noinline +int subprog_tail(struct __sk_buff *skb) +{ + volatile char arr[64] = {}; + + bpf_tail_call(skb, &jmp_table, 0); + + return skb->len * 2; +} + +SEC("classifier/0") +int bpf_func_0(struct __sk_buff *skb) +{ + volatile char arr[128] = {}; + + return subprog_tail2(skb); +} + +SEC("classifier/1") +int bpf_func_1(struct __sk_buff *skb) +{ + volatile char arr[128] = {}; + + return skb->len * 3; +} + +SEC("classifier") +int entry(struct __sk_buff *skb) +{ + volatile char arr[128] = {}; + + return subprog_tail(skb); +} + +char __license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c new file mode 100644 index 000000000000..98b40a95bc67 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 3); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +static volatile int count; + +__noinline +int subprog_tail_2(struct __sk_buff *skb) +{ + bpf_tail_call(skb, &jmp_table, 2); + return skb->len * 3; +} + +__noinline +int subprog_tail_1(struct __sk_buff *skb) +{ + bpf_tail_call(skb, &jmp_table, 1); + return skb->len * 2; +} + +__noinline +int subprog_tail(struct __sk_buff *skb) +{ + bpf_tail_call(skb, &jmp_table, 0); + return skb->len; +} + +SEC("classifier/1") +int bpf_func_1(struct __sk_buff *skb) +{ + return subprog_tail_2(skb); +} + +SEC("classifier/2") +int bpf_func_2(struct __sk_buff *skb) +{ + count++; + return subprog_tail_2(skb); +} + +SEC("classifier/0") +int bpf_func_0(struct __sk_buff *skb) +{ + return subprog_tail_1(skb); +} + +SEC("classifier") +int entry(struct __sk_buff *skb) +{ + return subprog_tail(skb); +} + +char __license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; -- cgit v1.3.1 From ac7a75d1fbe072fc5535311ecc080f10645aeba1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 18 Sep 2020 17:01:30 +0900 Subject: perf probe: Fix to adjust symbol address with correct reloc_sym address 'perf probe' uses ref_reloc_sym to adjust symbol offset address from debuginfo address or ref_reloc_sym based address, but that is misusing reloc_sym->addr and reloc_sym->unrelocated_addr. If map is not relocated (map->reloc == 0), we can use reloc_sym->addr as unrelocated address instead of reloc_sym->unrelocated_addr. This usually does not happen. If we have a non-stripped ELF binary, we will use it for map and debuginfo, if not, we use only kallsyms without debuginfo. Thus, the map is always relocated (ELF and DWARF binary) or not relocated (kallsyms). However, if we allow the combination of debuginfo and kallsyms based map (like using debuginfod), we have to check the map->reloc and choose the collect address of reloc_sym. Signed-off-by: Masami Hiramatsu Reviewed-by: Frank Ch. Eigler Cc: Aaron Merey Cc: Daniel Thompson Link: http://lore.kernel.org/lkml/160041609047.912668.14314639291419159274.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 99d36ac77c08..17831f186ab5 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -129,9 +129,10 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr, struct map *map; /* ref_reloc_sym is just a label. Need a special fix*/ - reloc_sym = kernel_get_ref_reloc_sym(NULL); + reloc_sym = kernel_get_ref_reloc_sym(&map); if (reloc_sym && strcmp(name, reloc_sym->name) == 0) - *addr = (reloc) ? reloc_sym->addr : reloc_sym->unrelocated_addr; + *addr = (!map->reloc || reloc) ? reloc_sym->addr : + reloc_sym->unrelocated_addr; else { sym = machine__find_kernel_symbol_by_name(host_machine, name, &map); if (!sym) @@ -795,7 +796,8 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs, free(tevs[i].point.symbol); tevs[i].point.symbol = tmp; tevs[i].point.offset = tevs[i].point.address - - reloc_sym->unrelocated_addr; + (map->reloc ? reloc_sym->unrelocated_addr : + reloc_sym->addr); } return skipped; } -- cgit v1.3.1 From 7cd5738d0d8bb192fc86736ac7fb4a6cdb57f957 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 18 Sep 2020 17:01:41 +0900 Subject: perf probe: Fall back to debuginfod query if debuginfo and source not found locally Since 'perf probe' heavily depends on debuginfo, debuginfod gives us many benefits on the 'perf probe' command on remote machine. Especially, this will be helpful for the embedded devices which will not have enough storage, or boot with a cross-build kernel whose source code is in the host machine. This will work as similar to commit c7a14fdcb3fa7736 ("perf build-ids: Fall back to debuginfod query if debuginfo not found") Tested with: (host) $ cd PATH/TO/KBUILD/DIR/ (host) $ debuginfod -F . ... (remote) # perf probe -L vfs_read Failed to find the path for the kernel: No such file or directory Error: Failed to show lines. (remote) # export DEBUGINFOD_URLS="http://$HOST_IP:8002/" (remote) # perf probe -L vfs_read 0 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { 2 ssize_t ret; if (!(file->f_mode & FMODE_READ)) return -EBADF; 6 if (!(file->f_mode & FMODE_CAN_READ)) return -EINVAL; 8 if (unlikely(!access_ok(buf, count))) return -EFAULT; 11 ret = rw_verify_area(READ, file, pos, count); 12 if (ret) return ret; if (count > MAX_RW_COUNT) ... (remote) # perf probe -a "vfs_read count" Added new event: probe:vfs_read (on vfs_read with count) (remote) # perf probe -l probe:vfs_read (on vfs_read@ksrc/linux/fs/read_write.c with count) Signed-off-by: Masami Hiramatsu Reviewed-by: Frank Ch. Eigler Cc: Aaron Merey Cc: Daniel Thompson Link: http://lore.kernel.org/lkml/160041610083.912668.13659563860278615846.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 58 ++++++++++++++++++++++++++++++++++++++- tools/perf/util/probe-finder.c | 61 ++++++++++++++++++++++++++++++++++++++---- tools/perf/util/probe-finder.h | 7 +++-- 3 files changed, 118 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 17831f186ab5..3a1b58a92673 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -43,6 +43,10 @@ #include #include +#ifdef HAVE_DEBUGINFOD_SUPPORT +#include +#endif + #define PERFPROBE_GROUP "probe" bool probe_event_dry_run; /* Dry run flag */ @@ -338,6 +342,8 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso) map = machine__kernel_map(host_machine); dso = map->dso; + if (!dso->has_build_id) + dso__read_running_kernel_build_id(dso, host_machine); vmlinux_name = symbol_conf.vmlinux_name; dso->load_errno = 0; @@ -453,6 +459,49 @@ static int get_alternative_line_range(struct debuginfo *dinfo, return ret; } +#ifdef HAVE_DEBUGINFOD_SUPPORT +static struct debuginfo *open_from_debuginfod(struct dso *dso, struct nsinfo *nsi, + bool silent) +{ + debuginfod_client *c = debuginfod_begin(); + char sbuild_id[SBUILD_ID_SIZE + 1]; + struct debuginfo *ret = NULL; + struct nscookie nsc; + char *path; + int fd; + + if (!c) + return NULL; + + build_id__sprintf(dso->build_id, BUILD_ID_SIZE, sbuild_id); + fd = debuginfod_find_debuginfo(c, (const unsigned char *)sbuild_id, + 0, &path); + if (fd >= 0) + close(fd); + debuginfod_end(c); + if (fd < 0) { + if (!silent) + pr_debug("Failed to find debuginfo in debuginfod.\n"); + return NULL; + } + if (!silent) + pr_debug("Load debuginfo from debuginfod (%s)\n", path); + + nsinfo__mountns_enter(nsi, &nsc); + ret = debuginfo__new((const char *)path); + nsinfo__mountns_exit(&nsc); + return ret; +} +#else +static inline +struct debuginfo *open_from_debuginfod(struct dso *dso __maybe_unused, + struct nsinfo *nsi __maybe_unused, + bool silent __maybe_unused) +{ + return NULL; +} +#endif + /* Open new debuginfo of given module */ static struct debuginfo *open_debuginfo(const char *module, struct nsinfo *nsi, bool silent) @@ -472,6 +521,10 @@ static struct debuginfo *open_debuginfo(const char *module, struct nsinfo *nsi, strcpy(reason, "(unknown)"); } else dso__strerror_load(dso, reason, STRERR_BUFSIZE); + if (dso) + ret = open_from_debuginfod(dso, nsi, silent); + if (ret) + return ret; if (!silent) { if (module) pr_err("Module %s is not loaded, please specify its full path name.\n", module); @@ -959,6 +1012,7 @@ static int __show_line_range(struct line_range *lr, const char *module, int ret; char *tmp; char sbuf[STRERR_BUFSIZE]; + char sbuild_id[SBUILD_ID_SIZE] = ""; /* Search a line range */ dinfo = open_debuginfo(module, NULL, false); @@ -971,6 +1025,8 @@ static int __show_line_range(struct line_range *lr, const char *module, if (!ret) ret = debuginfo__find_line_range(dinfo, lr); } + if (dinfo->build_id) + build_id__sprintf(dinfo->build_id, BUILD_ID_SIZE, sbuild_id); debuginfo__delete(dinfo); if (ret == 0 || ret == -ENOENT) { pr_warning("Specified source line is not found.\n"); @@ -982,7 +1038,7 @@ static int __show_line_range(struct line_range *lr, const char *module, /* Convert source file path */ tmp = lr->path; - ret = get_real_path(tmp, lr->comp_dir, &lr->path); + ret = find_source_path(tmp, sbuild_id, lr->comp_dir, &lr->path); /* Free old path when new path is assigned */ if (tmp != lr->path) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 659024342e9a..6eddf7be8293 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -31,6 +31,10 @@ #include "probe-file.h" #include "string2.h" +#ifdef HAVE_DEBUGINFOD_SUPPORT +#include +#endif + /* Kprobe tracer basic type is up to u64 */ #define MAX_BASIC_TYPE_BITS 64 @@ -51,6 +55,7 @@ static const Dwfl_Callbacks offline_callbacks = { static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, const char *path) { + GElf_Addr dummy; int fd; fd = open(path, O_RDONLY); @@ -70,6 +75,8 @@ static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, if (!dbg->dbg) goto error; + dwfl_module_build_id(dbg->mod, &dbg->build_id, &dummy); + dwfl_report_end(dbg->dwfl, NULL, NULL); return 0; @@ -942,6 +949,7 @@ static int probe_point_lazy_walker(const char *fname, int lineno, /* Find probe points from lazy pattern */ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) { + char sbuild_id[SBUILD_ID_SIZE] = ""; int ret = 0; char *fpath; @@ -949,7 +957,10 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) const char *comp_dir; comp_dir = cu_get_comp_dir(&pf->cu_die); - ret = get_real_path(pf->fname, comp_dir, &fpath); + if (pf->dbg->build_id) + build_id__sprintf(pf->dbg->build_id, + BUILD_ID_SIZE, sbuild_id); + ret = find_source_path(pf->fname, sbuild_id, comp_dir, &fpath); if (ret < 0) { pr_warning("Failed to find source file path.\n"); return ret; @@ -1448,7 +1459,7 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, struct probe_trace_event **tevs) { struct trace_event_finder tf = { - .pf = {.pev = pev, .callback = add_probe_trace_event}, + .pf = {.pev = pev, .dbg = dbg, .callback = add_probe_trace_event}, .max_tevs = probe_conf.max_probes, .mod = dbg->mod}; int ret, i; @@ -1618,7 +1629,7 @@ int debuginfo__find_available_vars_at(struct debuginfo *dbg, struct variable_list **vls) { struct available_var_finder af = { - .pf = {.pev = pev, .callback = add_available_vars}, + .pf = {.pev = pev, .dbg = dbg, .callback = add_available_vars}, .mod = dbg->mod, .max_vls = probe_conf.max_probes}; int ret; @@ -1973,17 +1984,57 @@ found: return (ret < 0) ? ret : lf.found; } +#ifdef HAVE_DEBUGINFOD_SUPPORT +/* debuginfod doesn't require the comp_dir but buildid is required */ +static int get_source_from_debuginfod(const char *raw_path, + const char *sbuild_id, char **new_path) +{ + debuginfod_client *c = debuginfod_begin(); + const char *p = raw_path; + int fd; + + if (!c) + return -ENOMEM; + + fd = debuginfod_find_source(c, (const unsigned char *)sbuild_id, + 0, p, new_path); + pr_debug("Search %s from debuginfod -> %d\n", p, fd); + if (fd >= 0) + close(fd); + debuginfod_end(c); + if (fd < 0) { + pr_debug("Failed to find %s in debuginfod (%s)\n", + raw_path, sbuild_id); + return -ENOENT; + } + pr_debug("Got a source %s\n", *new_path); + + return 0; +} +#else +static inline int get_source_from_debuginfod(const char *raw_path __maybe_unused, + const char *sbuild_id __maybe_unused, + char **new_path __maybe_unused) +{ + return -ENOTSUP; +} +#endif /* * Find a src file from a DWARF tag path. Prepend optional source path prefix * and chop off leading directories that do not exist. Result is passed back as * a newly allocated path on success. * Return 0 if file was found and readable, -errno otherwise. */ -int get_real_path(const char *raw_path, const char *comp_dir, - char **new_path) +int find_source_path(const char *raw_path, const char *sbuild_id, + const char *comp_dir, char **new_path) { const char *prefix = symbol_conf.source_prefix; + if (sbuild_id && !prefix) { + if (!get_source_from_debuginfod(raw_path, sbuild_id, new_path)) + return 0; + } + if (!prefix) { if (raw_path[0] != '/' && comp_dir) /* If not an absolute path, try to use comp_dir */ diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 11be10080613..2febb5875678 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -4,6 +4,7 @@ #include #include "intlist.h" +#include "build-id.h" #include "probe-event.h" #include @@ -32,6 +33,7 @@ struct debuginfo { Dwfl_Module *mod; Dwfl *dwfl; Dwarf_Addr bias; + const unsigned char *build_id; }; /* This also tries to open distro debuginfo */ @@ -59,11 +61,12 @@ int debuginfo__find_available_vars_at(struct debuginfo *dbg, struct variable_list **vls); /* Find a src file from a DWARF tag path */ -int get_real_path(const char *raw_path, const char *comp_dir, - char **new_path); +int find_source_path(const char *raw_path, const char *sbuild_id, + const char *comp_dir, char **new_path); struct probe_finder { struct perf_probe_event *pev; /* Target probe event */ + struct debuginfo *dbg; /* Callback when a probe point is found */ int (*callback)(Dwarf_Die *sc_die, struct probe_finder *pf); -- cgit v1.3.1 From e74e1d55728509b352e4eec4283dd5b2781b2070 Mon Sep 17 00:00:00 2001 From: Boyan Karatotev Date: Fri, 18 Sep 2020 11:47:12 +0100 Subject: kselftests/arm64: add a basic Pointer Authentication test PAuth signs and verifies return addresses on the stack. It does so by inserting a Pointer Authentication code (PAC) into some of the unused top bits of an address. This is achieved by adding paciasp/autiasp instructions at the beginning and end of a function. This feature is partially backwards compatible with earlier versions of the ARM architecture. To coerce the compiler into emitting fully backwards compatible code the main file is compiled to target an earlier ARM version. This allows the tests to check for the feature and print meaningful error messages instead of crashing. Add a test to verify that corrupting the return address results in a SIGSEGV on return. Signed-off-by: Boyan Karatotev Reviewed-by: Vincenzo Frascino Reviewed-by: Amit Daniel Kachhap Acked-by: Shuah Khan Cc: Shuah Khan Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20200918104715.182310-2-boian4o1@gmail.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/Makefile | 2 +- tools/testing/selftests/arm64/pauth/.gitignore | 1 + tools/testing/selftests/arm64/pauth/Makefile | 32 ++++++++++++++++ tools/testing/selftests/arm64/pauth/helper.h | 9 +++++ tools/testing/selftests/arm64/pauth/pac.c | 44 ++++++++++++++++++++++ .../testing/selftests/arm64/pauth/pac_corruptor.S | 19 ++++++++++ 6 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/arm64/pauth/.gitignore create mode 100644 tools/testing/selftests/arm64/pauth/Makefile create mode 100644 tools/testing/selftests/arm64/pauth/helper.h create mode 100644 tools/testing/selftests/arm64/pauth/pac.c create mode 100644 tools/testing/selftests/arm64/pauth/pac_corruptor.S (limited to 'tools') diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index 93b567d23c8b..525506fd97b9 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -4,7 +4,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -ARM64_SUBTARGETS ?= tags signal +ARM64_SUBTARGETS ?= tags signal pauth else ARM64_SUBTARGETS := endif diff --git a/tools/testing/selftests/arm64/pauth/.gitignore b/tools/testing/selftests/arm64/pauth/.gitignore new file mode 100644 index 000000000000..b557c916720a --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/.gitignore @@ -0,0 +1 @@ +pac diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile new file mode 100644 index 000000000000..01d35aaa610a --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/Makefile @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020 ARM Limited + +# preserve CC value from top level Makefile +ifeq ($(CC),cc) +CC := $(CROSS_COMPILE)gcc +endif + +CFLAGS += -mbranch-protection=pac-ret +# check if the compiler supports ARMv8.3 and branch protection with PAuth +pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi) + +ifeq ($(pauth_cc_support),1) +TEST_GEN_PROGS := pac +TEST_GEN_FILES := pac_corruptor.o +endif + +include ../../lib.mk + +ifeq ($(pauth_cc_support),1) +# pac* and aut* instructions are not available on architectures berfore +# ARMv8.3. Therefore target ARMv8.3 wherever they are used directly +$(OUTPUT)/pac_corruptor.o: pac_corruptor.S + $(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a + +# when -mbranch-protection is enabled and the target architecture is ARMv8.3 or +# greater, gcc emits pac* instructions which are not in HINT NOP space, +# preventing the tests from occurring at all. Compile for ARMv8.2 so tests can +# run on earlier targets and print a meaningful error messages +$(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o + $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a +endif diff --git a/tools/testing/selftests/arm64/pauth/helper.h b/tools/testing/selftests/arm64/pauth/helper.h new file mode 100644 index 000000000000..3e0a2a404bf4 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/helper.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +#ifndef _HELPER_H_ +#define _HELPER_H_ + +void pac_corruptor(void); + +#endif diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c new file mode 100644 index 000000000000..0293310ba70a --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/pac.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#include +#include +#include + +#include "../../kselftest_harness.h" +#include "helper.h" + +#define ASSERT_PAUTH_ENABLED() \ +do { \ + unsigned long hwcaps = getauxval(AT_HWCAP); \ + /* data key instructions are not in NOP space. This prevents a SIGILL */ \ + ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); \ +} while (0) + +sigjmp_buf jmpbuf; +void pac_signal_handler(int signum, siginfo_t *si, void *uc) +{ + if (signum == SIGSEGV || signum == SIGILL) + siglongjmp(jmpbuf, 1); +} + +/* check that a corrupted PAC results in SIGSEGV or SIGILL */ +TEST(corrupt_pac) +{ + struct sigaction sa; + + ASSERT_PAUTH_ENABLED(); + if (sigsetjmp(jmpbuf, 1) == 0) { + sa.sa_sigaction = pac_signal_handler; + sa.sa_flags = SA_SIGINFO | SA_RESETHAND; + sigemptyset(&sa.sa_mask); + + sigaction(SIGSEGV, &sa, NULL); + sigaction(SIGILL, &sa, NULL); + + pac_corruptor(); + ASSERT_TRUE(0) TH_LOG("SIGSEGV/SIGILL signal did not occur"); + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/arm64/pauth/pac_corruptor.S b/tools/testing/selftests/arm64/pauth/pac_corruptor.S new file mode 100644 index 000000000000..aa6588050752 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/pac_corruptor.S @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +.global pac_corruptor + +.text +/* + * Corrupting a single bit of the PAC ensures the authentication will fail. It + * also guarantees no possible collision. TCR_EL1.TBI0 is set by default so no + * top byte PAC is tested + */ + pac_corruptor: + paciasp + + /* corrupt the top bit of the PAC */ + eor lr, lr, #1 << 53 + + autiasp + ret -- cgit v1.3.1 From 766d95b1ed93ebdd07ac87490e60e442342f5dc4 Mon Sep 17 00:00:00 2001 From: Boyan Karatotev Date: Fri, 18 Sep 2020 11:47:13 +0100 Subject: kselftests/arm64: add nop checks for PAuth tests PAuth adds sign/verify controls to enable and disable groups of instructions in hardware for compatibility with libraries that do not implement PAuth. The kernel always enables them if it detects PAuth. Add a test that checks that each group of instructions is enabled, if the kernel reports PAuth as detected. Note: For groups, for the purpose of this patch, we intend instructions that use a certain key. Signed-off-by: Boyan Karatotev Reviewed-by: Vincenzo Frascino Reviewed-by: Amit Daniel Kachhap Acked-by: Shuah Khan Cc: Shuah Khan Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20200918104715.182310-3-boian4o1@gmail.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/pauth/.gitignore | 1 + tools/testing/selftests/arm64/pauth/Makefile | 7 +++- tools/testing/selftests/arm64/pauth/helper.c | 39 ++++++++++++++++++++ tools/testing/selftests/arm64/pauth/helper.h | 9 +++++ tools/testing/selftests/arm64/pauth/pac.c | 51 ++++++++++++++++++++++++++ 5 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/arm64/pauth/helper.c (limited to 'tools') diff --git a/tools/testing/selftests/arm64/pauth/.gitignore b/tools/testing/selftests/arm64/pauth/.gitignore index b557c916720a..155137d92722 100644 --- a/tools/testing/selftests/arm64/pauth/.gitignore +++ b/tools/testing/selftests/arm64/pauth/.gitignore @@ -1 +1,2 @@ +exec_target pac diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile index 01d35aaa610a..5c0dd129562f 100644 --- a/tools/testing/selftests/arm64/pauth/Makefile +++ b/tools/testing/selftests/arm64/pauth/Makefile @@ -12,7 +12,7 @@ pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/nu ifeq ($(pauth_cc_support),1) TEST_GEN_PROGS := pac -TEST_GEN_FILES := pac_corruptor.o +TEST_GEN_FILES := pac_corruptor.o helper.o endif include ../../lib.mk @@ -23,10 +23,13 @@ ifeq ($(pauth_cc_support),1) $(OUTPUT)/pac_corruptor.o: pac_corruptor.S $(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a +$(OUTPUT)/helper.o: helper.c + $(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a + # when -mbranch-protection is enabled and the target architecture is ARMv8.3 or # greater, gcc emits pac* instructions which are not in HINT NOP space, # preventing the tests from occurring at all. Compile for ARMv8.2 so tests can # run on earlier targets and print a meaningful error messages -$(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o +$(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o $(OUTPUT)/helper.o $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a endif diff --git a/tools/testing/selftests/arm64/pauth/helper.c b/tools/testing/selftests/arm64/pauth/helper.c new file mode 100644 index 000000000000..2c201e7d0d50 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/helper.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#include "helper.h" + +size_t keyia_sign(size_t ptr) +{ + asm volatile("paciza %0" : "+r" (ptr)); + return ptr; +} + +size_t keyib_sign(size_t ptr) +{ + asm volatile("pacizb %0" : "+r" (ptr)); + return ptr; +} + +size_t keyda_sign(size_t ptr) +{ + asm volatile("pacdza %0" : "+r" (ptr)); + return ptr; +} + +size_t keydb_sign(size_t ptr) +{ + asm volatile("pacdzb %0" : "+r" (ptr)); + return ptr; +} + +size_t keyg_sign(size_t ptr) +{ + /* output is encoded in the upper 32 bits */ + size_t dest = 0; + size_t modifier = 0; + + asm volatile("pacga %0, %1, %2" : "=r" (dest) : "r" (ptr), "r" (modifier)); + + return dest; +} diff --git a/tools/testing/selftests/arm64/pauth/helper.h b/tools/testing/selftests/arm64/pauth/helper.h index 3e0a2a404bf4..35c4f3357ae3 100644 --- a/tools/testing/selftests/arm64/pauth/helper.h +++ b/tools/testing/selftests/arm64/pauth/helper.h @@ -4,6 +4,15 @@ #ifndef _HELPER_H_ #define _HELPER_H_ +#include + void pac_corruptor(void); +/* PAuth sign a value with key ia and modifier value 0 */ +size_t keyia_sign(size_t val); +size_t keyib_sign(size_t val); +size_t keyda_sign(size_t val); +size_t keydb_sign(size_t val); +size_t keyg_sign(size_t val); + #endif diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c index 0293310ba70a..bd3d4c0eca9d 100644 --- a/tools/testing/selftests/arm64/pauth/pac.c +++ b/tools/testing/selftests/arm64/pauth/pac.c @@ -8,12 +8,25 @@ #include "../../kselftest_harness.h" #include "helper.h" +#define PAC_COLLISION_ATTEMPTS 10 +/* + * The kernel sets TBID by default. So bits 55 and above should remain + * untouched no matter what. + * The VA space size is 48 bits. Bigger is opt-in. + */ +#define PAC_MASK (~0xff80ffffffffffff) #define ASSERT_PAUTH_ENABLED() \ do { \ unsigned long hwcaps = getauxval(AT_HWCAP); \ /* data key instructions are not in NOP space. This prevents a SIGILL */ \ ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); \ } while (0) +#define ASSERT_GENERIC_PAUTH_ENABLED() \ +do { \ + unsigned long hwcaps = getauxval(AT_HWCAP); \ + /* generic key instructions are not in NOP space. This prevents a SIGILL */ \ + ASSERT_NE(0, hwcaps & HWCAP_PACG) TH_LOG("Generic PAUTH not enabled"); \ +} while (0) sigjmp_buf jmpbuf; void pac_signal_handler(int signum, siginfo_t *si, void *uc) @@ -41,4 +54,42 @@ TEST(corrupt_pac) } } +/* + * There are no separate pac* and aut* controls so checking only the pac* + * instructions is sufficient + */ +TEST(pac_instructions_not_nop) +{ + size_t keyia = 0; + size_t keyib = 0; + size_t keyda = 0; + size_t keydb = 0; + + ASSERT_PAUTH_ENABLED(); + + for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) { + keyia |= keyia_sign(i) & PAC_MASK; + keyib |= keyib_sign(i) & PAC_MASK; + keyda |= keyda_sign(i) & PAC_MASK; + keydb |= keydb_sign(i) & PAC_MASK; + } + + ASSERT_NE(0, keyia) TH_LOG("keyia instructions did nothing"); + ASSERT_NE(0, keyib) TH_LOG("keyib instructions did nothing"); + ASSERT_NE(0, keyda) TH_LOG("keyda instructions did nothing"); + ASSERT_NE(0, keydb) TH_LOG("keydb instructions did nothing"); +} + +TEST(pac_instructions_not_nop_generic) +{ + size_t keyg = 0; + + ASSERT_GENERIC_PAUTH_ENABLED(); + + for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) + keyg |= keyg_sign(i) & PAC_MASK; + + ASSERT_NE(0, keyg) TH_LOG("keyg instructions did nothing"); +} + TEST_HARNESS_MAIN -- cgit v1.3.1 From 806a15b2545e6b5949f14ea8401ce295bd38a018 Mon Sep 17 00:00:00 2001 From: Boyan Karatotev Date: Fri, 18 Sep 2020 11:47:14 +0100 Subject: kselftests/arm64: add PAuth test for whether exec() changes keys Kernel documentation states that it will change PAuth keys on exec() calls. Verify that all keys are correctly switched to new ones. Signed-off-by: Boyan Karatotev Reviewed-by: Vincenzo Frascino Reviewed-by: Amit Daniel Kachhap Acked-by: Shuah Khan Cc: Shuah Khan Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20200918104715.182310-4-boian4o1@gmail.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/pauth/Makefile | 4 + tools/testing/selftests/arm64/pauth/exec_target.c | 34 +++++ tools/testing/selftests/arm64/pauth/helper.h | 10 ++ tools/testing/selftests/arm64/pauth/pac.c | 150 ++++++++++++++++++++++ 4 files changed, 198 insertions(+) create mode 100644 tools/testing/selftests/arm64/pauth/exec_target.c (limited to 'tools') diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile index 5c0dd129562f..72e290b0b10c 100644 --- a/tools/testing/selftests/arm64/pauth/Makefile +++ b/tools/testing/selftests/arm64/pauth/Makefile @@ -13,6 +13,7 @@ pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/nu ifeq ($(pauth_cc_support),1) TEST_GEN_PROGS := pac TEST_GEN_FILES := pac_corruptor.o helper.o +TEST_GEN_PROGS_EXTENDED := exec_target endif include ../../lib.mk @@ -30,6 +31,9 @@ $(OUTPUT)/helper.o: helper.c # greater, gcc emits pac* instructions which are not in HINT NOP space, # preventing the tests from occurring at all. Compile for ARMv8.2 so tests can # run on earlier targets and print a meaningful error messages +$(OUTPUT)/exec_target: exec_target.c $(OUTPUT)/helper.o + $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a + $(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o $(OUTPUT)/helper.o $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a endif diff --git a/tools/testing/selftests/arm64/pauth/exec_target.c b/tools/testing/selftests/arm64/pauth/exec_target.c new file mode 100644 index 000000000000..4435600ca400 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/exec_target.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#include +#include +#include + +#include "helper.h" + +int main(void) +{ + struct signatures signed_vals; + unsigned long hwcaps; + size_t val; + + fread(&val, sizeof(size_t), 1, stdin); + + /* don't try to execute illegal (unimplemented) instructions) caller + * should have checked this and keep worker simple + */ + hwcaps = getauxval(AT_HWCAP); + + if (hwcaps & HWCAP_PACA) { + signed_vals.keyia = keyia_sign(val); + signed_vals.keyib = keyib_sign(val); + signed_vals.keyda = keyda_sign(val); + signed_vals.keydb = keydb_sign(val); + } + signed_vals.keyg = (hwcaps & HWCAP_PACG) ? keyg_sign(val) : 0; + + fwrite(&signed_vals, sizeof(struct signatures), 1, stdout); + + return 0; +} diff --git a/tools/testing/selftests/arm64/pauth/helper.h b/tools/testing/selftests/arm64/pauth/helper.h index 35c4f3357ae3..652496c7b411 100644 --- a/tools/testing/selftests/arm64/pauth/helper.h +++ b/tools/testing/selftests/arm64/pauth/helper.h @@ -6,6 +6,16 @@ #include +#define NKEYS 5 + +struct signatures { + size_t keyia; + size_t keyib; + size_t keyda; + size_t keydb; + size_t keyg; +}; + void pac_corruptor(void); /* PAuth sign a value with key ia and modifier value 0 */ diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c index bd3d4c0eca9d..b363ad6a0b50 100644 --- a/tools/testing/selftests/arm64/pauth/pac.c +++ b/tools/testing/selftests/arm64/pauth/pac.c @@ -2,6 +2,8 @@ // Copyright (C) 2020 ARM Limited #include +#include +#include #include #include @@ -28,6 +30,117 @@ do { \ ASSERT_NE(0, hwcaps & HWCAP_PACG) TH_LOG("Generic PAUTH not enabled"); \ } while (0) +void sign_specific(struct signatures *sign, size_t val) +{ + sign->keyia = keyia_sign(val); + sign->keyib = keyib_sign(val); + sign->keyda = keyda_sign(val); + sign->keydb = keydb_sign(val); +} + +void sign_all(struct signatures *sign, size_t val) +{ + sign->keyia = keyia_sign(val); + sign->keyib = keyib_sign(val); + sign->keyda = keyda_sign(val); + sign->keydb = keydb_sign(val); + sign->keyg = keyg_sign(val); +} + +int n_same(struct signatures *old, struct signatures *new, int nkeys) +{ + int res = 0; + + res += old->keyia == new->keyia; + res += old->keyib == new->keyib; + res += old->keyda == new->keyda; + res += old->keydb == new->keydb; + if (nkeys == NKEYS) + res += old->keyg == new->keyg; + + return res; +} + +int exec_sign_all(struct signatures *signed_vals, size_t val) +{ + int new_stdin[2]; + int new_stdout[2]; + int status; + ssize_t ret; + pid_t pid; + + ret = pipe(new_stdin); + if (ret == -1) { + perror("pipe returned error"); + return -1; + } + + ret = pipe(new_stdout); + if (ret == -1) { + perror("pipe returned error"); + return -1; + } + + pid = fork(); + // child + if (pid == 0) { + dup2(new_stdin[0], STDIN_FILENO); + if (ret == -1) { + perror("dup2 returned error"); + exit(1); + } + + dup2(new_stdout[1], STDOUT_FILENO); + if (ret == -1) { + perror("dup2 returned error"); + exit(1); + } + + close(new_stdin[0]); + close(new_stdin[1]); + close(new_stdout[0]); + close(new_stdout[1]); + + ret = execl("exec_target", "exec_target", (char *)NULL); + if (ret == -1) { + perror("exec returned error"); + exit(1); + } + } + + close(new_stdin[0]); + close(new_stdout[1]); + + ret = write(new_stdin[1], &val, sizeof(size_t)); + if (ret == -1) { + perror("write returned error"); + return -1; + } + + /* + * wait for the worker to finish, so that read() reads all data + * will also context switch with worker so that this function can be used + * for context switch tests + */ + waitpid(pid, &status, 0); + if (WIFEXITED(status) == 0) { + fprintf(stderr, "worker exited unexpectedly\n"); + return -1; + } + if (WEXITSTATUS(status) != 0) { + fprintf(stderr, "worker exited with error\n"); + return -1; + } + + ret = read(new_stdout[0], signed_vals, sizeof(struct signatures)); + if (ret == -1) { + perror("read returned error"); + return -1; + } + + return 0; +} + sigjmp_buf jmpbuf; void pac_signal_handler(int signum, siginfo_t *si, void *uc) { @@ -92,4 +205,41 @@ TEST(pac_instructions_not_nop_generic) ASSERT_NE(0, keyg) TH_LOG("keyg instructions did nothing"); } +/* + * fork() does not change keys. Only exec() does so call a worker program. + * Its only job is to sign a value and report back the resutls + */ +TEST(exec_changed_keys) +{ + struct signatures new_keys; + struct signatures old_keys; + int ret; + int same = 10; + int nkeys = NKEYS; + unsigned long hwcaps = getauxval(AT_HWCAP); + + /* generic and data key instructions are not in NOP space. This prevents a SIGILL */ + ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); + if (!(hwcaps & HWCAP_PACG)) { + TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks"); + nkeys = NKEYS - 1; + } + + for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) { + ret = exec_sign_all(&new_keys, i); + ASSERT_EQ(0, ret) TH_LOG("failed to run worker"); + + if (nkeys == NKEYS) + sign_all(&old_keys, i); + else + sign_specific(&old_keys, i); + + ret = n_same(&old_keys, &new_keys, nkeys); + if (ret < same) + same = ret; + } + + ASSERT_EQ(0, same) TH_LOG("exec() did not change %d keys", same); +} + TEST_HARNESS_MAIN -- cgit v1.3.1 From d21435e9670b11a02bcb1b5683dddd50da61966d Mon Sep 17 00:00:00 2001 From: Boyan Karatotev Date: Fri, 18 Sep 2020 11:47:15 +0100 Subject: kselftests/arm64: add PAuth tests for single threaded consistency and differently initialized keys PAuth adds 5 different keys that can be used to sign addresses. Add a test that verifies that the kernel initializes them to different values and preserves them across context switches. Signed-off-by: Boyan Karatotev Reviewed-by: Vincenzo Frascino Reviewed-by: Amit Daniel Kachhap Acked-by: Shuah Khan Cc: Shuah Khan Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20200918104715.182310-5-boian4o1@gmail.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/pauth/pac.c | 123 ++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c index b363ad6a0b50..592fe538506e 100644 --- a/tools/testing/selftests/arm64/pauth/pac.c +++ b/tools/testing/selftests/arm64/pauth/pac.c @@ -1,11 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (C) 2020 ARM Limited +#define _GNU_SOURCE + #include #include #include #include #include +#include #include "../../kselftest_harness.h" #include "helper.h" @@ -17,6 +20,7 @@ * The VA space size is 48 bits. Bigger is opt-in. */ #define PAC_MASK (~0xff80ffffffffffff) +#define ARBITRARY_VALUE (0x1234) #define ASSERT_PAUTH_ENABLED() \ do { \ unsigned long hwcaps = getauxval(AT_HWCAP); \ @@ -61,13 +65,37 @@ int n_same(struct signatures *old, struct signatures *new, int nkeys) return res; } +int n_same_single_set(struct signatures *sign, int nkeys) +{ + size_t vals[nkeys]; + int same = 0; + + vals[0] = sign->keyia & PAC_MASK; + vals[1] = sign->keyib & PAC_MASK; + vals[2] = sign->keyda & PAC_MASK; + vals[3] = sign->keydb & PAC_MASK; + + if (nkeys >= 4) + vals[4] = sign->keyg & PAC_MASK; + + for (int i = 0; i < nkeys - 1; i++) { + for (int j = i + 1; j < nkeys; j++) { + if (vals[i] == vals[j]) + same += 1; + } + } + return same; +} + int exec_sign_all(struct signatures *signed_vals, size_t val) { int new_stdin[2]; int new_stdout[2]; int status; + int i; ssize_t ret; pid_t pid; + cpu_set_t mask; ret = pipe(new_stdin); if (ret == -1) { @@ -81,6 +109,20 @@ int exec_sign_all(struct signatures *signed_vals, size_t val) return -1; } + /* + * pin this process and all its children to a single CPU, so it can also + * guarantee a context switch with its child + */ + sched_getaffinity(0, sizeof(mask), &mask); + + for (i = 0; i < sizeof(cpu_set_t); i++) + if (CPU_ISSET(i, &mask)) + break; + + CPU_ZERO(&mask); + CPU_SET(i, &mask); + sched_setaffinity(0, sizeof(mask), &mask); + pid = fork(); // child if (pid == 0) { @@ -205,6 +247,44 @@ TEST(pac_instructions_not_nop_generic) ASSERT_NE(0, keyg) TH_LOG("keyg instructions did nothing"); } +TEST(single_thread_different_keys) +{ + int same = 10; + int nkeys = NKEYS; + int tmp; + struct signatures signed_vals; + unsigned long hwcaps = getauxval(AT_HWCAP); + + /* generic and data key instructions are not in NOP space. This prevents a SIGILL */ + ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); + if (!(hwcaps & HWCAP_PACG)) { + TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks"); + nkeys = NKEYS - 1; + } + + /* + * In Linux the PAC field can be up to 7 bits wide. Even if keys are + * different, there is about 5% chance for PACs to collide with + * different addresses. This chance rapidly increases with fewer bits + * allocated for the PAC (e.g. wider address). A comparison of the keys + * directly will be more reliable. + * All signed values need to be different at least once out of n + * attempts to be certain that the keys are different + */ + for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) { + if (nkeys == NKEYS) + sign_all(&signed_vals, i); + else + sign_specific(&signed_vals, i); + + tmp = n_same_single_set(&signed_vals, nkeys); + if (tmp < same) + same = tmp; + } + + ASSERT_EQ(0, same) TH_LOG("%d keys clashed every time", same); +} + /* * fork() does not change keys. Only exec() does so call a worker program. * Its only job is to sign a value and report back the resutls @@ -242,4 +322,47 @@ TEST(exec_changed_keys) ASSERT_EQ(0, same) TH_LOG("exec() did not change %d keys", same); } +TEST(context_switch_keep_keys) +{ + int ret; + struct signatures trash; + struct signatures before; + struct signatures after; + + ASSERT_PAUTH_ENABLED(); + + sign_specific(&before, ARBITRARY_VALUE); + + /* will context switch with a process with different keys at least once */ + ret = exec_sign_all(&trash, ARBITRARY_VALUE); + ASSERT_EQ(0, ret) TH_LOG("failed to run worker"); + + sign_specific(&after, ARBITRARY_VALUE); + + ASSERT_EQ(before.keyia, after.keyia) TH_LOG("keyia changed after context switching"); + ASSERT_EQ(before.keyib, after.keyib) TH_LOG("keyib changed after context switching"); + ASSERT_EQ(before.keyda, after.keyda) TH_LOG("keyda changed after context switching"); + ASSERT_EQ(before.keydb, after.keydb) TH_LOG("keydb changed after context switching"); +} + +TEST(context_switch_keep_keys_generic) +{ + int ret; + struct signatures trash; + size_t before; + size_t after; + + ASSERT_GENERIC_PAUTH_ENABLED(); + + before = keyg_sign(ARBITRARY_VALUE); + + /* will context switch with a process with different keys at least once */ + ret = exec_sign_all(&trash, ARBITRARY_VALUE); + ASSERT_EQ(0, ret) TH_LOG("failed to run worker"); + + after = keyg_sign(ARBITRARY_VALUE); + + ASSERT_EQ(before, after) TH_LOG("keyg changed after context switching"); +} + TEST_HARNESS_MAIN -- cgit v1.3.1 From ca765153eb90577e5fda281485048427b80a9a77 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 19 Aug 2020 12:48:32 +0100 Subject: selftests: arm64: Test case for enumeration of SVE vector lengths Add a test case that verifies that we can enumerate the SVE vector lengths on systems where we detect SVE, and that those SVE vector lengths are valid. This program was written by Dave Martin and adapted to kselftest by me. Signed-off-by: Mark Brown Acked-by: Dave Martin Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20200819114837.51466-2-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/sve-probe-vls.c | 58 ++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 tools/testing/selftests/arm64/fp/sve-probe-vls.c (limited to 'tools') diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c new file mode 100644 index 000000000000..b29cbc642c57 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015-2020 ARM Limited. + * Original author: Dave Martin + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../kselftest.h" + +int main(int argc, char **argv) +{ + unsigned int vq; + int vl; + static unsigned int vqs[SVE_VQ_MAX]; + unsigned int nvqs = 0; + + ksft_print_header(); + ksft_set_plan(2); + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) + ksft_exit_skip("SVE not available"); + + /* + * Enumerate up to SVE_VQ_MAX vector lengths + */ + for (vq = SVE_VQ_MAX; vq > 0; --vq) { + vl = prctl(PR_SVE_SET_VL, vq * 16); + if (vl == -1) + ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n", + strerror(errno), errno); + + vl &= PR_SVE_VL_LEN_MASK; + + if (!sve_vl_valid(vl)) + ksft_exit_fail_msg("VL %d invalid\n", vl); + vq = sve_vq_from_vl(vl); + + if (!(nvqs < SVE_VQ_MAX)) + ksft_exit_fail_msg("Too many VLs %u >= SVE_VQ_MAX\n", + nvqs); + vqs[nvqs++] = vq; + } + ksft_test_result_pass("Enumerated %d vector lengths\n", nvqs); + ksft_test_result_pass("All vector lengths valid\n"); + + /* Print out the vector lengths in ascending order: */ + while (nvqs--) + ksft_print_msg("%u\n", 16 * vqs[nvqs]); + + ksft_exit_pass(); +} -- cgit v1.3.1 From 0dca276ac4d20d4071b3d3095b1ad33269ea5272 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 19 Aug 2020 12:48:33 +0100 Subject: selftests: arm64: Add test for the SVE ptrace interface Add a test case that does some basic verification of the SVE ptrace interface, forking off a child with known values in the registers and then using ptrace to inspect and manipulate the SVE registers of the child, including in FPSIMD mode to account for sharing between the SVE and FPSIMD registers. This program was written by Dave Martin and modified for kselftest by me. Signed-off-by: Mark Brown Acked-by: Dave Martin Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20200819114837.51466-3-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/sve-ptrace-asm.S | 33 +++ tools/testing/selftests/arm64/fp/sve-ptrace.c | 336 ++++++++++++++++++++++ 2 files changed, 369 insertions(+) create mode 100644 tools/testing/selftests/arm64/fp/sve-ptrace-asm.S create mode 100644 tools/testing/selftests/arm64/fp/sve-ptrace.c (limited to 'tools') diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S new file mode 100644 index 000000000000..3e81f9fab574 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin +#include + +.arch_extension sve + +.globl sve_store_patterns + +sve_store_patterns: + mov x1, x0 + + index z0.b, #0, #1 + str q0, [x1] + + mov w8, #__NR_getpid + svc #0 + str q0, [x1, #0x10] + + mov z1.d, z0.d + str q0, [x1, #0x20] + + mov w8, #__NR_getpid + svc #0 + str q0, [x1, #0x30] + + mov z1.d, z0.d + str q0, [x1, #0x40] + + ret + +.size sve_store_patterns, . - sve_store_patterns +.type sve_store_patterns, @function diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c new file mode 100644 index 000000000000..b2282be6f938 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015-2020 ARM Limited. + * Original author: Dave Martin + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../kselftest.h" + +/* and don't like each other, so: */ +#ifndef NT_ARM_SVE +#define NT_ARM_SVE 0x405 +#endif + +/* Number of registers filled in by sve_store_patterns */ +#define NR_VREGS 5 + +void sve_store_patterns(__uint128_t v[NR_VREGS]); + +static void dump(const void *buf, size_t size) +{ + size_t i; + const unsigned char *p = buf; + + for (i = 0; i < size; ++i) + printf(" %.2x", *p++); +} + +static int check_vregs(const __uint128_t vregs[NR_VREGS]) +{ + int i; + int ok = 1; + + for (i = 0; i < NR_VREGS; ++i) { + printf("# v[%d]:", i); + dump(&vregs[i], sizeof vregs[i]); + putchar('\n'); + + if (vregs[i] != vregs[0]) + ok = 0; + } + + return ok; +} + +static int do_child(void) +{ + if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) + ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + + if (raise(SIGSTOP)) + ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + + return EXIT_SUCCESS; +} + +static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size) +{ + struct user_sve_header *sve; + void *p; + size_t sz = sizeof *sve; + struct iovec iov; + + while (1) { + if (*size < sz) { + p = realloc(*buf, sz); + if (!p) { + errno = ENOMEM; + goto error; + } + + *buf = p; + *size = sz; + } + + iov.iov_base = *buf; + iov.iov_len = sz; + if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov)) + goto error; + + sve = *buf; + if (sve->size <= sz) + break; + + sz = sve->size; + } + + return sve; + +error: + return NULL; +} + +static int set_sve(pid_t pid, const struct user_sve_header *sve) +{ + struct iovec iov; + + iov.iov_base = (void *)sve; + iov.iov_len = sve->size; + return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov); +} + +static void dump_sve_regs(const struct user_sve_header *sve, unsigned int num, + unsigned int vlmax) +{ + unsigned int vq; + unsigned int i; + + if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE) + ksft_exit_fail_msg("Dumping non-SVE register\n"); + + if (vlmax > sve->vl) + vlmax = sve->vl; + + vq = sve_vq_from_vl(sve->vl); + for (i = 0; i < num; ++i) { + printf("# z%u:", i); + dump((const char *)sve + SVE_PT_SVE_ZREG_OFFSET(vq, i), + vlmax); + printf("%s\n", vlmax == sve->vl ? "" : " ..."); + } +} + +static int do_parent(pid_t child) +{ + int ret = EXIT_FAILURE; + pid_t pid; + int status; + siginfo_t si; + void *svebuf = NULL, *newsvebuf; + size_t svebufsz = 0, newsvebufsz; + struct user_sve_header *sve, *new_sve; + struct user_fpsimd_state *fpsimd; + unsigned int i, j; + unsigned char *p; + unsigned int vq; + + /* Attach to the child */ + while (1) { + int sig; + + pid = wait(&status); + if (pid == -1) { + perror("wait"); + goto error; + } + + /* + * This should never happen but it's hard to flag in + * the framework. + */ + if (pid != child) + continue; + + if (WIFEXITED(status) || WIFSIGNALED(status)) + ksft_exit_fail_msg("Child died unexpectedly\n"); + + ksft_test_result(WIFSTOPPED(status), "WIFSTOPPED(%d)\n", + status); + if (!WIFSTOPPED(status)) + goto error; + + sig = WSTOPSIG(status); + + if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) { + if (errno == ESRCH) + goto disappeared; + + if (errno == EINVAL) { + sig = 0; /* bust group-stop */ + goto cont; + } + + ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n", + strerror(errno)); + goto error; + } + + if (sig == SIGSTOP && si.si_code == SI_TKILL && + si.si_pid == pid) + break; + + cont: + if (ptrace(PTRACE_CONT, pid, NULL, sig)) { + if (errno == ESRCH) + goto disappeared; + + ksft_test_result_fail("PTRACE_CONT: %s\n", + strerror(errno)); + goto error; + } + } + + sve = get_sve(pid, &svebuf, &svebufsz); + if (!sve) { + int e = errno; + + ksft_test_result_fail("get_sve: %s\n", strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } else { + ksft_test_result_pass("get_sve\n"); + } + + ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD, + "FPSIMD registers\n"); + if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD) + goto error; + + fpsimd = (struct user_fpsimd_state *)((char *)sve + + SVE_PT_FPSIMD_OFFSET); + for (i = 0; i < 32; ++i) { + p = (unsigned char *)&fpsimd->vregs[i]; + + for (j = 0; j < sizeof fpsimd->vregs[i]; ++j) + p[j] = j; + } + + if (set_sve(pid, sve)) { + int e = errno; + + ksft_test_result_fail("set_sve(FPSIMD): %s\n", + strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } + + vq = sve_vq_from_vl(sve->vl); + + newsvebufsz = SVE_PT_SVE_ZREG_OFFSET(vq, 1); + new_sve = newsvebuf = malloc(newsvebufsz); + if (!new_sve) { + errno = ENOMEM; + perror(NULL); + goto error; + } + + *new_sve = *sve; + new_sve->flags &= ~SVE_PT_REGS_MASK; + new_sve->flags |= SVE_PT_REGS_SVE; + memset((char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 0), + 0, SVE_PT_SVE_ZREG_SIZE(vq)); + new_sve->size = SVE_PT_SVE_ZREG_OFFSET(vq, 1); + if (set_sve(pid, new_sve)) { + int e = errno; + + ksft_test_result_fail("set_sve(ZREG): %s\n", strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } + + new_sve = get_sve(pid, &newsvebuf, &newsvebufsz); + if (!new_sve) { + int e = errno; + + ksft_test_result_fail("get_sve(ZREG): %s\n", strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } + + ksft_test_result((new_sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE, + "SVE registers\n"); + if ((new_sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE) + goto error; + + dump_sve_regs(new_sve, 3, sizeof fpsimd->vregs[0]); + + p = (unsigned char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 1); + for (i = 0; i < sizeof fpsimd->vregs[0]; ++i) { + unsigned char expected = i; + + if (__BYTE_ORDER == __BIG_ENDIAN) + expected = sizeof fpsimd->vregs[0] - 1 - expected; + + ksft_test_result(p[i] == expected, "p[%d] == expected\n", i); + if (p[i] != expected) + goto error; + } + + ret = EXIT_SUCCESS; + +error: + kill(child, SIGKILL); + +disappeared: + return ret; +} + +int main(void) +{ + int ret = EXIT_SUCCESS; + __uint128_t v[NR_VREGS]; + pid_t child; + + ksft_print_header(); + ksft_set_plan(20); + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) + ksft_exit_skip("SVE not available\n"); + + sve_store_patterns(v); + + if (!check_vregs(v)) + ksft_exit_fail_msg("Initial check_vregs() failed\n"); + + child = fork(); + if (!child) + return do_child(); + + if (do_parent(child)) + ret = EXIT_FAILURE; + + ksft_print_cnts(); + + return 0; +} -- cgit v1.3.1 From 5e992c638ea55d928e43d3c1aab1bd8e13835e6e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 19 Aug 2020 12:48:34 +0100 Subject: selftests: arm64: Add stress tests for FPSMID and SVE context switching Add programs sve-test and fpsimd-test which spin reading and writing to the SVE and FPSIMD registers, verifying the operations they perform. The intended use is to leave them running to stress the context switch code's handling of these registers which isn't compatible with what kselftest does so they're not integrated into the framework but there's no other obvious testsuite where they fit so let's store them here. These tests were written by Dave Martin and lightly adapted by me. Signed-off-by: Mark Brown Acked-by: Dave Martin Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20200819114837.51466-4-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/asm-offsets.h | 11 + tools/testing/selftests/arm64/fp/assembler.h | 57 +++ tools/testing/selftests/arm64/fp/fpsimd-test.S | 482 ++++++++++++++++++ tools/testing/selftests/arm64/fp/sve-test.S | 672 +++++++++++++++++++++++++ 4 files changed, 1222 insertions(+) create mode 100644 tools/testing/selftests/arm64/fp/asm-offsets.h create mode 100644 tools/testing/selftests/arm64/fp/assembler.h create mode 100644 tools/testing/selftests/arm64/fp/fpsimd-test.S create mode 100644 tools/testing/selftests/arm64/fp/sve-test.S (limited to 'tools') diff --git a/tools/testing/selftests/arm64/fp/asm-offsets.h b/tools/testing/selftests/arm64/fp/asm-offsets.h new file mode 100644 index 000000000000..a180851496ec --- /dev/null +++ b/tools/testing/selftests/arm64/fp/asm-offsets.h @@ -0,0 +1,11 @@ +#define sa_sz 32 +#define sa_flags 8 +#define sa_handler 0 +#define sa_mask_sz 8 +#define SIGUSR1 10 +#define SIGTERM 15 +#define SIGINT 2 +#define SIGABRT 6 +#define SA_NODEFER 1073741824 +#define SA_SIGINFO 4 +#define ucontext_regs 184 diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h new file mode 100644 index 000000000000..8944f2189206 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/assembler.h @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin + +#ifndef ASSEMBLER_H +#define ASSEMBLER_H + +.macro __for from:req, to:req + .if (\from) == (\to) + _for__body %\from + .else + __for \from, %(\from) + ((\to) - (\from)) / 2 + __for %(\from) + ((\to) - (\from)) / 2 + 1, \to + .endif +.endm + +.macro _for var:req, from:req, to:req, insn:vararg + .macro _for__body \var:req + .noaltmacro + \insn + .altmacro + .endm + + .altmacro + __for \from, \to + .noaltmacro + + .purgem _for__body +.endm + +.macro function name + .macro endfunction + .type \name, @function + .purgem endfunction + .endm +\name: +.endm + +.macro define_accessor name, num, insn + .macro \name\()_entry n + \insn \n, 1 + ret + .endm + +function \name + adr x2, .L__accessor_tbl\@ + add x2, x2, x0, lsl #3 + br x2 + +.L__accessor_tbl\@: + _for x, 0, (\num) - 1, \name\()_entry \x +endfunction + + .purgem \name\()_entry +.endm + +#endif /* ! ASSEMBLER_H */ diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S new file mode 100644 index 000000000000..1c5556bdd11d --- /dev/null +++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S @@ -0,0 +1,482 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin +// +// Simple FPSIMD context switch test +// Repeatedly writes unique test patterns into each FPSIMD register +// and reads them back to verify integrity. +// +// for x in `seq 1 NR_CPUS`; do fpsimd-test & pids=$pids\ $! ; done +// (leave it running for as long as you want...) +// kill $pids + +#include +#include "assembler.h" +#include "asm-offsets.h" + +#define NVR 32 +#define MAXVL_B (128 / 8) + +.macro _vldr Vn:req, Xt:req + ld1 {v\Vn\().2d}, [x\Xt] +.endm + +.macro _vstr Vn:req, Xt:req + st1 {v\Vn\().2d}, [x\Xt] +.endm + +// Generate accessor functions to read/write programmatically selected +// FPSIMD registers. +// x0 is the register index to access +// x1 is the memory address to read from (getv,setp) or store to (setv,setp) +// All clobber x0-x2 +define_accessor setv, NVR, _vldr +define_accessor getv, NVR, _vstr + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +// Print an unsigned decimal number x0 to stdout +// Clobbers x0-x4,x8 +function putdec + mov x1, sp + str x30, [sp, #-32]! // Result can't be > 20 digits + + mov x2, #0 + strb w2, [x1, #-1]! // Write the NUL terminator + + mov x2, #10 +0: udiv x3, x0, x2 // div-mod loop to generate the digits + msub x0, x3, x2, x0 + add w0, w0, #'0' + strb w0, [x1, #-1]! + mov x0, x3 + cbnz x3, 0b + + ldrb w0, [x1] + cbnz w0, 1f + mov w0, #'0' // Print "0" for 0, not "" + strb w0, [x1, #-1]! + +1: mov x0, x1 + bl puts + + ldr x30, [sp], #32 + ret +endfunction + +// Print an unsigned decimal number x0 to stdout, followed by a newline +// Clobbers x0-x5,x8 +function putdecn + mov x5, x30 + + bl putdec + mov x0, #'\n' + bl putc + + ret x5 +endfunction + + +// Clobbers x0-x3,x8 +function puthexb + str x30, [sp, #-0x10]! + + mov w3, w0 + lsr w0, w0, #4 + bl puthexnibble + mov w0, w3 + + ldr x30, [sp], #0x10 + // fall through to puthexnibble +endfunction +// Clobbers x0-x2,x8 +function puthexnibble + and w0, w0, #0xf + cmp w0, #10 + blo 1f + add w0, w0, #'a' - ('9' + 1) +1: add w0, w0, #'0' + b putc +endfunction + +// x0=data in, x1=size in, clobbers x0-x5,x8 +function dumphex + str x30, [sp, #-0x10]! + + mov x4, x0 + mov x5, x1 + +0: subs x5, x5, #1 + b.lo 1f + ldrb w0, [x4], #1 + bl puthexb + b 0b + +1: ldr x30, [sp], #0x10 + ret +endfunction + +// Declare some storate space to shadow the SVE register contents: +.pushsection .text +.data +.align 4 +vref: + .space MAXVL_B * NVR +scratch: + .space MAXVL_B +.popsection + +// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. +// Clobbers x0-x3 +function memcpy + cmp x2, #0 + b.eq 1f +0: ldrb w3, [x1], #1 + strb w3, [x0], #1 + subs x2, x2, #1 + b.ne 0b +1: ret +endfunction + +// Generate a test pattern for storage in SVE registers +// x0: pid (16 bits) +// x1: register number (6 bits) +// x2: generation (4 bits) +function pattern + orr w1, w0, w1, lsl #16 + orr w2, w1, w2, lsl #28 + + ldr x0, =scratch + mov w1, #MAXVL_B / 4 + +0: str w2, [x0], #4 + add w2, w2, #(1 << 22) + subs w1, w1, #1 + bne 0b + + ret +endfunction + +// Get the address of shadow data for FPSIMD V-register V +.macro _adrv xd, xn, nrtmp + ldr \xd, =vref + mov x\nrtmp, #16 + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Set up test pattern in a FPSIMD V-register +// x0: pid +// x1: register number +// x2: generation +function setup_vreg + mov x4, x30 + + mov x6, x1 + bl pattern + _adrv x0, x6, 2 + mov x5, x0 + ldr x1, =scratch + bl memcpy + + mov x0, x6 + mov x1, x5 + bl setv + + ret x4 +endfunction + +// Fill x1 bytes starting at x0 with 0xae (for canary purposes) +// Clobbers x1, x2. +function memfill_ae + mov w2, #0xae + b memfill +endfunction + +// Fill x1 bytes starting at x0 with 0. +// Clobbers x1, x2. +function memclr + mov w2, #0 +endfunction + // fall through to memfill + +// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 +// Clobbers x1 +function memfill + cmp x1, #0 + b.eq 1f + +0: strb w2, [x0], #1 + subs x1, x1, #1 + b.ne 0b + +1: ret +endfunction + +// Trivial memory compare: compare x2 bytes starting at address x0 with +// bytes starting at address x1. +// Returns only if all bytes match; otherwise, the program is aborted. +// Clobbers x0-x5. +function memcmp + cbz x2, 1f + + mov x5, #0 +0: ldrb w3, [x0, x5] + ldrb w4, [x1, x5] + add x5, x5, #1 + cmp w3, w4 + b.ne barf + subs x2, x2, #1 + b.ne 0b + +1: ret +endfunction + +// Verify that a FPSIMD V-register matches its shadow in memory, else abort +// x0: reg number +// Clobbers x0-x5. +function check_vreg + mov x3, x30 + + _adrv x5, x0, 6 + mov x4, x0 + ldr x7, =scratch + + mov x0, x7 + mov x1, x6 + bl memfill_ae + + mov x0, x4 + mov x1, x7 + bl getv + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Any SVE register modified here can cause corruption in the main +// thread -- but *only* the registers modified here. +function irritator_handler + // Increment the irritation signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + // Corrupt some random V-regs + adr x0, .text + (irritator_handler - .text) / 16 * 16 + movi v0.8b, #7 + movi v9.16b, #9 + movi v31.8b, #31 + + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error, iterations=" + ldr x0, [x20, #ucontext_regs + 8 * 22] + bl putdec + puts ", signals=" + ldr x0, [x20, #ucontext_regs + 8 * 23] + bl putdecn + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! + + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b .Labort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + +// Main program entry point +.globl _start +function _start +_start: + // Sanity-check and report the vector length + + mov x19, #128 + cmp x19, #128 + b.lo 1f + cmp x19, #2048 + b.hi 1f + tst x19, #(8 - 1) + b.eq 2f + +1: puts "Bad vector length: " + mov x0, x19 + bl putdecn + b .Labort + +2: puts "Vector length:\t" + mov x0, x19 + bl putdec + puts " bits\n" + + // Obtain our PID, to ensure test pattern uniqueness between processes + + mov x8, #__NR_getpid + svc #0 + mov x20, x0 + + puts "PID:\t" + mov x0, x20 + bl putdecn + + mov x23, #0 // Irritation signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov x22, #0 // generation number, increments per iteration +.Ltest_loop: + + mov x21, #0 // Set up V-regs & shadow with test pattern +0: mov x0, x20 + mov x1, x21 + and x2, x22, #0xf + bl setup_vreg + add x21, x21, #1 + cmp x21, #NVR + b.lo 0b + +// Can't do this when SVE state is volatile across SVC: + mov x8, #__NR_sched_yield // Encourage preemption + svc #0 + + mov x21, #0 +0: mov x0, x21 + bl check_vreg + add x21, x21, #1 + cmp x21, #NVR + b.lo 0b + + add x22, x22, #1 + b .Ltest_loop + +.Labort: + mov x0, #0 + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +endfunction + +function barf + mov x10, x0 // expected data + mov x11, x1 // actual data + mov x12, x2 // data size + + puts "Mistatch: PID=" + mov x0, x20 + bl putdec + puts ", iteration=" + mov x0, x22 + bl putdec + puts ", reg=" + mov x0, x21 + bl putdecn + puts "\tExpected [" + mov x0, x10 + mov x1, x12 + bl dumphex + puts "]\n\tGot [" + mov x0, x11 + mov x1, x12 + bl dumphex + puts "]\n" + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S new file mode 100644 index 000000000000..f95074c9b48b --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -0,0 +1,672 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin +// +// Simple Scalable Vector Extension context switch test +// Repeatedly writes unique test patterns into each SVE register +// and reads them back to verify integrity. +// +// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done +// (leave it running for as long as you want...) +// kill $pids + +#include +#include "assembler.h" +#include "asm-offsets.h" + +#define NZR 32 +#define NPR 16 +#define MAXVL_B (2048 / 8) + +.arch_extension sve + +.macro _sve_ldr_v zt, xn + ldr z\zt, [x\xn] +.endm + +.macro _sve_str_v zt, xn + str z\zt, [x\xn] +.endm + +.macro _sve_ldr_p pt, xn + ldr p\pt, [x\xn] +.endm + +.macro _sve_str_p pt, xn + str p\pt, [x\xn] +.endm + +// Generate accessor functions to read/write programmatically selected +// SVE registers. +// x0 is the register index to access +// x1 is the memory address to read from (getz,setp) or store to (setz,setp) +// All clobber x0-x2 +define_accessor setz, NZR, _sve_ldr_v +define_accessor getz, NZR, _sve_str_v +define_accessor setp, NPR, _sve_ldr_p +define_accessor getp, NPR, _sve_str_p + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +// Print an unsigned decimal number x0 to stdout +// Clobbers x0-x4,x8 +function putdec + mov x1, sp + str x30, [sp, #-32]! // Result can't be > 20 digits + + mov x2, #0 + strb w2, [x1, #-1]! // Write the NUL terminator + + mov x2, #10 +0: udiv x3, x0, x2 // div-mod loop to generate the digits + msub x0, x3, x2, x0 + add w0, w0, #'0' + strb w0, [x1, #-1]! + mov x0, x3 + cbnz x3, 0b + + ldrb w0, [x1] + cbnz w0, 1f + mov w0, #'0' // Print "0" for 0, not "" + strb w0, [x1, #-1]! + +1: mov x0, x1 + bl puts + + ldr x30, [sp], #32 + ret +endfunction + +// Print an unsigned decimal number x0 to stdout, followed by a newline +// Clobbers x0-x5,x8 +function putdecn + mov x5, x30 + + bl putdec + mov x0, #'\n' + bl putc + + ret x5 +endfunction + +// Clobbers x0-x3,x8 +function puthexb + str x30, [sp, #-0x10]! + + mov w3, w0 + lsr w0, w0, #4 + bl puthexnibble + mov w0, w3 + + ldr x30, [sp], #0x10 + // fall through to puthexnibble +endfunction +// Clobbers x0-x2,x8 +function puthexnibble + and w0, w0, #0xf + cmp w0, #10 + blo 1f + add w0, w0, #'a' - ('9' + 1) +1: add w0, w0, #'0' + b putc +endfunction + +// x0=data in, x1=size in, clobbers x0-x5,x8 +function dumphex + str x30, [sp, #-0x10]! + + mov x4, x0 + mov x5, x1 + +0: subs x5, x5, #1 + b.lo 1f + ldrb w0, [x4], #1 + bl puthexb + b 0b + +1: ldr x30, [sp], #0x10 + ret +endfunction + +// Declare some storate space to shadow the SVE register contents: +.pushsection .text +.data +.align 4 +zref: + .space MAXVL_B * NZR +pref: + .space MAXVL_B / 8 * NPR +ffrref: + .space MAXVL_B / 8 +scratch: + .space MAXVL_B +.popsection + +// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. +// Clobbers x0-x3 +function memcpy + cmp x2, #0 + b.eq 1f +0: ldrb w3, [x1], #1 + strb w3, [x0], #1 + subs x2, x2, #1 + b.ne 0b +1: ret +endfunction + +// Generate a test pattern for storage in SVE registers +// x0: pid (16 bits) +// x1: register number (6 bits) +// x2: generation (4 bits) + +// These values are used to constuct a 32-bit pattern that is repeated in the +// scratch buffer as many times as will fit: +// bits 31:28 generation number (increments once per test_loop) +// bits 27:22 32-bit lane index +// bits 21:16 register number +// bits 15: 0 pid + +function pattern + orr w1, w0, w1, lsl #16 + orr w2, w1, w2, lsl #28 + + ldr x0, =scratch + mov w1, #MAXVL_B / 4 + +0: str w2, [x0], #4 + add w2, w2, #(1 << 22) + subs w1, w1, #1 + bne 0b + + ret +endfunction + +// Get the address of shadow data for SVE Z-register Z +.macro _adrz xd, xn, nrtmp + ldr \xd, =zref + rdvl x\nrtmp, #1 + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Get the address of shadow data for SVE P-register P +.macro _adrp xd, xn, nrtmp + ldr \xd, =pref + rdvl x\nrtmp, #1 + lsr x\nrtmp, x\nrtmp, #3 + sub \xn, \xn, #NZR + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Set up test pattern in a SVE Z-register +// x0: pid +// x1: register number +// x2: generation +function setup_zreg + mov x4, x30 + + mov x6, x1 + bl pattern + _adrz x0, x6, 2 + mov x5, x0 + ldr x1, =scratch + bl memcpy + + mov x0, x6 + mov x1, x5 + bl setz + + ret x4 +endfunction + +// Set up test pattern in a SVE P-register +// x0: pid +// x1: register number +// x2: generation +function setup_preg + mov x4, x30 + + mov x6, x1 + bl pattern + _adrp x0, x6, 2 + mov x5, x0 + ldr x1, =scratch + bl memcpy + + mov x0, x6 + mov x1, x5 + bl setp + + ret x4 +endfunction + +// Set up test pattern in the FFR +// x0: pid +// x2: generation +// Beware: corrupts P0. +function setup_ffr + mov x4, x30 + + bl pattern + ldr x0, =ffrref + ldr x1, =scratch + rdvl x2, #1 + lsr x2, x2, #3 + bl memcpy + + mov x0, #0 + ldr x1, =ffrref + bl setp + + wrffr p0.b + + ret x4 +endfunction + +// Fill x1 bytes starting at x0 with 0xae (for canary purposes) +// Clobbers x1, x2. +function memfill_ae + mov w2, #0xae + b memfill +endfunction + +// Fill x1 bytes starting at x0 with 0. +// Clobbers x1, x2. +function memclr + mov w2, #0 +endfunction + // fall through to memfill + +// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 +// Clobbers x1 +function memfill + cmp x1, #0 + b.eq 1f + +0: strb w2, [x0], #1 + subs x1, x1, #1 + b.ne 0b + +1: ret +endfunction + +// Trivial memory compare: compare x2 bytes starting at address x0 with +// bytes starting at address x1. +// Returns only if all bytes match; otherwise, the program is aborted. +// Clobbers x0-x5. +function memcmp + cbz x2, 2f + + stp x0, x1, [sp, #-0x20]! + str x2, [sp, #0x10] + + mov x5, #0 +0: ldrb w3, [x0, x5] + ldrb w4, [x1, x5] + add x5, x5, #1 + cmp w3, w4 + b.ne 1f + subs x2, x2, #1 + b.ne 0b + +1: ldr x2, [sp, #0x10] + ldp x0, x1, [sp], #0x20 + b.ne barf + +2: ret +endfunction + +// Verify that a SVE Z-register matches its shadow in memory, else abort +// x0: reg number +// Clobbers x0-x7. +function check_zreg + mov x3, x30 + + _adrz x5, x0, 6 + mov x4, x0 + ldr x7, =scratch + + mov x0, x7 + mov x1, x6 + bl memfill_ae + + mov x0, x4 + mov x1, x7 + bl getz + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Verify that a SVE P-register matches its shadow in memory, else abort +// x0: reg number +// Clobbers x0-x7. +function check_preg + mov x3, x30 + + _adrp x5, x0, 6 + mov x4, x0 + ldr x7, =scratch + + mov x0, x7 + mov x1, x6 + bl memfill_ae + + mov x0, x4 + mov x1, x7 + bl getp + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Verify that the FFR matches its shadow in memory, else abort +// Beware -- corrupts P0. +// Clobbers x0-x5. +function check_ffr + mov x3, x30 + + ldr x4, =scratch + rdvl x5, #1 + lsr x5, x5, #3 + + mov x0, x4 + mov x1, x5 + bl memfill_ae + + rdffr p0.b + mov x0, #0 + mov x1, x4 + bl getp + + ldr x0, =ffrref + mov x1, x4 + mov x2, x5 + mov x30, x3 + b memcmp +endfunction + +// Any SVE register modified here can cause corruption in the main +// thread -- but *only* the registers modified here. +function irritator_handler + // Increment the irritation signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + // Corrupt some random Z-regs + adr x0, .text + (irritator_handler - .text) / 16 * 16 + movi v0.8b, #1 + movi v9.16b, #2 + movi v31.8b, #3 + // And P0 + rdffr p0.b + // And FFR + wrffr p15.b + + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error, iterations=" + ldr x0, [x20, #ucontext_regs + 8 * 22] + bl putdec + puts ", signals=" + ldr x0, [x20, #ucontext_regs + 8 * 23] + bl putdecn + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! + + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b .Labort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + +// Main program entry point +.globl _start +function _start +_start: + // Sanity-check and report the vector length + + rdvl x19, #8 + cmp x19, #128 + b.lo 1f + cmp x19, #2048 + b.hi 1f + tst x19, #(8 - 1) + b.eq 2f + +1: puts "Bad vector length: " + mov x0, x19 + bl putdecn + b .Labort + +2: puts "Vector length:\t" + mov x0, x19 + bl putdec + puts " bits\n" + + // Obtain our PID, to ensure test pattern uniqueness between processes + + mov x8, #__NR_getpid + svc #0 + mov x20, x0 + + puts "PID:\t" + mov x0, x20 + bl putdecn + + mov x23, #0 // Irritation signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov x22, #0 // generation number, increments per iteration +.Ltest_loop: + rdvl x0, #8 + cmp x0, x19 + b.ne vl_barf + + mov x21, #0 // Set up Z-regs & shadow with test pattern +0: mov x0, x20 + mov x1, x21 + and x2, x22, #0xf + bl setup_zreg + add x21, x21, #1 + cmp x21, #NZR + b.lo 0b + + mov x0, x20 // Set up FFR & shadow with test pattern + mov x1, #NZR + NPR + and x2, x22, #0xf + bl setup_ffr + +0: mov x0, x20 // Set up P-regs & shadow with test pattern + mov x1, x21 + and x2, x22, #0xf + bl setup_preg + add x21, x21, #1 + cmp x21, #NZR + NPR + b.lo 0b + +// Can't do this when SVE state is volatile across SVC: +// mov x8, #__NR_sched_yield // Encourage preemption +// svc #0 + + mov x21, #0 +0: mov x0, x21 + bl check_zreg + add x21, x21, #1 + cmp x21, #NZR + b.lo 0b + +0: mov x0, x21 + bl check_preg + add x21, x21, #1 + cmp x21, #NZR + NPR + b.lo 0b + + bl check_ffr + + add x22, x22, #1 + b .Ltest_loop + +.Labort: + mov x0, #0 + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +endfunction + +function barf +// fpsimd.c acitivty log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// end hack + mov x10, x0 // expected data + mov x11, x1 // actual data + mov x12, x2 // data size + + puts "Mistatch: PID=" + mov x0, x20 + bl putdec + puts ", iteration=" + mov x0, x22 + bl putdec + puts ", reg=" + mov x0, x21 + bl putdecn + puts "\tExpected [" + mov x0, x10 + mov x1, x12 + bl dumphex + puts "]\n\tGot [" + mov x0, x11 + mov x1, x12 + bl dumphex + puts "]\n" + + mov x8, #__NR_getpid + svc #0 +// fpsimd.c acitivty log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// ^ end of hack + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +// mov x8, #__NR_exit +// mov x1, #1 +// svc #0 +endfunction + +function vl_barf + mov x10, x0 + + puts "Bad active VL: " + mov x0, x10 + bl putdecn + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction -- cgit v1.3.1 From fc7e611f9f38de7166c5f8951df93f7351542448 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 19 Aug 2020 12:48:35 +0100 Subject: selftests: arm64: Add utility to set SVE vector lengths vlset is a small utility for use in conjunction with tests like the sve-test stress test which allows another executable to be invoked with a configured SVE vector length. Signed-off-by: Mark Brown Acked-by: Dave Martin Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20200819114837.51466-5-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/vlset.c | 155 +++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 tools/testing/selftests/arm64/fp/vlset.c (limited to 'tools') diff --git a/tools/testing/selftests/arm64/fp/vlset.c b/tools/testing/selftests/arm64/fp/vlset.c new file mode 100644 index 000000000000..308d27a68226 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/vlset.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015-2019 ARM Limited. + * Original author: Dave Martin + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int inherit = 0; +static int no_inherit = 0; +static int force = 0; +static unsigned long vl; + +static const struct option options[] = { + { "force", no_argument, NULL, 'f' }, + { "inherit", no_argument, NULL, 'i' }, + { "max", no_argument, NULL, 'M' }, + { "no-inherit", no_argument, &no_inherit, 1 }, + { "help", no_argument, NULL, '?' }, + {} +}; + +static char const *program_name; + +static int parse_options(int argc, char **argv) +{ + int c; + char *rest; + + program_name = strrchr(argv[0], '/'); + if (program_name) + ++program_name; + else + program_name = argv[0]; + + while ((c = getopt_long(argc, argv, "Mfhi", options, NULL)) != -1) + switch (c) { + case 'M': vl = SVE_VL_MAX; break; + case 'f': force = 1; break; + case 'i': inherit = 1; break; + case 0: break; + default: goto error; + } + + if (inherit && no_inherit) + goto error; + + if (!vl) { + /* vector length */ + if (optind >= argc) + goto error; + + errno = 0; + vl = strtoul(argv[optind], &rest, 0); + if (*rest) { + vl = ULONG_MAX; + errno = EINVAL; + } + if (vl == ULONG_MAX && errno) { + fprintf(stderr, "%s: %s: %s\n", + program_name, argv[optind], strerror(errno)); + goto error; + } + + ++optind; + } + + /* command */ + if (optind >= argc) + goto error; + + return 0; + +error: + fprintf(stderr, + "Usage: %s [-f | --force] " + "[-i | --inherit | --no-inherit] " + "{-M | --max | } " + " [ ...]\n", + program_name); + return -1; +} + +int main(int argc, char **argv) +{ + int ret = 126; /* same as sh(1) command-not-executable error */ + long flags; + char *path; + int t, e; + + if (parse_options(argc, argv)) + return 2; /* same as sh(1) builtin incorrect-usage */ + + if (vl & ~(vl & PR_SVE_VL_LEN_MASK)) { + fprintf(stderr, "%s: Invalid vector length %lu\n", + program_name, vl); + return 2; /* same as sh(1) builtin incorrect-usage */ + } + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) { + fprintf(stderr, "%s: Scalable Vector Extension not present\n", + program_name); + + if (!force) + goto error; + + fputs("Going ahead anyway (--force): " + "This is a debug option. Don't rely on it.\n", + stderr); + } + + flags = PR_SVE_SET_VL_ONEXEC; + if (inherit) + flags |= PR_SVE_VL_INHERIT; + + t = prctl(PR_SVE_SET_VL, vl | flags); + if (t < 0) { + fprintf(stderr, "%s: PR_SVE_SET_VL: %s\n", + program_name, strerror(errno)); + goto error; + } + + t = prctl(PR_SVE_GET_VL); + if (t == -1) { + fprintf(stderr, "%s: PR_SVE_GET_VL: %s\n", + program_name, strerror(errno)); + goto error; + } + flags = PR_SVE_VL_LEN_MASK; + flags = t & ~flags; + + assert(optind < argc); + path = argv[optind]; + + execvp(path, &argv[optind]); + e = errno; + if (errno == ENOENT) + ret = 127; /* same as sh(1) not-found error */ + fprintf(stderr, "%s: %s: %s\n", program_name, path, strerror(e)); + +error: + return ret; /* same as sh(1) not-executable error */ +} -- cgit v1.3.1 From 25f47e3eb66e6ee31b3ed3f8c0b7bdce098106fe Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 19 Aug 2020 12:48:36 +0100 Subject: selftests: arm64: Add wrapper scripts for stress tests Add wrapper scripts which invoke fpsimd-test and sve-test with several copies per CPU such that the context switch code will be appropriately exercised. Signed-off-by: Mark Brown Acked-by: Dave Martin Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20200819114837.51466-6-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/fpsimd-stress | 60 ++++++++++++++++++++++++++ tools/testing/selftests/arm64/fp/sve-stress | 59 +++++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100755 tools/testing/selftests/arm64/fp/fpsimd-stress create mode 100755 tools/testing/selftests/arm64/fp/sve-stress (limited to 'tools') diff --git a/tools/testing/selftests/arm64/fp/fpsimd-stress b/tools/testing/selftests/arm64/fp/fpsimd-stress new file mode 100755 index 000000000000..781b5b022eaf --- /dev/null +++ b/tools/testing/selftests/arm64/fp/fpsimd-stress @@ -0,0 +1,60 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2015-2019 ARM Limited. +# Original author: Dave Martin + +set -ue + +NR_CPUS=`nproc` + +pids= +logs= + +cleanup () { + trap - INT TERM CHLD + set +e + + if [ -n "$pids" ]; then + kill $pids + wait $pids + pids= + fi + + if [ -n "$logs" ]; then + cat $logs + rm $logs + logs= + fi +} + +interrupt () { + cleanup + exit 0 +} + +child_died () { + cleanup + exit 1 +} + +trap interrupt INT TERM EXIT +trap child_died CHLD + +for x in `seq 0 $((NR_CPUS * 4))`; do + log=`mktemp` + logs=$logs\ $log + ./fpsimd-test >$log & + pids=$pids\ $! +done + +# Wait for all child processes to be created: +sleep 10 + +while :; do + kill -USR1 $pids +done & +pids=$pids\ $! + +wait + +exit 1 diff --git a/tools/testing/selftests/arm64/fp/sve-stress b/tools/testing/selftests/arm64/fp/sve-stress new file mode 100755 index 000000000000..24dd0922cc02 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-stress @@ -0,0 +1,59 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2015-2019 ARM Limited. +# Original author: Dave Martin + +set -ue + +NR_CPUS=`nproc` + +pids= +logs= + +cleanup () { + trap - INT TERM CHLD + set +e + + if [ -n "$pids" ]; then + kill $pids + wait $pids + pids= + fi + + if [ -n "$logs" ]; then + cat $logs + rm $logs + logs= + fi +} + +interrupt () { + cleanup + exit 0 +} + +child_died () { + cleanup + exit 1 +} + +trap interrupt INT TERM EXIT + +for x in `seq 0 $((NR_CPUS * 4))`; do + log=`mktemp` + logs=$logs\ $log + ./sve-test >$log & + pids=$pids\ $! +done + +# Wait for all child processes to be created: +sleep 10 + +while :; do + kill -USR1 $pids +done & +pids=$pids\ $! + +wait + +exit 1 -- cgit v1.3.1 From e093256d14fbf9e8aaf02ed0ac69087eef6792bd Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 19 Aug 2020 12:48:37 +0100 Subject: selftests: arm64: Add build and documentation for FP tests Integrate the FP tests with the build system and add some documentation for the ones run outside the kselftest infrastructure. The content in the README was largely written by Dave Martin with edits by me. Signed-off-by: Mark Brown Acked-by: Dave Martin Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20200819114837.51466-7-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/Makefile | 2 +- tools/testing/selftests/arm64/fp/.gitignore | 5 ++ tools/testing/selftests/arm64/fp/Makefile | 17 +++++ tools/testing/selftests/arm64/fp/README | 100 ++++++++++++++++++++++++++++ 4 files changed, 123 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/arm64/fp/.gitignore create mode 100644 tools/testing/selftests/arm64/fp/Makefile create mode 100644 tools/testing/selftests/arm64/fp/README (limited to 'tools') diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index 525506fd97b9..463d56278f8f 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -4,7 +4,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -ARM64_SUBTARGETS ?= tags signal pauth +ARM64_SUBTARGETS ?= tags signal pauth fp else ARM64_SUBTARGETS := endif diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore new file mode 100644 index 000000000000..d66f76d2a650 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/.gitignore @@ -0,0 +1,5 @@ +fpsimd-test +sve-probe-vls +sve-ptrace +sve-test +vlset diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile new file mode 100644 index 000000000000..a57009d3a0dc --- /dev/null +++ b/tools/testing/selftests/arm64/fp/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -I../../../../../usr/include/ +TEST_GEN_PROGS := sve-ptrace sve-probe-vls +TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress sve-test sve-stress vlset + +all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED) + +fpsimd-test: fpsimd-test.o + $(CC) -nostdlib $^ -o $@ +sve-ptrace: sve-ptrace.o sve-ptrace-asm.o +sve-probe-vls: sve-probe-vls.o +sve-test: sve-test.o + $(CC) -nostdlib $^ -o $@ +vlset: vlset.o + +include ../../lib.mk diff --git a/tools/testing/selftests/arm64/fp/README b/tools/testing/selftests/arm64/fp/README new file mode 100644 index 000000000000..03e3dad865d8 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/README @@ -0,0 +1,100 @@ +This directory contains a mix of tests integrated with kselftest and +standalone stress tests. + +kselftest tests +=============== + +sve-probe-vls - Checks the SVE vector length enumeration interface +sve-ptrace - Checks the SVE ptrace interface + +Running the non-kselftest tests +=============================== + +sve-stress performs an SVE context switch stress test, as described +below. + +(The fpsimd-stress test works the same way; just substitute "fpsimd" for +"sve" in the following commands.) + + +The test runs until killed by the user. + +If no context switch error was detected, you will see output such as +the following: + +$ ./sve-stress +(wait for some time) +^C +Vector length: 512 bits +PID: 1573 +Terminated by signal 15, no error, iterations=9467, signals=1014 +Vector length: 512 bits +PID: 1575 +Terminated by signal 15, no error, iterations=9448, signals=1028 +Vector length: 512 bits +PID: 1577 +Terminated by signal 15, no error, iterations=9436, signals=1039 +Vector length: 512 bits +PID: 1579 +Terminated by signal 15, no error, iterations=9421, signals=1039 +Vector length: 512 bits +PID: 1581 +Terminated by signal 15, no error, iterations=9403, signals=1039 +Vector length: 512 bits +PID: 1583 +Terminated by signal 15, no error, iterations=9385, signals=1036 +Vector length: 512 bits +PID: 1585 +Terminated by signal 15, no error, iterations=9376, signals=1039 +Vector length: 512 bits +PID: 1587 +Terminated by signal 15, no error, iterations=9361, signals=1039 +Vector length: 512 bits +PID: 1589 +Terminated by signal 15, no error, iterations=9350, signals=1039 + + +If an error was detected, details of the mismatch will be printed +instead of "no error". + +Ideally, the test should be allowed to run for many minutes or hours +to maximise test coverage. + + +KVM stress testing +================== + +To try to reproduce the bugs that we have been observing, sve-stress +should be run in parallel in two KVM guests, while simultaneously +running on the host. + +1) Start 2 guests, using the following command for each: + +$ lkvm run --console=virtio -pconsole=hvc0 --sve Image + +(Depending on the hardware GIC implementation, you may also need +--irqchip=gicv3. New kvmtool defaults to that if appropriate, but I +can't remember whether my branch is new enough for that. Try without +the option first.) + +Kvmtool occupies the terminal until you kill it (Ctrl+A x), +or until the guest terminates. It is therefore recommended to run +each instance in separate terminal (use screen or ssh etc.) This +allows multiple guests to be run in parallel while running other +commands on the host. + +Within the guest, the host filesystem is accessible, mounted on /host. + +2) Run the sve-stress on *each* guest with the Vector-Length set to 32: +guest$ ./vlset --inherit 32 ./sve-stress + +3) Run the sve-stress on the host with the maximum Vector-Length: +host$ ./vlset --inherit --max ./sve-stress + + +Again, the test should be allowed to run for many minutes or hours to +maximise test coverage. + +If no error is detected, you will see output from each sve-stress +instance similar to that illustrated above; otherwise details of the +observed mismatches will be printed. -- cgit v1.3.1 From f4f803984c3685f416a74e9e2fa7d39bdafbe02b Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Tue, 15 Sep 2020 08:53:16 +0100 Subject: objtool: Remove useless tests before save_reg() save_reg already checks that the register being saved does not already have a saved state. Remove redundant checks before processing a register storing operation. Signed-off-by: Julien Thierry Signed-off-by: Josh Poimboeuf --- tools/objtool/check.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4e2f703b6a25..fd2edab8e672 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2030,7 +2030,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, /* drap: push %rbp */ cfi->stack_size = 0; - } else if (regs[op->src.reg].base == CFI_UNDEFINED) { + } else { /* drap: push %reg */ save_reg(cfi, op->src.reg, CFI_BP, -cfi->stack_size); @@ -2059,9 +2059,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, /* save drap offset so we know when to restore it */ cfi->drap_offset = op->dest.offset; - } - - else if (regs[op->src.reg].base == CFI_UNDEFINED) { + } else { /* drap: mov reg, disp(%rbp) */ save_reg(cfi, op->src.reg, CFI_BP, op->dest.offset); -- cgit v1.3.1 From fb136219f0e2b417d84e67b2a3adc1f933997d04 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Tue, 15 Sep 2020 08:53:17 +0100 Subject: objtool: Ignore unreachable fake jumps It is possible for alternative code to unconditionally jump out of the alternative region. In such a case, if a fake jump is added at the end of the alternative instructions, the fake jump will never be reached. Since the fake jump is just a mean to make sure code validation does not go beyond the set of alternatives, reaching it is not a requirement. Signed-off-by: Julien Thierry Signed-off-by: Josh Poimboeuf --- tools/objtool/check.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index fd2edab8e672..cd7c6698d316 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2648,6 +2648,9 @@ static bool ignore_unreachable_insn(struct instruction *insn) !strcmp(insn->sec->name, ".altinstr_aux")) return true; + if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->offset == FAKE_JUMP_OFFSET) + return true; + if (!insn->func) return false; -- cgit v1.3.1 From b6ed6cf4a3acdeab9aed8e0a524850761ec9b152 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 15 Sep 2020 13:38:15 +0200 Subject: selftests/bpf: Fix endianness issue in sk_assign server_map's value size is 8, but the test tries to put an int there. This sort of works on x86 (unless followed by non-0), but hard fails on s390. Fix by using __s64 instead of int. Fixes: 2d7824ffd25c ("selftests: bpf: Add test for sk_assign") Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200915113815.3768217-1-iii@linux.ibm.com --- tools/testing/selftests/bpf/prog_tests/sk_assign.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c index a49a26f95a8b..3a469099f30d 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c @@ -265,7 +265,7 @@ void test_sk_assign(void) TEST("ipv6 udp port redir", AF_INET6, SOCK_DGRAM, false), TEST("ipv6 udp addr redir", AF_INET6, SOCK_DGRAM, true), }; - int server = -1; + __s64 server = -1; int server_map; int self_net; int i; -- cgit v1.3.1 From fec47bbc10b243690f5d0ee484a0bbdee273e71b Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 15 Sep 2020 13:39:28 +0200 Subject: selftests/bpf: Fix endianness issue in test_sockopt_sk getsetsockopt() calls getsockopt() with optlen == 1, but then checks the resulting int. It is ok on little endian, but not on big endian. Fix by checking char instead. Fixes: 8a027dc0d8f5 ("selftests/bpf: add sockopt test that exercises sk helpers") Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200915113928.3768496-1-iii@linux.ibm.com --- tools/testing/selftests/bpf/prog_tests/sockopt_sk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index 5f54c6aec7f0..b25c9c45c148 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -45,9 +45,9 @@ static int getsetsockopt(void) goto err; } - if (*(int *)big_buf != 0x08) { + if (*big_buf != 0x08) { log_err("Unexpected getsockopt(IP_TOS) optval 0x%x != 0x08", - *(int *)big_buf); + (int)*big_buf); goto err; } -- cgit v1.3.1 From d052e1c6909f9ccbdc4112a50796afca19094229 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 10 Aug 2020 17:34:41 +0900 Subject: tools/bootconfig: Show bootconfig compact tree from bootconfig file Show the bootconfig compact tree from the bootconfig file instead of an initrd if the given file has no magic number and is smaller than 32KB. User can use this for checking the syntax error or output checking before applying the bootconfig to initrd. Link: https://lkml.kernel.org/r/159704848156.175360.6621139371000789360.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/main.c | 81 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 58 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index e0878f5f74b1..d165e63b5d5a 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -195,10 +195,55 @@ int load_xbc_from_initrd(int fd, char **buf) return size; } +static void show_xbc_error(const char *data, const char *msg, int pos) +{ + int lin = 1, col, i; + + if (pos < 0) { + pr_err("Error: %s.\n", msg); + return; + } + + /* Note that pos starts from 0 but lin and col should start from 1. */ + col = pos + 1; + for (i = 0; i < pos; i++) { + if (data[i] == '\n') { + lin++; + col = pos - i; + } + } + pr_err("Parse Error: %s at %d:%d\n", msg, lin, col); + +} + +static int init_xbc_with_error(char *buf, int len) +{ + char *copy = strdup(buf); + const char *msg; + int ret, pos; + + if (!copy) + return -ENOMEM; + + ret = xbc_init(buf, &msg, &pos); + if (ret < 0) + show_xbc_error(copy, msg, pos); + free(copy); + + return ret; +} + int show_xbc(const char *path) { int ret, fd; char *buf = NULL; + struct stat st; + + ret = stat(path, &st); + if (ret < 0) { + pr_err("Failed to stat %s: %d\n", path, -errno); + return -errno; + } fd = open(path, O_RDONLY); if (fd < 0) { @@ -207,14 +252,24 @@ int show_xbc(const char *path) } ret = load_xbc_from_initrd(fd, &buf); + close(fd); if (ret < 0) { pr_err("Failed to load a boot config from initrd: %d\n", ret); goto out; } + /* Assume a bootconfig file if it is enough small */ + if (ret == 0 && st.st_size <= XBC_DATA_MAX) { + ret = load_xbc_file(path, &buf); + if (ret < 0) { + pr_err("Failed to load a boot config: %d\n", ret); + goto out; + } + if (init_xbc_with_error(buf, ret) < 0) + goto out; + } xbc_show_compact_tree(); ret = 0; out: - close(fd); free(buf); return ret; @@ -251,27 +306,6 @@ int delete_xbc(const char *path) return ret; } -static void show_xbc_error(const char *data, const char *msg, int pos) -{ - int lin = 1, col, i; - - if (pos < 0) { - pr_err("Error: %s.\n", msg); - return; - } - - /* Note that pos starts from 0 but lin and col should start from 1. */ - col = pos + 1; - for (i = 0; i < pos; i++) { - if (data[i] == '\n') { - lin++; - col = pos - i; - } - } - pr_err("Parse Error: %s at %d:%d\n", msg, lin, col); - -} - int apply_xbc(const char *path, const char *xbc_path) { u32 size, csum; @@ -352,11 +386,12 @@ out: int usage(void) { printf("Usage: bootconfig [OPTIONS] \n" + "Or bootconfig \n" " Apply, delete or show boot config to initrd.\n" " Options:\n" " -a : Apply boot config to initrd\n" " -d : Delete boot config file from initrd\n\n" - " If no option is given, show current applied boot config.\n"); + " If no option is given, show the bootconfig in the given file.\n"); return -1; } -- cgit v1.3.1 From e4f70b7badb40598ceea31c122d7c2fb6203672a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 10 Aug 2020 17:34:51 +0900 Subject: tools/bootconfig: Add list option Add list option (-l) to show the bootconfig in the list style. This is same output of /proc/bootconfig. So users can check how their bootconfig will be shown in procfs. This will help them to write a user-space script to parse the /proc/bootconfig. Link: https://lkml.kernel.org/r/159704849087.175360.8761890802048625207.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/main.c | 52 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index d165e63b5d5a..78025267df20 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -14,18 +14,19 @@ #include #include -static int xbc_show_value(struct xbc_node *node) +static int xbc_show_value(struct xbc_node *node, bool semicolon) { - const char *val; + const char *val, *eol; char q; int i = 0; + eol = semicolon ? ";\n" : "\n"; xbc_array_for_each_value(node, val) { if (strchr(val, '"')) q = '\''; else q = '"'; - printf("%c%s%c%s", q, val, q, node->next ? ", " : ";\n"); + printf("%c%s%c%s", q, val, q, node->next ? ", " : eol); i++; } return i; @@ -53,7 +54,7 @@ static void xbc_show_compact_tree(void) continue; } else if (cnode && xbc_node_is_value(cnode)) { printf("%s = ", xbc_node_get_data(node)); - xbc_show_value(cnode); + xbc_show_value(cnode, true); } else { printf("%s;\n", xbc_node_get_data(node)); } @@ -77,6 +78,26 @@ static void xbc_show_compact_tree(void) } } +static void xbc_show_list(void) +{ + char key[XBC_KEYLEN_MAX]; + struct xbc_node *leaf; + const char *val; + int ret = 0; + + xbc_for_each_key_value(leaf, val) { + ret = xbc_node_compose_key(leaf, key, XBC_KEYLEN_MAX); + if (ret < 0) + break; + printf("%s = ", key); + if (!val || val[0] == '\0') { + printf("\"\"\n"); + continue; + } + xbc_show_value(xbc_node_get_child(leaf), false); + } +} + /* Simple real checksum */ int checksum(unsigned char *buf, int len) { @@ -233,7 +254,7 @@ static int init_xbc_with_error(char *buf, int len) return ret; } -int show_xbc(const char *path) +int show_xbc(const char *path, bool list) { int ret, fd; char *buf = NULL; @@ -267,7 +288,10 @@ int show_xbc(const char *path) if (init_xbc_with_error(buf, ret) < 0) goto out; } - xbc_show_compact_tree(); + if (list) + xbc_show_list(); + else + xbc_show_compact_tree(); ret = 0; out: free(buf); @@ -390,7 +414,8 @@ int usage(void) " Apply, delete or show boot config to initrd.\n" " Options:\n" " -a : Apply boot config to initrd\n" - " -d : Delete boot config file from initrd\n\n" + " -d : Delete boot config file from initrd\n" + " -l : list boot config in initrd or file\n\n" " If no option is given, show the bootconfig in the given file.\n"); return -1; } @@ -399,10 +424,10 @@ int main(int argc, char **argv) { char *path = NULL; char *apply = NULL; - bool delete = false; + bool delete = false, list = false; int opt; - while ((opt = getopt(argc, argv, "hda:")) != -1) { + while ((opt = getopt(argc, argv, "hda:l")) != -1) { switch (opt) { case 'd': delete = true; @@ -410,14 +435,17 @@ int main(int argc, char **argv) case 'a': apply = optarg; break; + case 'l': + list = true; + break; case 'h': default: return usage(); } } - if (apply && delete) { - pr_err("Error: You can not specify both -a and -d at once.\n"); + if ((apply && delete) || (delete && list) || (apply && list)) { + pr_err("Error: You can give one of -a, -d or -l at once.\n"); return usage(); } @@ -433,5 +461,5 @@ int main(int argc, char **argv) else if (delete) return delete_xbc(path); - return show_xbc(path); + return show_xbc(path, list); } -- cgit v1.3.1 From 483ce6708dce7116ef2c83b36a1cfe28a36c4fc9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 10 Aug 2020 17:35:01 +0900 Subject: tools/bootconfig: Make all functions static Make all functions static except for main(). This is just a cleanup. Link: https://lkml.kernel.org/r/159704850135.175360.12465608936326167517.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/main.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 78025267df20..eb92027817a7 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -99,7 +99,7 @@ static void xbc_show_list(void) } /* Simple real checksum */ -int checksum(unsigned char *buf, int len) +static int checksum(unsigned char *buf, int len) { int i, sum = 0; @@ -111,7 +111,7 @@ int checksum(unsigned char *buf, int len) #define PAGE_SIZE 4096 -int load_xbc_fd(int fd, char **buf, int size) +static int load_xbc_fd(int fd, char **buf, int size) { int ret; @@ -128,7 +128,7 @@ int load_xbc_fd(int fd, char **buf, int size) } /* Return the read size or -errno */ -int load_xbc_file(const char *path, char **buf) +static int load_xbc_file(const char *path, char **buf) { struct stat stat; int fd, ret; @@ -147,7 +147,7 @@ int load_xbc_file(const char *path, char **buf) return ret; } -int load_xbc_from_initrd(int fd, char **buf) +static int load_xbc_from_initrd(int fd, char **buf) { struct stat stat; int ret; @@ -254,7 +254,7 @@ static int init_xbc_with_error(char *buf, int len) return ret; } -int show_xbc(const char *path, bool list) +static int show_xbc(const char *path, bool list) { int ret, fd; char *buf = NULL; @@ -299,7 +299,7 @@ out: return ret; } -int delete_xbc(const char *path) +static int delete_xbc(const char *path) { struct stat stat; int ret = 0, fd, size; @@ -330,7 +330,7 @@ int delete_xbc(const char *path) return ret; } -int apply_xbc(const char *path, const char *xbc_path) +static int apply_xbc(const char *path, const char *xbc_path) { u32 size, csum; char *buf, *data; @@ -407,7 +407,7 @@ out: return ret; } -int usage(void) +static int usage(void) { printf("Usage: bootconfig [OPTIONS] \n" "Or bootconfig \n" -- cgit v1.3.1 From 7e66ef0046ccf896674955b819c27c49783a4deb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 10 Aug 2020 17:35:11 +0900 Subject: tools/bootconfig: Add a script to generate ftrace shell-command from bootconfig Add a bconf2ftrace.sh under tools/bootconfig/scripts which generates a shell script to setup boot-time trace from bootconfig file for testing the bootconfig. bconf2ftrace.sh will take a bootconfig file (includes boot-time tracing) and convert it into a shell-script which is almost same as the boot-time tracer does. If --apply option is given, it also tries to apply those command to the running kernel, which requires the root privilege (or sudo). For example, if you just want to confirm the shell commands, save the output as below. # bconf2ftrace.sh ftrace.bconf > ftrace.sh Or, you can apply it directly. # bconf2ftrace.sh --apply ftrace.bconf Note that some boot-time tracing parameters under kernel.* are not able to set via tracefs nor procfs (e.g. tp_printk, traceoff_on_warning.), so those are ignored. Link: https://lkml.kernel.org/r/159704851101.175360.15119132351139842345.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- MAINTAINERS | 1 + tools/bootconfig/scripts/bconf2ftrace.sh | 189 +++++++++++++++++++++++++++++++ tools/bootconfig/scripts/xbc.sh | 56 +++++++++ 3 files changed, 246 insertions(+) create mode 100755 tools/bootconfig/scripts/bconf2ftrace.sh create mode 100644 tools/bootconfig/scripts/xbc.sh (limited to 'tools') diff --git a/MAINTAINERS b/MAINTAINERS index 0d0862b19ce5..5bd82d7da40c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6601,6 +6601,7 @@ F: fs/proc/bootconfig.c F: include/linux/bootconfig.h F: lib/bootconfig.c F: tools/bootconfig/* +F: tools/bootconfig/scripts/* EXYNOS DP DRIVER M: Jingoo Han diff --git a/tools/bootconfig/scripts/bconf2ftrace.sh b/tools/bootconfig/scripts/bconf2ftrace.sh new file mode 100755 index 000000000000..a46e984fb2ff --- /dev/null +++ b/tools/bootconfig/scripts/bconf2ftrace.sh @@ -0,0 +1,189 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-only + +usage() { + echo "Ftrace boottime trace test tool" + echo "Usage: $0 [--apply] [--debug] BOOTCONFIG-FILE" + echo " --apply: Test actual apply to tracefs (need sudo)" + exit 1 +} + +[ $# -eq 0 ] && usage + +BCONF= +DEBUG= +APPLY= +while [ x"$1" != x ]; do + case "$1" in + "--debug") + DEBUG=$1;; + "--apply") + APPLY=$1;; + *) + [ ! -f $1 ] && usage + BCONF=$1;; + esac + shift 1 +done + +if [ x"$APPLY" != x ]; then + if [ `id -u` -ne 0 ]; then + echo "This must be run by root user. Try sudo." 1>&2 + exec sudo $0 $DEBUG $APPLY $BCONF + fi +fi + +run_cmd() { # command + echo "$*" + if [ x"$APPLY" != x ]; then # apply command + eval $* + fi +} + +if [ x"$DEBUG" != x ]; then + set -x +fi + +TRACEFS=`grep -m 1 -w tracefs /proc/mounts | cut -f 2 -d " "` +if [ -z "$TRACEFS" ]; then + if ! grep -wq debugfs /proc/mounts; then + echo "Error: No tracefs/debugfs was mounted." 1>&2 + exit 1 + fi + TRACEFS=`grep -m 1 -w debugfs /proc/mounts | cut -f 2 -d " "`/tracing + if [ ! -d $TRACEFS ]; then + echo "Error: ftrace is not enabled on this kernel." 1>&2 + exit 1 + fi +fi + +. `dirname $0`/xbc.sh + +######## main ######### +set -e + +xbc_init $BCONF + +set_value_of() { # key file + if xbc_has_key $1; then + val=`xbc_get_val $1 1` + run_cmd "echo '$val' >> $2" + fi +} + +set_array_of() { # key file + if xbc_has_key $1; then + xbc_get_val $1 | while read line; do + run_cmd "echo '$line' >> $2" + done + fi +} + +compose_synth() { # event_name branch + echo -n "$1 " + xbc_get_val $2 | while read field; do echo -n "$field; "; done +} + +setup_event() { # prefix group event [instance] + branch=$1.$2.$3 + if [ "$4" ]; then + eventdir="$TRACEFS/instances/$4/events/$2/$3" + else + eventdir="$TRACEFS/events/$2/$3" + fi + case $2 in + kprobes) + xbc_get_val ${branch}.probes | while read line; do + run_cmd "echo 'p:kprobes/$3 $line' >> $TRACEFS/kprobe_events" + done + ;; + synthetic) + run_cmd "echo '`compose_synth $3 ${branch}.fields`' >> $TRACEFS/synthetic_events" + ;; + esac + + set_value_of ${branch}.filter ${eventdir}/filter + set_array_of ${branch}.actions ${eventdir}/trigger + + if xbc_has_key ${branch}.enable; then + run_cmd "echo 1 > ${eventdir}/enable" + fi +} + +setup_events() { # prefix("ftrace" or "ftrace.instance.INSTANCE") [instance] + prefix="${1}.event" + if xbc_has_branch ${1}.event; then + for grpev in `xbc_subkeys ${1}.event 2`; do + setup_event $prefix ${grpev%.*} ${grpev#*.} $2 + done + fi +} + +size2kb() { # size[KB|MB] + case $1 in + *KB) + echo ${1%KB};; + *MB) + expr ${1%MB} \* 1024;; + *) + expr $1 / 1024 ;; + esac +} + +setup_instance() { # [instance] + if [ "$1" ]; then + instance="ftrace.instance.${1}" + instancedir=$TRACEFS/instances/$1 + else + instance="ftrace" + instancedir=$TRACEFS + fi + + set_array_of ${instance}.options ${instancedir}/trace_options + set_value_of ${instance}.trace_clock ${instancedir}/trace_clock + set_value_of ${instance}.cpumask ${instancedir}/tracing_cpumask + set_value_of ${instance}.tracer ${instancedir}/current_tracer + set_array_of ${instance}.ftrace.filters \ + ${instancedir}/set_ftrace_filter + set_array_of ${instance}.ftrace.notrace \ + ${instancedir}/set_ftrace_notrace + + if xbc_has_key ${instance}.alloc_snapshot; then + run_cmd "echo 1 > ${instancedir}/snapshot" + fi + + if xbc_has_key ${instance}.buffer_size; then + size=`xbc_get_val ${instance}.buffer_size 1` + size=`eval size2kb $size` + run_cmd "echo $size >> ${instancedir}/buffer_size_kb" + fi + + setup_events ${instance} $1 + set_array_of ${instance}.events ${instancedir}/set_event +} + +# ftrace global configs (kernel.*) +if xbc_has_key "kernel.dump_on_oops"; then + dump_mode=`xbc_get_val "kernel.dump_on_oops" 1` + [ "$dump_mode" ] && dump_mode=`eval echo $dump_mode` || dump_mode=1 + run_cmd "echo \"$dump_mode\" > /proc/sys/kernel/ftrace_dump_on_oops" +fi + +set_value_of kernel.fgraph_max_depth $TRACEFS/max_graph_depth +set_array_of kernel.fgraph_filters $TRACEFS/set_graph_function +set_array_of kernel.fgraph_notraces $TRACEFS/set_graph_notrace + +# Per-instance/per-event configs +if ! xbc_has_branch "ftrace" ; then + exit 0 +fi + +setup_instance # root instance + +if xbc_has_branch "ftrace.instance"; then + for i in `xbc_subkeys "ftrace.instance" 1`; do + run_cmd "mkdir -p $TRACEFS/instances/$i" + setup_instance $i + done +fi + diff --git a/tools/bootconfig/scripts/xbc.sh b/tools/bootconfig/scripts/xbc.sh new file mode 100644 index 000000000000..b8c84e654556 --- /dev/null +++ b/tools/bootconfig/scripts/xbc.sh @@ -0,0 +1,56 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-only + +# bootconfig utility functions + +XBC_TMPFILE= +XBC_BASEDIR=`dirname $0` +BOOTCONFIG=${BOOTCONFIG:=$XBC_BASEDIR/../bootconfig} +if [ ! -x "$BOOTCONFIG" ]; then + BOOTCONFIG=`which bootconfig` + if [ -z "$BOOTCONFIG" ]; then + echo "Erorr: bootconfig command is not found" 1>&2 + exit 1 + fi +fi + +xbc_cleanup() { + if [ "$XBC_TMPFILE" ]; then + rm -f "$XBC_TMPFILE" + fi +} + +xbc_init() { # bootconfig-file + xbc_cleanup + XBC_TMPFILE=`mktemp bconf-XXXX` + trap xbc_cleanup EXIT TERM + + $BOOTCONFIG -l $1 > $XBC_TMPFILE || exit 1 +} + +nr_args() { # args + echo $# +} + +xbc_get_val() { # key [maxnum] + if [ "$2" ]; then + MAXOPT="-L $2" + fi + grep "^$1 =" $XBC_TMPFILE | cut -d= -f2- | \ + sed -e 's/", /" /g' -e "s/',/' /g" | \ + xargs $MAXOPT -n 1 echo +} + +xbc_has_key() { # key + grep -q "^$1 =" $XBC_TMPFILE +} + +xbc_has_branch() { # prefix-key + grep -q "^$1" $XBC_TMPFILE +} + +xbc_subkeys() { # prefix-key depth + __keys=`echo $1 | sed "s/\./ /g"` + __s=`nr_args $__keys` + grep "^$1" $XBC_TMPFILE | cut -d= -f1| cut -d. -f$((__s + 1))-$((__s + $2)) | uniq +} -- cgit v1.3.1 From 2b86062a34a81427fca082540e3593b5a6b49a13 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 10 Aug 2020 17:35:21 +0900 Subject: tools/bootconfig: Add a script to generates bootconfig from ftrace Add a ftrace2bconf.sh under tools/bootconfig/scripts which generates a bootconfig file from the current ftrace settings. To read the ftrace settings, ftrace2bconf.sh requires the root privilege (or sudo). The ftrace2bconf.sh will output the bootconfig to stdout and error messages to stderr, so usually you'll run it as # ftrace2bconf.sh > ftrace.bconf Note that some ftrace configurations are not supported. For example, function-call/callgraph trace/notrace settings are not supported because the wildcard has been expanded and lost in the ftrace anymore. Link: https://lkml.kernel.org/r/159704852163.175360.16738029520293360558.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/scripts/ftrace2bconf.sh | 244 +++++++++++++++++++++++++++++++ 1 file changed, 244 insertions(+) create mode 100755 tools/bootconfig/scripts/ftrace2bconf.sh (limited to 'tools') diff --git a/tools/bootconfig/scripts/ftrace2bconf.sh b/tools/bootconfig/scripts/ftrace2bconf.sh new file mode 100755 index 000000000000..6c0d4b61e0c2 --- /dev/null +++ b/tools/bootconfig/scripts/ftrace2bconf.sh @@ -0,0 +1,244 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-only + +usage() { + echo "Dump boot-time tracing bootconfig from ftrace" + echo "Usage: $0 [--debug] [ > BOOTCONFIG-FILE]" + exit 1 +} + +DEBUG= +while [ x"$1" != x ]; do + case "$1" in + "--debug") + DEBUG=$1;; + -*) + usage + ;; + esac + shift 1 +done + +if [ x"$DEBUG" != x ]; then + set -x +fi + +TRACEFS=`grep -m 1 -w tracefs /proc/mounts | cut -f 2 -d " "` +if [ -z "$TRACEFS" ]; then + if ! grep -wq debugfs /proc/mounts; then + echo "Error: No tracefs/debugfs was mounted." + exit 1 + fi + TRACEFS=`grep -m 1 -w debugfs /proc/mounts | cut -f 2 -d " "`/tracing + if [ ! -d $TRACEFS ]; then + echo "Error: ftrace is not enabled on this kernel." 1>&2 + exit 1 + fi +fi + +######## main ######### + +set -e + +emit_kv() { # key =|+= value + echo "$@" +} + +global_options() { + val=`cat $TRACEFS/max_graph_depth` + [ $val != 0 ] && emit_kv kernel.fgraph_max_depth = $val + if grep -qv "^#" $TRACEFS/set_graph_function $TRACEFS/set_graph_notrace ; then + cat 1>&2 << EOF +# WARN: kernel.fgraph_filters and kernel.fgraph_notrace are not supported, since the wild card expression was expanded and lost from memory. +EOF + fi +} + +kprobe_event_options() { + cat $TRACEFS/kprobe_events | while read p args; do + case $p in + r*) + cat 1>&2 << EOF +# WARN: A return probe found but it is not supported by bootconfig. Skip it. +EOF + continue;; + esac + p=${p#*:} + event=${p#*/} + group=${p%/*} + if [ $group != "kprobes" ]; then + cat 1>&2 << EOF +# WARN: kprobes group name $group is changed to "kprobes" for bootconfig. +EOF + fi + emit_kv $PREFIX.event.kprobes.$event.probes += $args + done +} + +synth_event_options() { + cat $TRACEFS/synthetic_events | while read event fields; do + emit_kv $PREFIX.event.synthetic.$event.fields = `echo $fields | sed "s/;/,/g"` + done +} + +# Variables resolver +DEFINED_VARS= +UNRESOLVED_EVENTS= + +defined_vars() { # event-dir + grep "^hist" $1/trigger | grep -o ':[a-zA-Z0-9]*=' +} +referred_vars() { + grep "^hist" $1/trigger | grep -o '$[a-zA-Z0-9]*' +} + +per_event_options() { # event-dir + evdir=$1 + # Check the special event which has no filter and no trigger + [ ! -f $evdir/filter ] && return + + if grep -q "^hist:" $evdir/trigger; then + # hist action can refer the undefined variables + __vars=`defined_vars $evdir` + for v in `referred_vars $evdir`; do + if echo $DEFINED_VARS $__vars | grep -vqw ${v#$}; then + # $v is not defined yet, defer it + UNRESOLVED_EVENTS="$UNRESOLVED_EVENTS $evdir" + return; + fi + done + DEFINED_VARS="$DEFINED_VARS "`defined_vars $evdir` + fi + grep -v "^#" $evdir/trigger | while read action active; do + emit_kv $PREFIX.event.$group.$event.actions += \'$action\' + done + + # enable is not checked; this is done by set_event in the instance. + val=`cat $evdir/filter` + if [ "$val" != "none" ]; then + emit_kv $PREFIX.event.$group.$event.filter = "$val" + fi +} + +retry_unresolved() { + unresolved=$UNRESOLVED_EVENTS + UNRESOLVED_EVENTS= + for evdir in $unresolved; do + event=${evdir##*/} + group=${evdir%/*}; group=${group##*/} + per_event_options $evdir + done +} + +event_options() { + # PREFIX and INSTANCE must be set + if [ $PREFIX = "ftrace" ]; then + # define the dynamic events + kprobe_event_options + synth_event_options + fi + for group in `ls $INSTANCE/events/` ; do + [ ! -d $INSTANCE/events/$group ] && continue + for event in `ls $INSTANCE/events/$group/` ;do + [ ! -d $INSTANCE/events/$group/$event ] && continue + per_event_options $INSTANCE/events/$group/$event + done + done + retry=0 + while [ $retry -lt 3 ]; do + retry_unresolved + retry=$((retry + 1)) + done + if [ "$UNRESOLVED_EVENTS" ]; then + cat 1>&2 << EOF +! ERROR: hist triggers in $UNRESOLVED_EVENTS use some undefined variables. +EOF + fi +} + +is_default_trace_option() { # option +grep -qw $1 << EOF +print-parent +nosym-offset +nosym-addr +noverbose +noraw +nohex +nobin +noblock +trace_printk +annotate +nouserstacktrace +nosym-userobj +noprintk-msg-only +context-info +nolatency-format +record-cmd +norecord-tgid +overwrite +nodisable_on_free +irq-info +markers +noevent-fork +nopause-on-trace +function-trace +nofunction-fork +nodisplay-graph +nostacktrace +notest_nop_accept +notest_nop_refuse +EOF +} + +instance_options() { # [instance-name] + if [ $# -eq 0 ]; then + PREFIX="ftrace" + INSTANCE=$TRACEFS + else + PREFIX="ftrace.instance.$1" + INSTANCE=$TRACEFS/instances/$1 + fi + val= + for i in `cat $INSTANCE/trace_options`; do + is_default_trace_option $i && continue + val="$val, $i" + done + [ "$val" ] && emit_kv $PREFIX.options = "${val#,}" + val="local" + for i in `cat $INSTANCE/trace_clock` ; do + [ "${i#*]}" ] && continue + i=${i%]}; val=${i#[} + done + [ $val != "local" ] && emit_kv $PREFIX.trace_clock = $val + val=`cat $INSTANCE/buffer_size_kb` + if echo $val | grep -vq "expanded" ; then + emit_kv $PREFIX.buffer_size = $val"KB" + fi + if grep -q "is allocated" $INSTANCE/snapshot ; then + emit_kv $PREFIX.alloc_snapshot + fi + val=`cat $INSTANCE/tracing_cpumask` + if [ `echo $val | sed -e s/f//g`x != x ]; then + emit_kv $PREFIX.cpumask = $val + fi + + val= + for i in `cat $INSTANCE/set_event`; do + val="$val, $i" + done + [ "$val" ] && emit_kv $PREFIX.events = "${val#,}" + val=`cat $INSTANCE/current_tracer` + [ $val != nop ] && emit_kv $PREFIX.tracer = $val + if grep -qv "^#" $INSTANCE/set_ftrace_filter $INSTANCE/set_ftrace_notrace; then + cat 1>&2 << EOF +# WARN: kernel.ftrace.filters and kernel.ftrace.notrace are not supported, since the wild card expression was expanded and lost from memory. +EOF + fi + event_options +} + +global_options +instance_options +for i in `ls $TRACEFS/instances` ; do + instance_options $i +done -- cgit v1.3.1 From 5675fd4ef51f0b505a7f802e4d23a37336d521f0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 10 Aug 2020 17:35:32 +0900 Subject: tools/bootconfig: Add --init option for bconf2ftrace.sh Since the ftrace current setting may conflict with the new setting from bootconfig, add the --init option to initialize ftrace before setting for bconf2ftrace.sh. E.g. $ bconf2ftrace.sh --init boottrace.bconf This initialization method copied from selftests/ftrace. Link: https://lkml.kernel.org/r/159704853203.175360.17029578033994278231.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/scripts/bconf2ftrace.sh | 12 +++- tools/bootconfig/scripts/ftrace.sh | 109 +++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 tools/bootconfig/scripts/ftrace.sh (limited to 'tools') diff --git a/tools/bootconfig/scripts/bconf2ftrace.sh b/tools/bootconfig/scripts/bconf2ftrace.sh index a46e984fb2ff..595e164dc352 100755 --- a/tools/bootconfig/scripts/bconf2ftrace.sh +++ b/tools/bootconfig/scripts/bconf2ftrace.sh @@ -3,8 +3,9 @@ usage() { echo "Ftrace boottime trace test tool" - echo "Usage: $0 [--apply] [--debug] BOOTCONFIG-FILE" + echo "Usage: $0 [--apply|--init] [--debug] BOOTCONFIG-FILE" echo " --apply: Test actual apply to tracefs (need sudo)" + echo " --init: Initialize ftrace before applying (imply --apply)" exit 1 } @@ -13,12 +14,16 @@ usage() { BCONF= DEBUG= APPLY= +INIT= while [ x"$1" != x ]; do case "$1" in "--debug") DEBUG=$1;; "--apply") APPLY=$1;; + "--init") + APPLY=$1 + INIT=$1;; *) [ ! -f $1 ] && usage BCONF=$1;; @@ -57,6 +62,11 @@ if [ -z "$TRACEFS" ]; then fi fi +if [ x"$INIT" != x ]; then + . `dirname $0`/ftrace.sh + (cd $TRACEFS; initialize_ftrace) +fi + . `dirname $0`/xbc.sh ######## main ######### diff --git a/tools/bootconfig/scripts/ftrace.sh b/tools/bootconfig/scripts/ftrace.sh new file mode 100644 index 000000000000..186eed923041 --- /dev/null +++ b/tools/bootconfig/scripts/ftrace.sh @@ -0,0 +1,109 @@ +# SPDX-License-Identifier: GPL-2.0-only + +clear_trace() { # reset trace output + echo > trace +} + +disable_tracing() { # stop trace recording + echo 0 > tracing_on +} + +enable_tracing() { # start trace recording + echo 1 > tracing_on +} + +reset_tracer() { # reset the current tracer + echo nop > current_tracer +} + +reset_trigger_file() { + # remove action triggers first + grep -H ':on[^:]*(' $@ | + while read line; do + cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["` + file=`echo $line | cut -f1 -d:` + echo "!$cmd" >> $file + done + grep -Hv ^# $@ | + while read line; do + cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["` + file=`echo $line | cut -f1 -d:` + echo "!$cmd" > $file + done +} + +reset_trigger() { # reset all current setting triggers + if [ -d events/synthetic ]; then + reset_trigger_file events/synthetic/*/trigger + fi + reset_trigger_file events/*/*/trigger +} + +reset_events_filter() { # reset all current setting filters + grep -v ^none events/*/*/filter | + while read line; do + echo 0 > `echo $line | cut -f1 -d:` + done +} + +reset_ftrace_filter() { # reset all triggers in set_ftrace_filter + if [ ! -f set_ftrace_filter ]; then + return 0 + fi + echo > set_ftrace_filter + grep -v '^#' set_ftrace_filter | while read t; do + tr=`echo $t | cut -d: -f2` + if [ "$tr" = "" ]; then + continue + fi + if ! grep -q "$t" set_ftrace_filter; then + continue; + fi + name=`echo $t | cut -d: -f1 | cut -d' ' -f1` + if [ $tr = "enable_event" -o $tr = "disable_event" ]; then + tr=`echo $t | cut -d: -f2-4` + limit=`echo $t | cut -d: -f5` + else + tr=`echo $t | cut -d: -f2` + limit=`echo $t | cut -d: -f3` + fi + if [ "$limit" != "unlimited" ]; then + tr="$tr:$limit" + fi + echo "!$name:$tr" > set_ftrace_filter + done +} + +disable_events() { + echo 0 > events/enable +} + +clear_synthetic_events() { # reset all current synthetic events + grep -v ^# synthetic_events | + while read line; do + echo "!$line" >> synthetic_events + done +} + +initialize_ftrace() { # Reset ftrace to initial-state +# As the initial state, ftrace will be set to nop tracer, +# no events, no triggers, no filters, no function filters, +# no probes, and tracing on. + disable_tracing + reset_tracer + reset_trigger + reset_events_filter + reset_ftrace_filter + disable_events + [ -f set_event_pid ] && echo > set_event_pid + [ -f set_ftrace_pid ] && echo > set_ftrace_pid + [ -f set_ftrace_notrace ] && echo > set_ftrace_notrace + [ -f set_graph_function ] && echo | tee set_graph_* + [ -f stack_trace_filter ] && echo > stack_trace_filter + [ -f kprobe_events ] && echo > kprobe_events + [ -f uprobe_events ] && echo > uprobe_events + [ -f synthetic_events ] && echo > synthetic_events + [ -f snapshot ] && echo 0 > snapshot + clear_trace + enable_tracing +} -- cgit v1.3.1 From 05b52c6625278cc6ed1245a569167f86a971ff86 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:20 -0700 Subject: selftests/seccomp: Use __NR_mknodat instead of __NR_mknod The __NR_mknod syscall doesn't exist on arm64 (only __NR_mknodat). Switch to the modern syscall. Fixes: ad5682184a81 ("selftests/seccomp: Check for EPOLLHUP for user_notif") Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-16-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index c5002fc25b00..6ddef9fc7ea5 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -3746,7 +3746,7 @@ TEST(user_notification_filter_empty) if (pid == 0) { int listener; - listener = user_notif_syscall(__NR_mknod, SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER); if (listener < 0) _exit(EXIT_FAILURE); -- cgit v1.3.1 From a6a4d78419a04095221ec2b518edefb080218d55 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:06 -0700 Subject: selftests/seccomp: Refactor arch register macros to avoid xtensa special case To avoid an xtensa special-case, refactor all arch register macros to take the register variable instead of depending on the macro expanding as a struct member name. Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-2-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 97 +++++++++++++-------------- 1 file changed, 47 insertions(+), 50 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 6ddef9fc7ea5..a261643db4b8 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1698,64 +1698,64 @@ TEST_F(TRACE_poke, getpid_runs_normally) } #if defined(__x86_64__) -# define ARCH_REGS struct user_regs_struct -# define SYSCALL_NUM orig_rax -# define SYSCALL_RET rax +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM(_regs) (_regs).orig_rax +# define SYSCALL_RET(_regs) (_regs).rax #elif defined(__i386__) -# define ARCH_REGS struct user_regs_struct -# define SYSCALL_NUM orig_eax -# define SYSCALL_RET eax +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM(_regs) (_regs).orig_eax +# define SYSCALL_RET(_regs) (_regs).eax #elif defined(__arm__) -# define ARCH_REGS struct pt_regs -# define SYSCALL_NUM ARM_r7 -# define SYSCALL_RET ARM_r0 +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM(_regs) (_regs).ARM_r7 +# define SYSCALL_RET(_regs) (_regs).ARM_r0 #elif defined(__aarch64__) -# define ARCH_REGS struct user_pt_regs -# define SYSCALL_NUM regs[8] -# define SYSCALL_RET regs[0] +# define ARCH_REGS struct user_pt_regs +# define SYSCALL_NUM(_regs) (_regs).regs[8] +# define SYSCALL_RET(_regs) (_regs).regs[0] #elif defined(__riscv) && __riscv_xlen == 64 -# define ARCH_REGS struct user_regs_struct -# define SYSCALL_NUM a7 -# define SYSCALL_RET a0 +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM(_regs) (_regs).a7 +# define SYSCALL_RET(_regs) (_regs).a0 #elif defined(__csky__) -# define ARCH_REGS struct pt_regs -#if defined(__CSKYABIV2__) -# define SYSCALL_NUM regs[3] -#else -# define SYSCALL_NUM regs[9] -#endif -# define SYSCALL_RET a0 +# define ARCH_REGS struct pt_regs +# if defined(__CSKYABIV2__) +# define SYSCALL_NUM(_regs) (_regs).regs[3] +# else +# define SYSCALL_NUM(_regs) (_regs).regs[9] +# endif +# define SYSCALL_RET(_regs) (_regs).a0 #elif defined(__hppa__) -# define ARCH_REGS struct user_regs_struct -# define SYSCALL_NUM gr[20] -# define SYSCALL_RET gr[28] +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM(_regs) (_regs).gr[20] +# define SYSCALL_RET(_regs) (_regs).gr[28] #elif defined(__powerpc__) -# define ARCH_REGS struct pt_regs -# define SYSCALL_NUM gpr[0] -# define SYSCALL_RET gpr[3] +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM(_regs) (_regs).gpr[0] +# define SYSCALL_RET(_regs) (_regs).gpr[3] #elif defined(__s390__) -# define ARCH_REGS s390_regs -# define SYSCALL_NUM gprs[2] -# define SYSCALL_RET gprs[2] +# define ARCH_REGS s390_regs +# define SYSCALL_NUM(_regs) (_regs).gprs[2] +# define SYSCALL_RET(_regs) (_regs).gprs[2] # define SYSCALL_NUM_RET_SHARE_REG #elif defined(__mips__) -# define ARCH_REGS struct pt_regs -# define SYSCALL_NUM regs[2] -# define SYSCALL_SYSCALL_NUM regs[4] -# define SYSCALL_RET regs[2] +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM(_regs) (_regs).regs[2] +# define SYSCALL_SYSCALL_NUM regs[4] +# define SYSCALL_RET(_regs) (_regs).regs[2] # define SYSCALL_NUM_RET_SHARE_REG #elif defined(__xtensa__) -# define ARCH_REGS struct user_pt_regs -# define SYSCALL_NUM syscall +# define ARCH_REGS struct user_pt_regs +# define SYSCALL_NUM(_regs) (_regs).syscall /* * On xtensa syscall return value is in the register * a2 of the current window which is not fixed. */ -#define SYSCALL_RET(reg) a[(reg).windowbase * 4 + 2] +#define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2] #elif defined(__sh__) -# define ARCH_REGS struct pt_regs -# define SYSCALL_NUM gpr[3] -# define SYSCALL_RET gpr[0] +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM(_regs) (_regs).gpr[3] +# define SYSCALL_RET(_regs) (_regs).gpr[0] #else # error "Do not know how to find your architecture's registers and syscalls" #endif @@ -1804,10 +1804,10 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) #endif #if defined(__mips__) - if (regs.SYSCALL_NUM == __NR_O32_Linux) + if (SYSCALL_NUM(regs) == __NR_O32_Linux) return regs.SYSCALL_SYSCALL_NUM; #endif - return regs.SYSCALL_NUM; + return SYSCALL_NUM(regs); } /* Architecture-specific syscall changing routine. */ @@ -1830,14 +1830,14 @@ void change_syscall(struct __test_metadata *_metadata, defined(__s390__) || defined(__hppa__) || defined(__riscv) || \ defined(__xtensa__) || defined(__csky__) || defined(__sh__) { - regs.SYSCALL_NUM = syscall; + SYSCALL_NUM(regs) = syscall; } #elif defined(__mips__) { - if (regs.SYSCALL_NUM == __NR_O32_Linux) + if (SYSCALL_NUM(regs) == __NR_O32_Linux) regs.SYSCALL_SYSCALL_NUM = syscall; else - regs.SYSCALL_NUM = syscall; + SYSCALL_NUM(regs) = syscall; } #elif defined(__arm__) @@ -1871,11 +1871,8 @@ void change_syscall(struct __test_metadata *_metadata, if (syscall == -1) #ifdef SYSCALL_NUM_RET_SHARE_REG TH_LOG("Can't modify syscall return on this architecture"); - -#elif defined(__xtensa__) - regs.SYSCALL_RET(regs) = result; #else - regs.SYSCALL_RET = result; + SYSCALL_RET(regs) = result; #endif #ifdef HAVE_GETREGS -- cgit v1.3.1 From 31c36eb87c85b6de0f341b1184f90137106a9f81 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:07 -0700 Subject: selftests/seccomp: Provide generic syscall setting macro In order to avoid "#ifdef"s in the main function bodies, create a new macro, SYSCALL_NUM_SET(), where arch-specific logic can live. Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-3-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index a261643db4b8..c44abe7c6a3c 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1760,6 +1760,17 @@ TEST_F(TRACE_poke, getpid_runs_normally) # error "Do not know how to find your architecture's registers and syscalls" #endif +/* + * Most architectures can change the syscall by just updating the + * associated register. This is the default if not defined above. + */ +#ifndef SYSCALL_NUM_SET +# define SYSCALL_NUM_SET(_regs, _nr) \ + do { \ + SYSCALL_NUM(_regs) = (_nr); \ + } while (0) +#endif + /* When the syscall return can't be changed, stub out the tests for it. */ #ifdef SYSCALL_NUM_RET_SHARE_REG # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) @@ -1830,14 +1841,14 @@ void change_syscall(struct __test_metadata *_metadata, defined(__s390__) || defined(__hppa__) || defined(__riscv) || \ defined(__xtensa__) || defined(__csky__) || defined(__sh__) { - SYSCALL_NUM(regs) = syscall; + SYSCALL_NUM_SET(regs, syscall); } #elif defined(__mips__) { if (SYSCALL_NUM(regs) == __NR_O32_Linux) regs.SYSCALL_SYSCALL_NUM = syscall; else - SYSCALL_NUM(regs) = syscall; + SYSCALL_NUM_SET(regs, syscall); } #elif defined(__arm__) -- cgit v1.3.1 From a084a6cba37cfa7c03e88f86ade961fb1d7c18a2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:08 -0700 Subject: selftests/seccomp: mips: Define SYSCALL_NUM_SET macro Remove the mips special-case in change_syscall(). Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-4-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index c44abe7c6a3c..dc9e3e2c3db5 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1742,6 +1742,13 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define ARCH_REGS struct pt_regs # define SYSCALL_NUM(_regs) (_regs).regs[2] # define SYSCALL_SYSCALL_NUM regs[4] +# define SYSCALL_NUM_SET(_regs, _nr) \ + do { \ + if ((_regs).regs[2] == __NR_O32_Linux) \ + (_regs).regs[4] = _nr; \ + else \ + (_regs).regs[2] = _nr; \ + } while (0) # define SYSCALL_RET(_regs) (_regs).regs[2] # define SYSCALL_NUM_RET_SHARE_REG #elif defined(__xtensa__) @@ -1839,17 +1846,11 @@ void change_syscall(struct __test_metadata *_metadata, #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ defined(__s390__) || defined(__hppa__) || defined(__riscv) || \ - defined(__xtensa__) || defined(__csky__) || defined(__sh__) + defined(__xtensa__) || defined(__csky__) || defined(__sh__) || \ + defined(__mips__) { SYSCALL_NUM_SET(regs, syscall); } -#elif defined(__mips__) - { - if (SYSCALL_NUM(regs) == __NR_O32_Linux) - regs.SYSCALL_SYSCALL_NUM = syscall; - else - SYSCALL_NUM_SET(regs, syscall); - } #elif defined(__arm__) # ifndef PTRACE_SET_SYSCALL -- cgit v1.3.1 From aa8fbb80a8034af23b08310142af1d9824d25533 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:09 -0700 Subject: selftests/seccomp: arm: Define SYSCALL_NUM_SET macro Remove the arm special-case in change_syscall(). Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-5-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index dc9e3e2c3db5..aa275ed8e183 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1708,6 +1708,11 @@ TEST_F(TRACE_poke, getpid_runs_normally) #elif defined(__arm__) # define ARCH_REGS struct pt_regs # define SYSCALL_NUM(_regs) (_regs).ARM_r7 +# ifndef PTRACE_SET_SYSCALL +# define PTRACE_SET_SYSCALL 23 +# endif +# define SYSCALL_NUM_SET(_regs, _nr) \ + EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr)) # define SYSCALL_RET(_regs) (_regs).ARM_r0 #elif defined(__aarch64__) # define ARCH_REGS struct user_pt_regs @@ -1847,20 +1852,11 @@ void change_syscall(struct __test_metadata *_metadata, #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ defined(__s390__) || defined(__hppa__) || defined(__riscv) || \ defined(__xtensa__) || defined(__csky__) || defined(__sh__) || \ - defined(__mips__) + defined(__mips__) || defined(__arm__) { SYSCALL_NUM_SET(regs, syscall); } -#elif defined(__arm__) -# ifndef PTRACE_SET_SYSCALL -# define PTRACE_SET_SYSCALL 23 -# endif - { - ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall); - EXPECT_EQ(0, ret); - } - #elif defined(__aarch64__) # ifndef NT_ARM_SYSTEM_CALL # define NT_ARM_SYSTEM_CALL 0x404 -- cgit v1.3.1 From 0dd7d68572d9393765b57c001adc30822e3003ed Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:10 -0700 Subject: selftests/seccomp: arm64: Define SYSCALL_NUM_SET macro Remove the arm64 special-case in change_syscall(). Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-6-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index aa275ed8e183..ef7f65468069 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1717,6 +1717,18 @@ TEST_F(TRACE_poke, getpid_runs_normally) #elif defined(__aarch64__) # define ARCH_REGS struct user_pt_regs # define SYSCALL_NUM(_regs) (_regs).regs[8] +# ifndef NT_ARM_SYSTEM_CALL +# define NT_ARM_SYSTEM_CALL 0x404 +# endif +# define SYSCALL_NUM_SET(_regs, _nr) \ + do { \ + struct iovec __v; \ + typeof(_nr) __nr = (_nr); \ + __v.iov_base = &__nr; \ + __v.iov_len = sizeof(__nr); \ + EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee, \ + NT_ARM_SYSTEM_CALL, &__v)); \ + } while (0) # define SYSCALL_RET(_regs) (_regs).regs[0] #elif defined(__riscv) && __riscv_xlen == 64 # define ARCH_REGS struct user_regs_struct @@ -1852,23 +1864,10 @@ void change_syscall(struct __test_metadata *_metadata, #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ defined(__s390__) || defined(__hppa__) || defined(__riscv) || \ defined(__xtensa__) || defined(__csky__) || defined(__sh__) || \ - defined(__mips__) || defined(__arm__) + defined(__mips__) || defined(__arm__) || defined(__aarch64__) { SYSCALL_NUM_SET(regs, syscall); } - -#elif defined(__aarch64__) -# ifndef NT_ARM_SYSTEM_CALL -# define NT_ARM_SYSTEM_CALL 0x404 -# endif - { - iov.iov_base = &syscall; - iov.iov_len = sizeof(syscall); - ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL, - &iov); - EXPECT_EQ(0, ret); - } - #else ASSERT_EQ(1, 0) { TH_LOG("How is the syscall changed on this architecture?"); -- cgit v1.3.1 From 37989de731dbea5af143806192c4cd1484990ab4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:11 -0700 Subject: selftests/seccomp: mips: Remove O32-specific macro Instead of having the mips O32 macro special-cased, pull the logic into the SYSCALL_NUM() macro. Additionally include the ABI headers, since these appear to have been missing, leaving __NR_O32_Linux undefined. Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-7-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index ef7f65468069..7976cb480912 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1756,9 +1756,19 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define SYSCALL_RET(_regs) (_regs).gprs[2] # define SYSCALL_NUM_RET_SHARE_REG #elif defined(__mips__) +# include +# include +# include # define ARCH_REGS struct pt_regs -# define SYSCALL_NUM(_regs) (_regs).regs[2] -# define SYSCALL_SYSCALL_NUM regs[4] +# define SYSCALL_NUM(_regs) \ + ({ \ + typeof((_regs).regs[2]) _nr; \ + if ((_regs).regs[2] == __NR_O32_Linux) \ + _nr = (_regs).regs[4]; \ + else \ + _nr = (_regs).regs[2]; \ + _nr; \ + }) # define SYSCALL_NUM_SET(_regs, _nr) \ do { \ if ((_regs).regs[2] == __NR_O32_Linux) \ @@ -1838,10 +1848,6 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) } #endif -#if defined(__mips__) - if (SYSCALL_NUM(regs) == __NR_O32_Linux) - return regs.SYSCALL_SYSCALL_NUM; -#endif return SYSCALL_NUM(regs); } -- cgit v1.3.1 From 78f26627fd36cb74277dd562ec277aee384525a1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:12 -0700 Subject: selftests/seccomp: Remove syscall setting #ifdefs With all architectures now using the common SYSCALL_NUM_SET() macro, the arch-specific #ifdef can be removed from change_syscall() itself. Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-8-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 7976cb480912..e4b2b9468ff9 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1865,20 +1865,9 @@ void change_syscall(struct __test_metadata *_metadata, iov.iov_len = sizeof(regs); ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); #endif - EXPECT_EQ(0, ret) {} + EXPECT_EQ(0, ret); -#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ - defined(__s390__) || defined(__hppa__) || defined(__riscv) || \ - defined(__xtensa__) || defined(__csky__) || defined(__sh__) || \ - defined(__mips__) || defined(__arm__) || defined(__aarch64__) - { - SYSCALL_NUM_SET(regs, syscall); - } -#else - ASSERT_EQ(1, 0) { - TH_LOG("How is the syscall changed on this architecture?"); - } -#endif + SYSCALL_NUM_SET(regs, syscall); /* If syscall is skipped, change return value. */ if (syscall == -1) @@ -1888,6 +1877,7 @@ void change_syscall(struct __test_metadata *_metadata, SYSCALL_RET(regs) = result; #endif + /* Flush any register changes made. */ #ifdef HAVE_GETREGS ret = ptrace(PTRACE_SETREGS, tracee, 0, ®s); #else -- cgit v1.3.1 From fdbaa798eaf52a3f1d5d86c5bc035fe8dcc3a384 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:13 -0700 Subject: selftests/seccomp: Convert HAVE_GETREG into ARCH_GETREG/ARCH_SETREG Instead of special-casing the get/set-registers routines, move the HAVE_GETREG logic into the new ARCH_GETREG() and ARCH_SETREG() macros. Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-9-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index e4b2b9468ff9..68f1f132a517 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1821,20 +1821,21 @@ TEST_F(TRACE_poke, getpid_runs_normally) } while (0) #endif -/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for +/* + * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). */ #if defined(__x86_64__) || defined(__i386__) || defined(__mips__) -#define HAVE_GETREGS +# define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs)) +# define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs)) #endif /* Architecture-specific syscall fetching routine. */ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) { ARCH_REGS regs; -#ifdef HAVE_GETREGS - EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, ®s)) { - TH_LOG("PTRACE_GETREGS failed"); +#ifdef ARCH_GETREGS + EXPECT_EQ(0, ARCH_GETREGS(regs)) { return -1; } #else @@ -1855,17 +1856,19 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) void change_syscall(struct __test_metadata *_metadata, pid_t tracee, int syscall, int result) { - int ret; ARCH_REGS regs; -#ifdef HAVE_GETREGS - ret = ptrace(PTRACE_GETREGS, tracee, 0, ®s); +#ifdef ARCH_GETREGS + EXPECT_EQ(0, ARCH_GETREGS(regs)) { + return; + } #else + int ret; struct iovec iov; iov.iov_base = ®s; iov.iov_len = sizeof(regs); ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); -#endif EXPECT_EQ(0, ret); +#endif SYSCALL_NUM_SET(regs, syscall); @@ -1878,14 +1881,14 @@ void change_syscall(struct __test_metadata *_metadata, #endif /* Flush any register changes made. */ -#ifdef HAVE_GETREGS - ret = ptrace(PTRACE_SETREGS, tracee, 0, ®s); +#ifdef ARCH_SETREGS + EXPECT_EQ(0, ARCH_SETREGS(regs)); #else iov.iov_base = ®s; iov.iov_len = sizeof(regs); ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov); -#endif EXPECT_EQ(0, ret); +#endif } void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, -- cgit v1.3.1 From dc2ad165f4fbef0fe1028b6b3720c5bec034874f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:14 -0700 Subject: selftests/seccomp: Convert REGSET calls into ARCH_GETREG/ARCH_SETREG Consolidate the REGSET logic into the new ARCH_GETREG() and ARCH_SETREG() macros, avoiding more #ifdef code in function bodies. Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-10-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 42 ++++++++++----------------- 1 file changed, 15 insertions(+), 27 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 68f1f132a517..00056e067846 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1828,26 +1828,29 @@ TEST_F(TRACE_poke, getpid_runs_normally) #if defined(__x86_64__) || defined(__i386__) || defined(__mips__) # define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs)) # define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs)) +#else +# define ARCH_GETREGS(_regs) ({ \ + struct iovec __v; \ + __v.iov_base = &(_regs); \ + __v.iov_len = sizeof(_regs); \ + ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v); \ + }) +# define ARCH_SETREGS(_regs) ({ \ + struct iovec __v; \ + __v.iov_base = &(_regs); \ + __v.iov_len = sizeof(_regs); \ + ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v); \ + }) #endif /* Architecture-specific syscall fetching routine. */ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) { ARCH_REGS regs; -#ifdef ARCH_GETREGS - EXPECT_EQ(0, ARCH_GETREGS(regs)) { - return -1; - } -#else - struct iovec iov; - iov.iov_base = ®s; - iov.iov_len = sizeof(regs); - EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) { - TH_LOG("PTRACE_GETREGSET failed"); + EXPECT_EQ(0, ARCH_GETREGS(regs)) { return -1; } -#endif return SYSCALL_NUM(regs); } @@ -1857,18 +1860,10 @@ void change_syscall(struct __test_metadata *_metadata, pid_t tracee, int syscall, int result) { ARCH_REGS regs; -#ifdef ARCH_GETREGS + EXPECT_EQ(0, ARCH_GETREGS(regs)) { return; } -#else - int ret; - struct iovec iov; - iov.iov_base = ®s; - iov.iov_len = sizeof(regs); - ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); - EXPECT_EQ(0, ret); -#endif SYSCALL_NUM_SET(regs, syscall); @@ -1881,14 +1876,7 @@ void change_syscall(struct __test_metadata *_metadata, #endif /* Flush any register changes made. */ -#ifdef ARCH_SETREGS EXPECT_EQ(0, ARCH_SETREGS(regs)); -#else - iov.iov_base = ®s; - iov.iov_len = sizeof(regs); - ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov); - EXPECT_EQ(0, ret); -#endif } void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, -- cgit v1.3.1 From e4e8e5d28d5e1dac24f775452d4cc6f49f5c069e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:15 -0700 Subject: selftests/seccomp: Avoid redundant register flushes When none of the registers have changed, don't flush them back. This can happen if the architecture uses a non-register way to change the syscall (e.g. arm64) , and a return value hasn't been written. Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-11-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 00056e067846..638cea8cb23d 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1859,11 +1859,12 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) void change_syscall(struct __test_metadata *_metadata, pid_t tracee, int syscall, int result) { - ARCH_REGS regs; + ARCH_REGS orig, regs; EXPECT_EQ(0, ARCH_GETREGS(regs)) { return; } + orig = regs; SYSCALL_NUM_SET(regs, syscall); @@ -1876,7 +1877,8 @@ void change_syscall(struct __test_metadata *_metadata, #endif /* Flush any register changes made. */ - EXPECT_EQ(0, ARCH_SETREGS(regs)); + if (memcmp(&orig, ®s, sizeof(orig)) != 0) + EXPECT_EQ(0, ARCH_SETREGS(regs)); } void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, -- cgit v1.3.1 From f04cf78bbfcd1fa5b1819613a5f354b228f36e03 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:16 -0700 Subject: selftests/seccomp: Remove SYSCALL_NUM_RET_SHARE_REG in favor of SYSCALL_RET_SET Instead of special-casing the specific case of shared registers, create a default SYSCALL_RET_SET() macro (mirroring SYSCALL_NUM_SET()), that writes to the SYSCALL_RET register. For architectures that can't set the return value (for whatever reason), they can define SYSCALL_RET_SET() without an associated SYSCALL_RET() macro. This also paves the way for architectures that need to do special things to set the return value (e.g. powerpc). Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-12-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 33 +++++++++++++++++++-------- 1 file changed, 23 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 638cea8cb23d..84766a001ed0 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1753,8 +1753,8 @@ TEST_F(TRACE_poke, getpid_runs_normally) #elif defined(__s390__) # define ARCH_REGS s390_regs # define SYSCALL_NUM(_regs) (_regs).gprs[2] -# define SYSCALL_RET(_regs) (_regs).gprs[2] -# define SYSCALL_NUM_RET_SHARE_REG +# define SYSCALL_RET_SET(_regs, _val) \ + TH_LOG("Can't modify syscall return on this architecture") #elif defined(__mips__) # include # include @@ -1776,8 +1776,8 @@ TEST_F(TRACE_poke, getpid_runs_normally) else \ (_regs).regs[2] = _nr; \ } while (0) -# define SYSCALL_RET(_regs) (_regs).regs[2] -# define SYSCALL_NUM_RET_SHARE_REG +# define SYSCALL_RET_SET(_regs, _val) \ + TH_LOG("Can't modify syscall return on this architecture") #elif defined(__xtensa__) # define ARCH_REGS struct user_pt_regs # define SYSCALL_NUM(_regs) (_regs).syscall @@ -1804,9 +1804,26 @@ TEST_F(TRACE_poke, getpid_runs_normally) SYSCALL_NUM(_regs) = (_nr); \ } while (0) #endif +/* + * Most architectures can change the syscall return value by just + * writing to the SYSCALL_RET register. This is the default if not + * defined above. If an architecture cannot set the return value + * (for example when the syscall and return value register is + * shared), report it with TH_LOG() in an arch-specific definition + * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined. + */ +#if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET) +# error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch" +#endif +#ifndef SYSCALL_RET_SET +# define SYSCALL_RET_SET(_regs, _val) \ + do { \ + SYSCALL_RET(_regs) = (_val); \ + } while (0) +#endif /* When the syscall return can't be changed, stub out the tests for it. */ -#ifdef SYSCALL_NUM_RET_SHARE_REG +#ifndef SYSCALL_RET # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) #else # define EXPECT_SYSCALL_RETURN(val, action) \ @@ -1870,11 +1887,7 @@ void change_syscall(struct __test_metadata *_metadata, /* If syscall is skipped, change return value. */ if (syscall == -1) -#ifdef SYSCALL_NUM_RET_SHARE_REG - TH_LOG("Can't modify syscall return on this architecture"); -#else - SYSCALL_RET(regs) = result; -#endif + SYSCALL_RET_SET(regs, result); /* Flush any register changes made. */ if (memcmp(&orig, ®s, sizeof(orig)) != 0) -- cgit v1.3.1 From 46138329faeac3598f5a4dc991a174386b6de833 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:17 -0700 Subject: selftests/seccomp: powerpc: Fix seccomp return value testing On powerpc, the errno is not inverted, and depends on ccr.so being set. Add this to a powerpc definition of SYSCALL_RET_SET(). Co-developed-by: Thadeu Lima de Souza Cascardo Signed-off-by: Thadeu Lima de Souza Cascardo Link: https://lore.kernel.org/linux-kselftest/20200911181012.171027-1-cascardo@canonical.com/ Fixes: 5d83c2b37d43 ("selftests/seccomp: Add powerpc support") Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-13-keescook@chromium.org Reviewed-by: Michael Ellerman --- tools/testing/selftests/seccomp/seccomp_bpf.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 84766a001ed0..bc0fb463c709 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1750,6 +1750,21 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define ARCH_REGS struct pt_regs # define SYSCALL_NUM(_regs) (_regs).gpr[0] # define SYSCALL_RET(_regs) (_regs).gpr[3] +# define SYSCALL_RET_SET(_regs, _val) \ + do { \ + typeof(_val) _result = (_val); \ + /* \ + * A syscall error is signaled by CR0 SO bit \ + * and the code is stored as a positive value. \ + */ \ + if (_result < 0) { \ + SYSCALL_RET(_regs) = -result; \ + (_regs).ccr |= 0x10000000; \ + } else { \ + SYSCALL_RET(_regs) = result; \ + (_regs).ccr &= ~0x10000000; \ + } \ + } while (0) #elif defined(__s390__) # define ARCH_REGS s390_regs # define SYSCALL_NUM(_regs) (_regs).gprs[2] -- cgit v1.3.1 From 2b232a22d8225df419a92ca69ddeeb4e5fe902f7 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Tue, 15 Sep 2020 08:53:18 +0100 Subject: objtool: Handle calling non-function symbols in other sections Relocation for a call destination could point to a symbol that has type STT_NOTYPE. Lookup such a symbol when no function is available. Signed-off-by: Julien Thierry Signed-off-by: Josh Poimboeuf --- tools/objtool/check.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index cd7c6698d316..a4796e32bbd1 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -815,6 +815,17 @@ static void remove_insn_ops(struct instruction *insn) } } +static struct symbol *find_call_destination(struct section *sec, unsigned long offset) +{ + struct symbol *call_dest; + + call_dest = find_func_by_offset(sec, offset); + if (!call_dest) + call_dest = find_symbol_by_offset(sec, offset); + + return call_dest; +} + /* * Find the destination instructions for all calls. */ @@ -832,9 +843,7 @@ static int add_call_destinations(struct objtool_file *file) insn->offset, insn->len); if (!reloc) { dest_off = arch_jump_destination(insn); - insn->call_dest = find_func_by_offset(insn->sec, dest_off); - if (!insn->call_dest) - insn->call_dest = find_symbol_by_offset(insn->sec, dest_off); + insn->call_dest = find_call_destination(insn->sec, dest_off); if (insn->ignore) continue; @@ -852,8 +861,8 @@ static int add_call_destinations(struct objtool_file *file) } else if (reloc->sym->type == STT_SECTION) { dest_off = arch_dest_reloc_offset(reloc->addend); - insn->call_dest = find_func_by_offset(reloc->sym->sec, - dest_off); + insn->call_dest = find_call_destination(reloc->sym->sec, + dest_off); if (!insn->call_dest) { WARN_FUNC("can't find call dest symbol at %s+0x%lx", insn->sec, insn->offset, -- cgit v1.3.1 From 14db1f0a93331d0958e90da522c429ff0890d2d6 Mon Sep 17 00:00:00 2001 From: Ilie Halip Date: Sat, 19 Sep 2020 09:41:18 +0300 Subject: objtool: Ignore unreachable trap after call to noreturn functions With CONFIG_UBSAN_TRAP enabled, the compiler may insert a trap instruction after a call to a noreturn function. In this case, objtool warns that the UD2 instruction is unreachable. This is a behavior seen with Clang, from the oldest version capable of building the mainline x64_64 kernel (9.0), to the latest experimental version (12.0). Objtool silences similar warnings (trap after dead end instructions), so so expand that check to include dead end functions. Cc: Nick Desaulniers Cc: Rong Chen Cc: Marco Elver Cc: Philip Li Cc: Borislav Petkov Cc: kasan-dev@googlegroups.com Cc: x86@kernel.org Cc: clang-built-linux@googlegroups.com BugLink: https://github.com/ClangBuiltLinux/linux/issues/1148 Link: https://lore.kernel.org/lkml/CAKwvOdmptEpi8fiOyWUo=AiZJiX+Z+VHJOM2buLPrWsMTwLnyw@mail.gmail.com Suggested-by: Nick Desaulniers Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Reported-by: kbuild test robot Signed-off-by: Ilie Halip Tested-by: Sedat Dilek Signed-off-by: Josh Poimboeuf --- tools/objtool/check.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index a4796e32bbd1..2df9f769412e 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2638,9 +2638,10 @@ static bool is_ubsan_insn(struct instruction *insn) "__ubsan_handle_builtin_unreachable")); } -static bool ignore_unreachable_insn(struct instruction *insn) +static bool ignore_unreachable_insn(struct objtool_file *file, struct instruction *insn) { int i; + struct instruction *prev_insn; if (insn->ignore || insn->type == INSN_NOP) return true; @@ -2668,8 +2669,11 @@ static bool ignore_unreachable_insn(struct instruction *insn) * __builtin_unreachable(). The BUG() macro has an unreachable() after * the UD2, which causes GCC's undefined trap logic to emit another UD2 * (or occasionally a JMP to UD2). + * + * It may also insert a UD2 after calling a __noreturn function. */ - if (list_prev_entry(insn, list)->dead_end && + prev_insn = list_prev_entry(insn, list); + if ((prev_insn->dead_end || dead_end_function(file, prev_insn->call_dest)) && (insn->type == INSN_BUG || (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest && insn->jump_dest->type == INSN_BUG))) @@ -2796,7 +2800,7 @@ static int validate_reachable_instructions(struct objtool_file *file) return 0; for_each_insn(file, insn) { - if (insn->visited || ignore_unreachable_insn(insn)) + if (insn->visited || ignore_unreachable_insn(file, insn)) continue; WARN_FUNC("unreachable instruction", insn->sec, insn->offset); -- cgit v1.3.1 From 27774b7073b5d520c80f1fcb8e9993fc139f21bd Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 21 Sep 2020 13:12:19 +0100 Subject: btf: Add BTF_ID_LIST_SINGLE macro Add a convenience macro that allows defining a BTF ID list with a single item. This lets us cut down on repetitive macros. Suggested-by: Andrii Nakryiko Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200921121227.255763-4-lmb@cloudflare.com --- include/linux/btf_ids.h | 8 ++++++++ tools/include/linux/btf_ids.h | 8 ++++++++ 2 files changed, 16 insertions(+) (limited to 'tools') diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 210b086188a3..57890b357f85 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -76,6 +76,13 @@ extern u32 name[]; #define BTF_ID_LIST_GLOBAL(name) \ __BTF_ID_LIST(name, globl) +/* The BTF_ID_LIST_SINGLE macro defines a BTF_ID_LIST with + * a single entry. + */ +#define BTF_ID_LIST_SINGLE(name, prefix, typename) \ + BTF_ID_LIST(name) \ + BTF_ID(prefix, typename) + /* * The BTF_ID_UNUSED macro defines 4 zero bytes. * It's used when we want to define 'unused' entry @@ -140,6 +147,7 @@ extern struct btf_id_set name; #define BTF_ID(prefix, name) #define BTF_ID_UNUSED #define BTF_ID_LIST_GLOBAL(name) u32 name[1]; +#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1]; #define BTF_SET_START(name) static struct btf_id_set name = { 0 }; #define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 }; #define BTF_SET_END(name) diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h index 210b086188a3..57890b357f85 100644 --- a/tools/include/linux/btf_ids.h +++ b/tools/include/linux/btf_ids.h @@ -76,6 +76,13 @@ extern u32 name[]; #define BTF_ID_LIST_GLOBAL(name) \ __BTF_ID_LIST(name, globl) +/* The BTF_ID_LIST_SINGLE macro defines a BTF_ID_LIST with + * a single entry. + */ +#define BTF_ID_LIST_SINGLE(name, prefix, typename) \ + BTF_ID_LIST(name) \ + BTF_ID(prefix, typename) + /* * The BTF_ID_UNUSED macro defines 4 zero bytes. * It's used when we want to define 'unused' entry @@ -140,6 +147,7 @@ extern struct btf_id_set name; #define BTF_ID(prefix, name) #define BTF_ID_UNUSED #define BTF_ID_LIST_GLOBAL(name) u32 name[1]; +#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1]; #define BTF_SET_START(name) static struct btf_id_set name = { 0 }; #define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 }; #define BTF_SET_END(name) -- cgit v1.3.1 From a8a717963fe5ecfd274eb93dd1285ee9428ffca7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 18 Sep 2020 13:23:38 +0200 Subject: selftests/bpf: Fix stat probe in d_path test Some kernels builds might inline vfs_getattr call within fstat syscall code path, so fentry/vfs_getattr trampoline is not called. Add security_inode_getattr to allowlist and switch the d_path test stat trampoline to security_inode_getattr. Keeping dentry_open and filp_close, because they are in their own files, so unlikely to be inlined, but in case they are, adding security_file_open. Adding flags that indicate trampolines were called and failing the test if any of them got missed, so it's easier to identify the issue next time. Fixes: e4d1af4b16f8 ("selftests/bpf: Add test for d_path helper") Suggested-by: Alexei Starovoitov Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200918112338.2618444-1-jolsa@kernel.org --- kernel/trace/bpf_trace.c | 8 ++++++++ tools/testing/selftests/bpf/prog_tests/d_path.c | 10 ++++++++++ tools/testing/selftests/bpf/progs/test_d_path.c | 9 ++++++++- 3 files changed, 26 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index ebf9be4d0d6a..36508f46a8db 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1114,6 +1114,14 @@ BPF_CALL_3(bpf_d_path, struct path *, path, char *, buf, u32, sz) } BTF_SET_START(btf_allowlist_d_path) +#ifdef CONFIG_SECURITY +BTF_ID(func, security_file_permission) +BTF_ID(func, security_inode_getattr) +BTF_ID(func, security_file_open) +#endif +#ifdef CONFIG_SECURITY_PATH +BTF_ID(func, security_path_truncate) +#endif BTF_ID(func, vfs_truncate) BTF_ID(func, vfs_fallocate) BTF_ID(func, dentry_open) diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c index fc12e0d445ff..0a577a248d34 100644 --- a/tools/testing/selftests/bpf/prog_tests/d_path.c +++ b/tools/testing/selftests/bpf/prog_tests/d_path.c @@ -120,6 +120,16 @@ void test_d_path(void) if (err < 0) goto cleanup; + if (CHECK(!bss->called_stat, + "stat", + "trampoline for security_inode_getattr was not called\n")) + goto cleanup; + + if (CHECK(!bss->called_close, + "close", + "trampoline for filp_close was not called\n")) + goto cleanup; + for (int i = 0; i < MAX_FILES; i++) { CHECK(strncmp(src.paths[i], bss->paths_stat[i], MAX_PATH_LEN), "check", diff --git a/tools/testing/selftests/bpf/progs/test_d_path.c b/tools/testing/selftests/bpf/progs/test_d_path.c index 61f007855649..84e1f883f97b 100644 --- a/tools/testing/selftests/bpf/progs/test_d_path.c +++ b/tools/testing/selftests/bpf/progs/test_d_path.c @@ -15,7 +15,10 @@ char paths_close[MAX_FILES][MAX_PATH_LEN] = {}; int rets_stat[MAX_FILES] = {}; int rets_close[MAX_FILES] = {}; -SEC("fentry/vfs_getattr") +int called_stat = 0; +int called_close = 0; + +SEC("fentry/security_inode_getattr") int BPF_PROG(prog_stat, struct path *path, struct kstat *stat, __u32 request_mask, unsigned int query_flags) { @@ -23,6 +26,8 @@ int BPF_PROG(prog_stat, struct path *path, struct kstat *stat, __u32 cnt = cnt_stat; int ret; + called_stat = 1; + if (pid != my_pid) return 0; @@ -42,6 +47,8 @@ int BPF_PROG(prog_close, struct file *file, void *id) __u32 cnt = cnt_close; int ret; + called_close = 1; + if (pid != my_pid) return 0; -- cgit v1.3.1 From f02ced62ec35e847ebadb1c6759320345c30fcb5 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 9 Sep 2020 20:25:36 +0200 Subject: selftests: netfilter: add cpu counter check run task on first CPU with netfilter counters reset and check cpu meta after another ping Signed-off-by: Fabian Frederick Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_meta.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh index d250b84dd5bc..17b2d6eaa204 100755 --- a/tools/testing/selftests/netfilter/nft_meta.sh +++ b/tools/testing/selftests/netfilter/nft_meta.sh @@ -33,6 +33,7 @@ table inet filter { counter infproto4count {} counter il4protocounter {} counter imarkcounter {} + counter icpu0counter {} counter oifcount {} counter oifnamecount {} @@ -54,6 +55,7 @@ table inet filter { meta nfproto ipv4 counter name "infproto4count" meta l4proto icmp counter name "il4protocounter" meta mark 42 counter name "imarkcounter" + meta cpu 0 counter name "icpu0counter" } chain output { @@ -119,6 +121,18 @@ check_one_counter omarkcounter "1" true if [ $ret -eq 0 ];then echo "OK: nftables meta iif/oif counters at expected values" +else + exit $ret +fi + +#First CPU execution and counter +taskset -p 01 $$ > /dev/null +ip netns exec "$ns0" nft reset counters > /dev/null +ip netns exec "$ns0" ping -q -c 1 127.0.0.1 > /dev/null +check_one_counter icpu0counter "2" true + +if [ $ret -eq 0 ];then + echo "OK: nftables meta cpu counter at expected values" fi exit $ret -- cgit v1.3.1 From 5b1a995bfa93667762badbe1d8f0f7ad680bdf29 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 9 Sep 2020 20:26:13 +0200 Subject: selftests: netfilter: fix nft_meta.sh error reporting When some test directly done with check_one_counter() fails, counter variable is undefined. This patch calls ip with cname which avoids errors like: FAIL: oskuidcounter, want "packets 2", got Error: syntax error, unexpected newline, expecting string list counter inet filter ^ Error is now correctly rendered: FAIL: oskuidcounter, want "packets 2", got table inet filter { counter oskuidcounter { packets 1 bytes 84 } } Signed-off-by: Fabian Frederick Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_meta.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh index 17b2d6eaa204..1f5b46542c14 100755 --- a/tools/testing/selftests/netfilter/nft_meta.sh +++ b/tools/testing/selftests/netfilter/nft_meta.sh @@ -90,7 +90,7 @@ check_one_counter() if [ $? -ne 0 ];then echo "FAIL: $cname, want \"$want\", got" ret=1 - ip netns exec "$ns0" nft list counter inet filter $counter + ip netns exec "$ns0" nft list counter inet filter $cname fi } -- cgit v1.3.1 From d30a7d54e8481acbb5cc42016c817e598c3d697b Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 9 Sep 2020 20:26:24 +0200 Subject: selftests: netfilter: remove unused cnt and simplify command testing cnt was not used in nft_meta.sh This patch also fixes 2 shellcheck SC2181 warnings: "check exit code directly with e.g. 'if mycmd;', not indirectly with $?." Signed-off-by: Fabian Frederick Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_meta.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh index 1f5b46542c14..18a1abca3262 100755 --- a/tools/testing/selftests/netfilter/nft_meta.sh +++ b/tools/testing/selftests/netfilter/nft_meta.sh @@ -7,8 +7,7 @@ ksft_skip=4 sfx=$(mktemp -u "XXXXXXXX") ns0="ns0-$sfx" -nft --version > /dev/null 2>&1 -if [ $? -ne 0 ];then +if ! nft --version > /dev/null 2>&1; then echo "SKIP: Could not run test without nft tool" exit $ksft_skip fi @@ -86,8 +85,7 @@ check_one_counter() local want="packets $2" local verbose="$3" - cnt=$(ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want") - if [ $? -ne 0 ];then + if ! ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want"; then echo "FAIL: $cname, want \"$want\", got" ret=1 ip netns exec "$ns0" nft list counter inet filter $cname -- cgit v1.3.1 From 8f2a59968f961570fe7b9d99e3a615dd21477638 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 10 Sep 2020 17:56:16 +0900 Subject: selftests/ftrace: Add %return suffix tests Add kprobe %return suffix testcase and syntax error tests for %return suffix. Link: https://lkml.kernel.org/r/159972817653.428528.9180599115849301184.stgit@devnote2 Acked-by: Shuah Khan Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- .../ftrace/test.d/kprobe/kprobe_syntax_errors.tc | 6 ++++++ .../ftrace/test.d/kprobe/kretprobe_return_suffix.tc | 21 +++++++++++++++++++++ .../ftrace/test.d/kprobe/uprobe_syntax_errors.tc | 6 ++++++ 3 files changed, 33 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index b4d834675e59..56b3f36c722b 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -97,4 +97,10 @@ check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"' # DIFF_ARG_TYPE check_error '^p:kprobes/testevent _do_fork abcd=\1' # SAME_PROBE fi +# %return suffix errors +if grep -q "place (kretprobe): .*%return.*" README; then +check_error 'p vfs_read^%hoge' # BAD_ADDR_SUFFIX +check_error 'p ^vfs_read+10%return' # BAD_RETPROBE +fi + exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc new file mode 100644 index 000000000000..f07bd15cc033 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc @@ -0,0 +1,21 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Kretprobe %%return suffix test +# requires: kprobe_events '[+]%return':README + +# Test for kretprobe by "r" +echo 'r:myprobeaccept vfs_read' > kprobe_events +RESULT1=`cat kprobe_events` + +# Test for kretprobe by "%return" +echo 'p:myprobeaccept vfs_read%return' > kprobe_events +RESULT2=`cat kprobe_events` + +if [ "$RESULT1" != "$RESULT2" ]; then + echo "Error: %return suffix didn't make a return probe." + echo "r-command: $RESULT1" + echo "%return: $RESULT2" + exit_fail +fi + +echo > kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc index 7b5b60c3c5a2..f5e3f9e4a01f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc @@ -17,4 +17,10 @@ check_error 'p /bin/sh:10(10)^a' # BAD_REFCNT_SUFFIX check_error 'p /bin/sh:10 ^@+ab' # BAD_FILE_OFFS check_error 'p /bin/sh:10 ^@symbol' # SYM_ON_UPROBE +# %return suffix error +if grep -q "place (uprobe): .*%return.*" README; then +check_error 'p /bin/sh:10^%hoge' # BAD_ADDR_SUFFIX +check_error 'p /bin/sh:10(10)^%return' # BAD_REFCNT_SUFFIX +fi + exit 0 -- cgit v1.3.1 From 03fca3af51703bd77ed14c88c3d5733840a69f3f Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 14 Sep 2020 19:53:06 +0800 Subject: perf tsc: Move out common functions from x86 Functions perf_read_tsc_conversion() and perf_event__synth_time_conv() should work as common functions rather than x86 specific, so move these two functions out from arch/x86 folder and place them into util/tsc.c. Since the function perf_event__synth_time_conv() will be linked in util/tsc.c, remove its weak version. Committer testing: Before/after: # perf test tsc 70: Convert perf time to TSC : Ok # # perf test -v tsc 70: Convert perf time to TSC : --- start --- test child forked, pid 8520 mmap size 528384B 1st event perf time 592110439891 tsc 2317172044331 rdtsc time 592110441915 tsc 2317172052010 2nd event perf time 592110442336 tsc 2317172053605 test child finished with 0 ---- end ---- Convert perf time to TSC: Ok # Signed-off-by: Leo Yan Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Gustavo A. R. Silva Cc: Ian Rogers Cc: Jiri Olsa Cc: John Garry Cc: Kemeng Shi Cc: linux-arm-kernel@lists.infradead.org Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Nick Gasson Cc: Peter Zijlstra Cc: Remi Bernon Cc: Stephane Eranian Cc: Steve Maclean Cc: Will Deacon Cc: Zou Wei Link: http://lore.kernel.org/lkml/20200914115311.2201-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/tsc.c | 73 +------------------------------------- tools/perf/util/synthetic-events.c | 8 ----- tools/perf/util/tsc.c | 71 ++++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 80 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index 2f55afb14e1f..559365f8fe52 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -1,45 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 -#include -#include - -#include -#include - #include -#include -#include "../../../util/debug.h" -#include "../../../util/event.h" -#include "../../../util/synthetic-events.h" -#include "../../../util/tsc.h" - -int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, - struct perf_tsc_conversion *tc) -{ - bool cap_user_time_zero; - u32 seq; - int i = 0; - - while (1) { - seq = pc->lock; - rmb(); - tc->time_mult = pc->time_mult; - tc->time_shift = pc->time_shift; - tc->time_zero = pc->time_zero; - cap_user_time_zero = pc->cap_user_time_zero; - rmb(); - if (pc->lock == seq && !(seq & 1)) - break; - if (++i > 10000) { - pr_debug("failed to get perf_event_mmap_page lock\n"); - return -EINVAL; - } - } - if (!cap_user_time_zero) - return -EOPNOTSUPP; - - return 0; -} +#include "../../../util/tsc.h" u64 rdtsc(void) { @@ -49,36 +11,3 @@ u64 rdtsc(void) return low | ((u64)high) << 32; } - -int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, - struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine) -{ - union perf_event event = { - .time_conv = { - .header = { - .type = PERF_RECORD_TIME_CONV, - .size = sizeof(struct perf_record_time_conv), - }, - }, - }; - struct perf_tsc_conversion tc; - int err; - - if (!pc) - return 0; - err = perf_read_tsc_conversion(pc, &tc); - if (err == -EOPNOTSUPP) - return 0; - if (err) - return err; - - pr_debug2("Synthesizing TSC conversion information\n"); - - event.time_conv.time_mult = tc.time_mult; - event.time_conv.time_shift = tc.time_shift; - event.time_conv.time_zero = tc.time_zero; - - return process(tool, &event, NULL, machine); -} diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 89b390623b63..3ca5d9399680 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -2006,14 +2006,6 @@ int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct p return 0; } -int __weak perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused, - struct perf_tool *tool __maybe_unused, - perf_event__handler_t process __maybe_unused, - struct machine *machine __maybe_unused) -{ - return 0; -} - extern const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE]; int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session, diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c index bfa782421cbd..9e3f04ddddf8 100644 --- a/tools/perf/util/tsc.c +++ b/tools/perf/util/tsc.c @@ -1,7 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 +#include + #include +#include +#include #include +#include + +#include "event.h" +#include "synthetic-events.h" +#include "debug.h" #include "tsc.h" u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc) @@ -25,6 +34,68 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc) ((rem * tc->time_mult) >> tc->time_shift); } +int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, + struct perf_tsc_conversion *tc) +{ + bool cap_user_time_zero; + u32 seq; + int i = 0; + + while (1) { + seq = pc->lock; + rmb(); + tc->time_mult = pc->time_mult; + tc->time_shift = pc->time_shift; + tc->time_zero = pc->time_zero; + cap_user_time_zero = pc->cap_user_time_zero; + rmb(); + if (pc->lock == seq && !(seq & 1)) + break; + if (++i > 10000) { + pr_debug("failed to get perf_event_mmap_page lock\n"); + return -EINVAL; + } + } + + if (!cap_user_time_zero) + return -EOPNOTSUPP; + + return 0; +} + +int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, + struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + union perf_event event = { + .time_conv = { + .header = { + .type = PERF_RECORD_TIME_CONV, + .size = sizeof(struct perf_record_time_conv), + }, + }, + }; + struct perf_tsc_conversion tc; + int err; + + if (!pc) + return 0; + err = perf_read_tsc_conversion(pc, &tc); + if (err == -EOPNOTSUPP) + return 0; + if (err) + return err; + + pr_debug2("Synthesizing TSC conversion information\n"); + + event.time_conv.time_mult = tc.time_mult; + event.time_conv.time_shift = tc.time_shift; + event.time_conv.time_zero = tc.time_zero; + + return process(tool, &event, NULL, machine); +} + u64 __weak rdtsc(void) { return 0; -- cgit v1.3.1 From 4979e861415d59a479c7376e16d90df83da94bb1 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 14 Sep 2020 19:53:07 +0800 Subject: perf tsc: Add rdtsc() for Arm64 The system register CNTVCT_EL0 can be used to retrieve the counter from user space. Add rdtsc() for Arm64. Signed-off-by: Leo Yan Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Gustavo A. R. Silva Cc: Ian Rogers Cc: Jiri Olsa Cc: John Garry Cc: Kemeng Shi Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Nick Gasson Cc: Peter Zijlstra Cc: Remi Bernon Cc: Stephane Eranian Cc: Steve Maclean Cc: Will Deacon Cc: Zou Wei Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20200914115311.2201-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/Build | 1 + tools/perf/arch/arm64/util/tsc.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 tools/perf/arch/arm64/util/tsc.c (limited to 'tools') diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 5c13438c7bd4..b53294d74b01 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,6 +1,7 @@ perf-y += header.o perf-y += machine.o perf-y += perf_regs.o +perf-y += tsc.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm64/util/tsc.c b/tools/perf/arch/arm64/util/tsc.c new file mode 100644 index 000000000000..cc85bd9e73f1 --- /dev/null +++ b/tools/perf/arch/arm64/util/tsc.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#include "../../../util/tsc.h" + +u64 rdtsc(void) +{ + u64 val; + + /* + * According to ARM DDI 0487F.c, from Armv8.0 to Armv8.5 inclusive, the + * system counter is at least 56 bits wide; from Armv8.6, the counter + * must be 64 bits wide. So the system counter could be less than 64 + * bits wide and it is attributed with the flag 'cap_user_time_short' + * is true. + */ + asm volatile("mrs %0, cntvct_el0" : "=r" (val)); + + return val; +} -- cgit v1.3.1 From 78a93d4cec6bc7c6011319d2b8f6509d96de186f Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 14 Sep 2020 19:53:08 +0800 Subject: perf tsc: Calculate timestamp with cap_user_time_short The perf mmap'ed buffer contains the flag 'cap_user_time_short' and two extra fields 'time_cycles' and 'time_mask', perf tool needs to know them for handling the counter wrapping case. This patch is to reads out the relevant parameters from the head of the first mmap'ed page and stores into the structure 'perf_tsc_conversion', if the flag 'cap_user_time_short' has been set, it will firstly calibrate cycle value for timestamp calculation. Committer testing: Before/after: # perf test tsc 70: Convert perf time to TSC : Ok # # perf test -v tsc 70: Convert perf time to TSC : --- start --- test child forked, pid 11059 mmap size 528384B 1st event perf time 996384576521 tsc 3850532906613 rdtsc time 996384578455 tsc 3850532913950 2nd event perf time 996384578845 tsc 3850532915428 test child finished with 0 ---- end ---- Convert perf time to TSC: Ok # Signed-off-by: Leo Yan Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Gustavo A. R. Silva Cc: Ian Rogers Cc: Jiri Olsa Cc: John Garry Cc: Kemeng Shi Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Nick Gasson Cc: Peter Zijlstra Cc: Remi Bernon Cc: Stephane Eranian Cc: Steve Maclean Cc: Will Deacon Cc: Zou Wei Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20200914115311.2201-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/tsc.c | 12 +++++++++--- tools/perf/util/tsc.h | 5 +++++ 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c index 9e3f04ddddf8..c0ca40204649 100644 --- a/tools/perf/util/tsc.c +++ b/tools/perf/util/tsc.c @@ -28,6 +28,10 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc) { u64 quot, rem; + if (tc->cap_user_time_short) + cyc = tc->time_cycles + + ((cyc - tc->time_cycles) & tc->time_mask); + quot = cyc >> tc->time_shift; rem = cyc & (((u64)1 << tc->time_shift) - 1); return tc->time_zero + quot * tc->time_mult + @@ -37,7 +41,6 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc) int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, struct perf_tsc_conversion *tc) { - bool cap_user_time_zero; u32 seq; int i = 0; @@ -47,7 +50,10 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, tc->time_mult = pc->time_mult; tc->time_shift = pc->time_shift; tc->time_zero = pc->time_zero; - cap_user_time_zero = pc->cap_user_time_zero; + tc->time_cycles = pc->time_cycles; + tc->time_mask = pc->time_mask; + tc->cap_user_time_zero = pc->cap_user_time_zero; + tc->cap_user_time_short = pc->cap_user_time_short; rmb(); if (pc->lock == seq && !(seq & 1)) break; @@ -57,7 +63,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, } } - if (!cap_user_time_zero) + if (!tc->cap_user_time_zero) return -EOPNOTSUPP; return 0; diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h index 3c5a632ee57c..72a15419f3b3 100644 --- a/tools/perf/util/tsc.h +++ b/tools/perf/util/tsc.h @@ -8,6 +8,11 @@ struct perf_tsc_conversion { u16 time_shift; u32 time_mult; u64 time_zero; + u64 time_cycles; + u64 time_mask; + + bool cap_user_time_zero; + bool cap_user_time_short; }; struct perf_event_mmap_page; -- cgit v1.3.1 From d110162cafc80dad0622cfd40f3113aebb77e1bb Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 14 Sep 2020 19:53:09 +0800 Subject: perf tsc: Support cap_user_time_short for event TIME_CONV The synthesized event TIME_CONV doesn't contain the complete parameters for counters, this will lead to wrong conversion between counter cycles and timestamp. This patch extends event TIME_CONV to record flags 'cap_user_time_zero' which is used to indicate the counter parameters are valid or not, if not will directly return 0 for timestamp calculation. And record the flag 'cap_user_time_short' and its relevant fields 'time_cycles' and 'time_mask' for cycle calibration. Signed-off-by: Leo Yan Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Gustavo A. R. Silva Cc: Ian Rogers Cc: Jiri Olsa Cc: John Garry Cc: Kemeng Shi Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Nick Gasson Cc: Peter Zijlstra Cc: Remi Bernon Cc: Stephane Eranian Cc: Steve Maclean Cc: Will Deacon Cc: Zou Wei Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20200914115311.2201-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/perf/event.h | 4 ++++ tools/perf/util/jitdump.c | 14 +++++++++----- tools/perf/util/tsc.c | 4 ++++ 3 files changed, 17 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 842028858d66..a6dbba6b9073 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -324,6 +324,10 @@ struct perf_record_time_conv { __u64 time_shift; __u64 time_mult; __u64 time_zero; + __u64 time_cycles; + __u64 time_mask; + bool cap_user_time_zero; + bool cap_user_time_short; }; struct perf_record_header_feature { diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 0804308ef285..055bab7a92b3 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -374,11 +374,15 @@ static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp) if (!jd->use_arch_timestamp) return timestamp; - tc.time_shift = jd->session->time_conv.time_shift; - tc.time_mult = jd->session->time_conv.time_mult; - tc.time_zero = jd->session->time_conv.time_zero; - - if (!tc.time_mult) + tc.time_shift = jd->session->time_conv.time_shift; + tc.time_mult = jd->session->time_conv.time_mult; + tc.time_zero = jd->session->time_conv.time_zero; + tc.time_cycles = jd->session->time_conv.time_cycles; + tc.time_mask = jd->session->time_conv.time_mask; + tc.cap_user_time_zero = jd->session->time_conv.cap_user_time_zero; + tc.cap_user_time_short = jd->session->time_conv.cap_user_time_short; + + if (!tc.cap_user_time_zero) return 0; return tsc_to_perf_time(timestamp, &tc); diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c index c0ca40204649..62b4c75c966c 100644 --- a/tools/perf/util/tsc.c +++ b/tools/perf/util/tsc.c @@ -98,6 +98,10 @@ int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, event.time_conv.time_mult = tc.time_mult; event.time_conv.time_shift = tc.time_shift; event.time_conv.time_zero = tc.time_zero; + event.time_conv.time_cycles = tc.time_cycles; + event.time_conv.time_mask = tc.time_mask; + event.time_conv.cap_user_time_zero = tc.cap_user_time_zero; + event.time_conv.cap_user_time_short = tc.cap_user_time_short; return process(tool, &event, NULL, machine); } -- cgit v1.3.1 From 028abd9222df0cf5855dab5014a5ebaf06f90565 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Sep 2020 10:22:34 +0200 Subject: fs: remove compat_sys_mount compat_sys_mount is identical to the regular sys_mount now, so remove it and use the native version everywhere. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- arch/arm64/include/asm/unistd32.h | 2 +- arch/mips/kernel/syscalls/syscall_n32.tbl | 2 +- arch/mips/kernel/syscalls/syscall_o32.tbl | 2 +- arch/parisc/kernel/syscalls/syscall.tbl | 2 +- arch/powerpc/kernel/syscalls/syscall.tbl | 2 +- arch/s390/kernel/syscalls/syscall.tbl | 2 +- arch/sparc/kernel/syscalls/syscall.tbl | 2 +- arch/x86/entry/syscalls/syscall_32.tbl | 2 +- fs/Makefile | 1 - fs/compat.c | 57 ---------------------- fs/internal.h | 3 -- fs/namespace.c | 4 +- include/linux/compat.h | 6 --- include/uapi/asm-generic/unistd.h | 2 +- tools/include/uapi/asm-generic/unistd.h | 2 +- tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 2 +- tools/perf/arch/s390/entry/syscalls/syscall.tbl | 2 +- 17 files changed, 14 insertions(+), 81 deletions(-) delete mode 100644 fs/compat.c (limited to 'tools') diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 734860ac7cf9..5fd095d65450 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -53,7 +53,7 @@ __SYSCALL(__NR_lseek, compat_sys_lseek) #define __NR_getpid 20 __SYSCALL(__NR_getpid, sys_getpid) #define __NR_mount 21 -__SYSCALL(__NR_mount, compat_sys_mount) +__SYSCALL(__NR_mount, sys_mount) /* 22 was sys_umount */ __SYSCALL(22, sys_ni_syscall) #define __NR_setuid 23 diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index f9df9edb67a4..61fa9e7013cb 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -167,7 +167,7 @@ 157 n32 sync sys_sync 158 n32 acct sys_acct 159 n32 settimeofday compat_sys_settimeofday -160 n32 mount compat_sys_mount +160 n32 mount sys_mount 161 n32 umount2 sys_umount 162 n32 swapon sys_swapon 163 n32 swapoff sys_swapoff diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 195b43cf27c8..b992e89be7ff 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -29,7 +29,7 @@ 18 o32 unused18 sys_ni_syscall 19 o32 lseek sys_lseek 20 o32 getpid sys_getpid -21 o32 mount sys_mount compat_sys_mount +21 o32 mount sys_mount 22 o32 umount sys_oldumount 23 o32 setuid sys_setuid 24 o32 getuid sys_getuid diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index def64d221cd4..07efd978182f 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -29,7 +29,7 @@ 18 common stat sys_newstat compat_sys_newstat 19 common lseek sys_lseek compat_sys_lseek 20 common getpid sys_getpid -21 common mount sys_mount compat_sys_mount +21 common mount sys_mount 22 common bind sys_bind 23 common setuid sys_setuid 24 common getuid sys_getuid diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index c2d737ff2e7b..a36ad4fec73c 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -34,7 +34,7 @@ 18 spu oldstat sys_ni_syscall 19 common lseek sys_lseek compat_sys_lseek 20 common getpid sys_getpid -21 nospu mount sys_mount compat_sys_mount +21 nospu mount sys_mount 22 32 umount sys_oldumount 22 64 umount sys_ni_syscall 22 spu umount sys_ni_syscall diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 10456bc936fb..4b803dfbee2b 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -26,7 +26,7 @@ 16 32 lchown - sys_lchown16 19 common lseek sys_lseek compat_sys_lseek 20 common getpid sys_getpid sys_getpid -21 common mount sys_mount compat_sys_mount +21 common mount sys_mount sys_mount 22 common umount sys_oldumount sys_oldumount 23 32 setuid - sys_setuid16 24 32 getuid - sys_getuid16 diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 4af114e84f20..d5ff798fa08f 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -201,7 +201,7 @@ 164 64 utrap_install sys_utrap_install 165 common quotactl sys_quotactl 166 common set_tid_address sys_set_tid_address -167 common mount sys_mount compat_sys_mount +167 common mount sys_mount 168 common ustat sys_ustat compat_sys_ustat 169 common setxattr sys_setxattr 170 common lsetxattr sys_lsetxattr diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 9d1102873666..5a40b226fb7b 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -32,7 +32,7 @@ 18 i386 oldstat sys_stat 19 i386 lseek sys_lseek compat_sys_lseek 20 i386 getpid sys_getpid -21 i386 mount sys_mount compat_sys_mount +21 i386 mount sys_mount 22 i386 umount sys_oldumount 23 i386 setuid sys_setuid16 24 i386 getuid sys_getuid16 diff --git a/fs/Makefile b/fs/Makefile index 1c7b0e3f6daa..d72ee2ce7af0 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -37,7 +37,6 @@ obj-$(CONFIG_FS_DAX) += dax.o obj-$(CONFIG_FS_ENCRYPTION) += crypto/ obj-$(CONFIG_FS_VERITY) += verity/ obj-$(CONFIG_FILE_LOCKING) += locks.o -obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o diff --git a/fs/compat.c b/fs/compat.c deleted file mode 100644 index 9b00523d7fa5..000000000000 --- a/fs/compat.c +++ /dev/null @@ -1,57 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * linux/fs/compat.c - * - * Kernel compatibililty routines for e.g. 32 bit syscall support - * on 64 bit kernels. - * - * Copyright (C) 2002 Stephen Rothwell, IBM Corporation - * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) - * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) - * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs - * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz) - */ - -#include -#include -#include -#include -#include -#include "internal.h" - -COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name, - const char __user *, dir_name, - const char __user *, type, compat_ulong_t, flags, - const void __user *, data) -{ - char *kernel_type; - void *options; - char *kernel_dev; - int retval; - - kernel_type = copy_mount_string(type); - retval = PTR_ERR(kernel_type); - if (IS_ERR(kernel_type)) - goto out; - - kernel_dev = copy_mount_string(dev_name); - retval = PTR_ERR(kernel_dev); - if (IS_ERR(kernel_dev)) - goto out1; - - options = copy_mount_options(data); - retval = PTR_ERR(options); - if (IS_ERR(options)) - goto out2; - - retval = do_mount(kernel_dev, dir_name, kernel_type, flags, options); - - out3: - kfree(options); - out2: - kfree(kernel_dev); - out1: - kfree(kernel_type); - out: - return retval; -} diff --git a/fs/internal.h b/fs/internal.h index 10517ece4516..a7cd0f64faa4 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -82,9 +82,6 @@ int may_linkat(struct path *link); /* * namespace.c */ -extern void *copy_mount_options(const void __user *); -extern char *copy_mount_string(const void __user *); - extern struct vfsmount *lookup_mnt(const struct path *); extern int finish_automount(struct vfsmount *, struct path *); diff --git a/fs/namespace.c b/fs/namespace.c index bae0e95b3713..12b431b61462 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3072,7 +3072,7 @@ static void shrink_submounts(struct mount *mnt) } } -void *copy_mount_options(const void __user * data) +static void *copy_mount_options(const void __user * data) { char *copy; unsigned size; @@ -3097,7 +3097,7 @@ void *copy_mount_options(const void __user * data) return copy; } -char *copy_mount_string(const void __user *data) +static char *copy_mount_string(const void __user *data) { return data ? strndup_user(data, PATH_MAX) : NULL; } diff --git a/include/linux/compat.h b/include/linux/compat.h index d38c4d7e83bd..100632280ccc 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -522,12 +522,6 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, compat_ulong_t arg); -/* fs/namespace.c */ -asmlinkage long compat_sys_mount(const char __user *dev_name, - const char __user *dir_name, - const char __user *type, compat_ulong_t flags, - const void __user *data); - /* fs/open.c */ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf); diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 995b36c2ea7d..fc98c9437609 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -140,7 +140,7 @@ __SYSCALL(__NR_renameat, sys_renameat) #define __NR_umount2 39 __SYSCALL(__NR_umount2, sys_umount) #define __NR_mount 40 -__SC_COMP(__NR_mount, sys_mount, compat_sys_mount) +__SYSCALL(__NR_mount, sys_mount) #define __NR_pivot_root 41 __SYSCALL(__NR_pivot_root, sys_pivot_root) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 995b36c2ea7d..fc98c9437609 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -140,7 +140,7 @@ __SYSCALL(__NR_renameat, sys_renameat) #define __NR_umount2 39 __SYSCALL(__NR_umount2, sys_umount) #define __NR_mount 40 -__SC_COMP(__NR_mount, sys_mount, compat_sys_mount) +__SYSCALL(__NR_mount, sys_mount) #define __NR_pivot_root 41 __SYSCALL(__NR_pivot_root, sys_pivot_root) diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 3ca6fe057a0b..c2866c659650 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -32,7 +32,7 @@ 18 spu oldstat sys_ni_syscall 19 common lseek sys_lseek compat_sys_lseek 20 common getpid sys_getpid -21 nospu mount sys_mount compat_sys_mount +21 nospu mount sys_mount 22 32 umount sys_oldumount 22 64 umount sys_ni_syscall 22 spu umount sys_ni_syscall diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 6a0bbea225db..8e0806f6c38e 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -26,7 +26,7 @@ 16 32 lchown - compat_sys_s390_lchown16 19 common lseek sys_lseek compat_sys_lseek 20 common getpid sys_getpid sys_getpid -21 common mount sys_mount compat_sys_mount +21 common mount sys_mount 22 common umount sys_oldumount compat_sys_oldumount 23 32 setuid - compat_sys_s390_setuid16 24 32 getuid - compat_sys_s390_getuid16 -- cgit v1.3.1 From 002a3d690f95804bdef6b70b26154103518e13d9 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 22 Sep 2020 09:50:04 +0800 Subject: perf stat: Skip duration_time in setup_system_wide Some metrics (such as DRAM_BW_Use) consists of uncore events and duration_time. For uncore events, counter->core.system_wide is true. But for duration_time, counter->core.system_wide is false so target.system_wide is set to false. Then 'enable_on_exec' is set in perf_event_attr of uncore event. Kernel will return error when trying to open the uncore event. This patch skips the duration_time in setup_system_wide then target.system_wide will be set to true for the evlist of uncore events + duration_time. Before (tested on skylake desktop): # perf stat -M DRAM_BW_Use -- sleep 1 Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (arb/event=0x84,umask=0x1/). /bin/dmesg | grep -i perf may provide additional information. After: # perf stat -M DRAM_BW_Use -- sleep 1 Performance counter stats for 'system wide': 169 arb/event=0x84,umask=0x1/ # 0.00 DRAM_BW_Use 40,427 arb/event=0x81,umask=0x1/ 1,000,902,197 ns duration_time 1.000902197 seconds time elapsed Fixes: e3ba76deef23064f ("perf tools: Force uncore events to system wide monitoring") Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200922015004.30114-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 17fbbd03e154..cbb2d977eec7 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2024,8 +2024,10 @@ static void setup_system_wide(int forks) struct evsel *counter; evlist__for_each_entry(evsel_list, counter) { - if (!counter->core.system_wide) + if (!counter->core.system_wide && + strcmp(counter->name, "duration_time")) { return; + } } if (evsel_list->core.nr_entries) -- cgit v1.3.1 From 2a684fcb605ae1febef381b85bc2b5f3edf47cc5 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Mon, 21 Sep 2020 22:19:27 +0200 Subject: perf script: Autopep8 futex-contention 10 years leaves its mark! Python has evolved and so has its style guide. Even with vim it is getting hard to follow the no longer valid guidelines (spaces vs. tabs). Autopep8 this code to modernize it! Signed-off-by: Hagen Paul Pfeifer Link: http://lore.kernel.org/lkml/20200921201928.799498-1-hagen@jauu.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/futex-contention.py | 51 +++++++++++++++------------ 1 file changed, 28 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/perf/scripts/python/futex-contention.py b/tools/perf/scripts/python/futex-contention.py index 0c4841acf75d..c440f02627dd 100644 --- a/tools/perf/scripts/python/futex-contention.py +++ b/tools/perf/scripts/python/futex-contention.py @@ -12,41 +12,46 @@ from __future__ import print_function -import os, sys -sys.path.append(os.environ['PERF_EXEC_PATH'] + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') +import os +import sys +sys.path.append(os.environ['PERF_EXEC_PATH'] + + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') from Util import * process_names = {} thread_thislock = {} thread_blocktime = {} -lock_waits = {} # long-lived stats on (tid,lock) blockage elapsed time -process_names = {} # long-lived pid-to-execname mapping +lock_waits = {} # long-lived stats on (tid,lock) blockage elapsed time +process_names = {} # long-lived pid-to-execname mapping + def syscalls__sys_enter_futex(event, ctxt, cpu, s, ns, tid, comm, callchain, - nr, uaddr, op, val, utime, uaddr2, val3): - cmd = op & FUTEX_CMD_MASK - if cmd != FUTEX_WAIT: - return # we don't care about originators of WAKE events + nr, uaddr, op, val, utime, uaddr2, val3): + cmd = op & FUTEX_CMD_MASK + if cmd != FUTEX_WAIT: + return # we don't care about originators of WAKE events + + process_names[tid] = comm + thread_thislock[tid] = uaddr + thread_blocktime[tid] = nsecs(s, ns) - process_names[tid] = comm - thread_thislock[tid] = uaddr - thread_blocktime[tid] = nsecs(s, ns) def syscalls__sys_exit_futex(event, ctxt, cpu, s, ns, tid, comm, callchain, - nr, ret): - if tid in thread_blocktime: - elapsed = nsecs(s, ns) - thread_blocktime[tid] - add_stats(lock_waits, (tid, thread_thislock[tid]), elapsed) - del thread_blocktime[tid] - del thread_thislock[tid] + nr, ret): + if tid in thread_blocktime: + elapsed = nsecs(s, ns) - thread_blocktime[tid] + add_stats(lock_waits, (tid, thread_thislock[tid]), elapsed) + del thread_blocktime[tid] + del thread_thislock[tid] + def trace_begin(): - print("Press control+C to stop and show the summary") + print("Press control+C to stop and show the summary") -def trace_end(): - for (tid, lock) in lock_waits: - min, max, avg, count = lock_waits[tid, lock] - print("%s[%d] lock %x contended %d times, %d avg ns" % - (process_names[tid], tid, lock, count, avg)) +def trace_end(): + for (tid, lock) in lock_waits: + min, max, avg, count = lock_waits[tid, lock] + print("%s[%d] lock %x contended %d times, %d avg ns" % + (process_names[tid], tid, lock, count, avg)) -- cgit v1.3.1 From 69f48c7040d3a02a7a77ac6ac77e174958786c7c Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Tue, 22 Sep 2020 22:09:22 +0200 Subject: perf script: Add min, max to futex-contention output, in addition to avg Average is quite informative, but the outliners - especially max - are also of interest. Before: mutex-locker[793299] lock 5637ec61e080 contended 3400 times, 446 avg ns mutex-locker[793301] lock 5637ec61e080 contended 3563 times, 385 avg ns mutex-locker[793300] lock 5637ec61e080 contended 3110 times, 1855 avg ns After: mutex-locker[795251] lock 55b14e6dd080 contended 3853 times, 1279 avg ns [max: 12270 ns, min 340 ns] mutex-locker[795253] lock 55b14e6dd080 contended 2911 times, 518 avg ns [max: 51660261 ns, min 347 ns] mutex-locker[795252] lock 55b14e6dd080 contended 3843 times, 385 avg ns [max: 24323998 ns, min 338 ns] Committer testing: [root@five ~]# perf script record futex-contention -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.877 MB perf.data (923 samples) ] [root@five ~]# perf evlist syscalls:sys_enter_futex syscalls:sys_exit_futex dummy:HG # Tip: use 'perf evlist --trace-fields' to show fields for tracepoint events # Before: [root@five ~]# perf script report futex-contention JS Helper[2457] lock 55fe0cf82610 contended 4 times, 6657 avg ns ibus-daemon[2975] lock 56227f6d0210 contended 4 times, 1020 avg ns chromium-browse[1905801] lock 7ffe573f5088 contended 8 times, 108463 avg ns gnome-shell[2240] lock 55fe0cf82678 contended 1 times, 8616 avg ns gnome-shel:cs0[2292] lock 55fe0d0ab768 contended 3 times, 606016034 avg ns JS Helper[2458] lock 55fe0cf82690 contended 1 times, 1167840 avg ns chromium-browse[1905470] lock 7ffe573f5358 contended 1 times, 551504 avg ns chromium-browse[1905948] lock 7ffe573f5358 contended 1 times, 577422 avg ns gnome-shell[2240] lock 55fe0cf82660 contended 6 times, 202696 avg ns pool[2602] lock 7fd600008ef0 contended 1 times, 500046007 avg ns chromium-browse[1905801] lock 7ffe573f5128 contended 4 times, 285083 avg ns JS Helper[2460] lock 55fe0cf82690 contended 1 times, 680877 avg ns JS Helper[2459] lock 55fe0cf82610 contended 7 times, 4224 avg ns chromium-browse[1905434] lock 7ffe573f5358 contended 1 times, 697038 avg ns chromium-browse[212592] lock 7ffe573f53c8 contended 4 times, 460601 avg ns gnome-shel:cs0[2292] lock 55fe0d0ab76c contended 2 times, 601237648 avg ns JS Helper[2460] lock 55fe0cf82610 contended 4 times, 3340 avg ns JS Helper[2462] lock 55fe0cf82694 contended 1 times, 237275 avg ns chromium-browse[1905605] lock 7ffe573f5358 contended 2 times, 634555 avg ns chromium-browse[1905992] lock 7ffe573f5358 contended 1 times, 583965 avg ns chromium-browse[1905647] lock 7ffe573f5368 contended 8 times, 549800 avg ns JS Helper[2462] lock 55fe0cf82610 contended 2 times, 4694 avg ns JS Helper[2461] lock 55fe0cf82694 contended 1 times, 257793 avg ns JS Helper[2456] lock 55fe0cf82690 contended 1 times, 677771 avg ns JS Helper[2463] lock 55fe0cf82610 contended 3 times, 5139 avg ns gdbus[2980] lock 56227f6d0210 contended 2 times, 2465 avg ns gnome-shell[2240] lock 55fe0cf82664 contended 5 times, 8036 avg ns chromium-browse[1906308] lock 7ffe573f5358 contended 1 times, 210735 avg ns JS Helper[2463] lock 55fe0cf82694 contended 1 times, 251531 avg ns chromium-browse[1905801] lock 7ffe573f4f58 contended 4 times, 399927 avg ns [root@five ~]# After: [root@five ~]# perf script report futex-contention JS Helper[2457] lock 55fe0cf82610 contended 4 times, 6657 avg ns [max: 11502 ns, min 792 ns] ibus-daemon[2975] lock 56227f6d0210 contended 4 times, 1020 avg ns [max: 1813 ns, min 581 ns] chromium-browse[1905801] lock 7ffe573f5088 contended 8 times, 108463 avg ns [max: 380103 ns, min 57989 ns] gnome-shell[2240] lock 55fe0cf82678 contended 1 times, 8616 avg ns [max: 8616 ns, min 8616 ns] gnome-shel:cs0[2292] lock 55fe0d0ab768 contended 3 times, 606016034 avg ns [max: 611295960 ns, min 600191357 ns] JS Helper[2458] lock 55fe0cf82690 contended 1 times, 1167840 avg ns [max: 1167840 ns, min 1167840 ns] chromium-browse[1905470] lock 7ffe573f5358 contended 1 times, 551504 avg ns [max: 551504 ns, min 551504 ns] chromium-browse[1905948] lock 7ffe573f5358 contended 1 times, 577422 avg ns [max: 577422 ns, min 577422 ns] gnome-shell[2240] lock 55fe0cf82660 contended 6 times, 202696 avg ns [max: 398998 ns, min 5050 ns] pool[2602] lock 7fd600008ef0 contended 1 times, 500046007 avg ns [max: 500046007 ns, min 500046007 ns] chromium-browse[1905801] lock 7ffe573f5128 contended 4 times, 285083 avg ns [max: 389531 ns, min 76183 ns] JS Helper[2460] lock 55fe0cf82690 contended 1 times, 680877 avg ns [max: 680877 ns, min 680877 ns] JS Helper[2459] lock 55fe0cf82610 contended 7 times, 4224 avg ns [max: 12724 ns, min 1012 ns] chromium-browse[1905434] lock 7ffe573f5358 contended 1 times, 697038 avg ns [max: 697038 ns, min 697038 ns] chromium-browse[212592] lock 7ffe573f53c8 contended 4 times, 460601 avg ns [max: 594956 ns, min 232996 ns] gnome-shel:cs0[2292] lock 55fe0d0ab76c contended 2 times, 601237648 avg ns [max: 601255863 ns, min 601219434 ns] JS Helper[2460] lock 55fe0cf82610 contended 4 times, 3340 avg ns [max: 9168 ns, min 962 ns] JS Helper[2462] lock 55fe0cf82694 contended 1 times, 237275 avg ns [max: 237275 ns, min 237275 ns] chromium-browse[1905605] lock 7ffe573f5358 contended 2 times, 634555 avg ns [max: 1024060 ns, min 245050 ns] chromium-browse[1905992] lock 7ffe573f5358 contended 1 times, 583965 avg ns [max: 583965 ns, min 583965 ns] chromium-browse[1905647] lock 7ffe573f5368 contended 8 times, 549800 avg ns [max: 775293 ns, min 258375 ns] JS Helper[2462] lock 55fe0cf82610 contended 2 times, 4694 avg ns [max: 8556 ns, min 832 ns] JS Helper[2461] lock 55fe0cf82694 contended 1 times, 257793 avg ns [max: 257793 ns, min 257793 ns] JS Helper[2456] lock 55fe0cf82690 contended 1 times, 677771 avg ns [max: 677771 ns, min 677771 ns] JS Helper[2463] lock 55fe0cf82610 contended 3 times, 5139 avg ns [max: 6873 ns, min 931 ns] gdbus[2980] lock 56227f6d0210 contended 2 times, 2465 avg ns [max: 4188 ns, min 742 ns] gnome-shell[2240] lock 55fe0cf82664 contended 5 times, 8036 avg ns [max: 13105 ns, min 401 ns] chromium-browse[1906308] lock 7ffe573f5358 contended 1 times, 210735 avg ns [max: 210735 ns, min 210735 ns] JS Helper[2463] lock 55fe0cf82694 contended 1 times, 251531 avg ns [max: 251531 ns, min 251531 ns] chromium-browse[1905801] lock 7ffe573f4f58 contended 4 times, 399927 avg ns [max: 476904 ns, min 178495 ns] [root@five ~]# Signed-off-by: Hagen Paul Pfeifer Tested-by: Arnaldo Carvalho de Melo Link: http://lore.kernel.org/lkml/20200922200922.1306034-1-hagen@jauu.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/futex-contention.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/scripts/python/futex-contention.py b/tools/perf/scripts/python/futex-contention.py index c440f02627dd..7e884d46f920 100644 --- a/tools/perf/scripts/python/futex-contention.py +++ b/tools/perf/scripts/python/futex-contention.py @@ -53,5 +53,5 @@ def trace_begin(): def trace_end(): for (tid, lock) in lock_waits: min, max, avg, count = lock_waits[tid, lock] - print("%s[%d] lock %x contended %d times, %d avg ns" % - (process_names[tid], tid, lock, count, avg)) + print("%s[%d] lock %x contended %d times, %d avg ns [max: %d ns, min %d ns]" % + (process_names[tid], tid, lock, count, avg, max, min)) -- cgit v1.3.1 From 997a91fd4448e2600e460af31e5fb934328374a9 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Sat, 5 Sep 2020 01:31:39 -0400 Subject: selftests: Add missing gitignore entries Prevent them from polluting git status after building selftests. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Shuah Khan --- tools/testing/selftests/firmware/.gitignore | 2 ++ tools/testing/selftests/netfilter/.gitignore | 2 ++ tools/testing/selftests/ptrace/.gitignore | 1 + 3 files changed, 5 insertions(+) create mode 100644 tools/testing/selftests/firmware/.gitignore create mode 100644 tools/testing/selftests/netfilter/.gitignore (limited to 'tools') diff --git a/tools/testing/selftests/firmware/.gitignore b/tools/testing/selftests/firmware/.gitignore new file mode 100644 index 000000000000..62abc92a94c4 --- /dev/null +++ b/tools/testing/selftests/firmware/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +fw_namespace diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore new file mode 100644 index 000000000000..8448f74adfec --- /dev/null +++ b/tools/testing/selftests/netfilter/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +nf-queue diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore index 7bebf9534a86..792318aaa30c 100644 --- a/tools/testing/selftests/ptrace/.gitignore +++ b/tools/testing/selftests/ptrace/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only get_syscall_info peeksiginfo +vmaccess -- cgit v1.3.1 From dc3652d3f0d5479768ec8eb7f7aabbba6ed75d95 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 23 Sep 2020 20:57:35 +0200 Subject: tools resolve_btfids: Always force HOSTARCH Seth reported problem with cross builds, that fail on resolve_btfids build, because we are trying to build it on cross build arch. Fixing this by always forcing the host arch. Reported-by: Seth Forshee Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200923185735.3048198-2-jolsa@kernel.org --- tools/bpf/resolve_btfids/Makefile | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index a88cd4426398..d3c818b8d8d3 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only include ../../scripts/Makefile.include +include ../../scripts/Makefile.arch ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(CURDIR))) @@ -29,6 +30,7 @@ endif AR = $(HOSTAR) CC = $(HOSTCC) LD = $(HOSTLD) +ARCH = $(HOSTARCH) OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/ -- cgit v1.3.1 From 67e2fae3b767fd4444f3f161f6420d0d0338a10d Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Wed, 23 Sep 2020 14:19:38 -0700 Subject: kunit: tool: fix --alltests flag Alltests flag evidently stopped working when run from outside of the root of the source tree, so fix that. Also add an additional broken config to the broken_on_uml config. Signed-off-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/configs/broken_on_uml.config | 1 + tools/testing/kunit/kunit_kernel.py | 15 ++++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/configs/broken_on_uml.config b/tools/testing/kunit/configs/broken_on_uml.config index 239b9f03da2c..a7f0603d33f6 100644 --- a/tools/testing/kunit/configs/broken_on_uml.config +++ b/tools/testing/kunit/configs/broken_on_uml.config @@ -39,3 +39,4 @@ # CONFIG_QCOM_CPR is not set # CONFIG_RESET_BRCMSTB_RESCAL is not set # CONFIG_RESET_INTEL_GW is not set +# CONFIG_ADI_AXI_ADC is not set diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index e20e2056cb38..1b1826500f61 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -53,18 +53,23 @@ class LinuxSourceTreeOperations(object): except subprocess.CalledProcessError as e: raise ConfigError(e.output) - def make_allyesconfig(self): + def make_allyesconfig(self, build_dir, make_options): kunit_parser.print_with_timestamp( 'Enabling all CONFIGs for UML...') + command = ['make', 'ARCH=um', 'allyesconfig'] + if make_options: + command.extend(make_options) + if build_dir: + command += ['O=' + build_dir] process = subprocess.Popen( - ['make', 'ARCH=um', 'allyesconfig'], + command, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT) process.wait() kunit_parser.print_with_timestamp( 'Disabling broken configs to run KUnit tests...') with ExitStack() as es: - config = open(KCONFIG_PATH, 'a') + config = open(get_kconfig_path(build_dir), 'a') disable = open(BROKEN_ALLCONFIG_PATH, 'r').read() config.write(disable) kunit_parser.print_with_timestamp( @@ -161,9 +166,9 @@ class LinuxSourceTree(object): return self.build_config(build_dir, make_options) def build_um_kernel(self, alltests, jobs, build_dir, make_options): - if alltests: - self._ops.make_allyesconfig() try: + if alltests: + self._ops.make_allyesconfig(build_dir, make_options) self._ops.make_olddefconfig(build_dir, make_options) self._ops.make(jobs, build_dir, make_options) except (ConfigError, BuildError) as e: -- cgit v1.3.1 From bc2652b7ae1e1b85b5fbd3621c98a9c743ed89d6 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 21 Sep 2020 15:36:57 +0100 Subject: selftest/net/xfrm: Add test for ipsec tunnel It's an exhaustive testing for ipsec: covering all encryption/ authentication/compression algorithms. The tests are run in two network namespaces, connected by veth interfaces. To make exhaustive testing less time-consuming, the tests are run in parallel tasks, specified by parameter to the selftest. As the patches set adds support for xfrm in compatible tasks, there are tests to check structures that differ in size between 64-bit and 32-bit applications. The selftest doesn't use libnl so that it can be easily compiled as compatible application and don't require compatible .so. Here is a diagram of the selftest: --------------- | selftest | | (parent) | --------------- | | | (pipe) | ---------- / | | \ /------------- / \ -------------\ | /----- -----\ | ---------|----------|----------------|----------|--------- | --------- --------- --------- --------- | | | child | | child | NS A | child | | child | | | --------- --------- --------- --------- | -------|------------|----------------|-------------|------ veth0 veth1 veth2 vethN ---------|------------|----------------|-------------|---------- | ------------ ------------ ------------ ------------ | | | gr.child | | gr.child | NS B | gr.child | | gr.child | | | ------------ ------------ ------------ ------------ | ---------------------------------------------------------------- The parent sends the description of a test (xfrm parameters) to the child, the child and grand child setup a tunnel over veth interface and test it by sending udp packets. Cc: Shuah Khan Cc: linux-kselftest@vger.kernel.org Signed-off-by: Dmitry Safonov Signed-off-by: Steffen Klassert --- MAINTAINERS | 1 + tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/ipsec.c | 2195 ++++++++++++++++++++++++++++++++ 4 files changed, 2198 insertions(+) create mode 100644 tools/testing/selftests/net/ipsec.c (limited to 'tools') diff --git a/MAINTAINERS b/MAINTAINERS index 9a545a631f0d..9f098c7d64fb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12145,6 +12145,7 @@ F: net/ipv6/ipcomp6.c F: net/ipv6/xfrm* F: net/key/ F: net/xfrm/ +F: tools/testing/selftests/net/ipsec.c NETWORKING [IPv4/IPv6] M: "David S. Miller" diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 742c499328b2..61ae899cfc17 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only +ipsec msg_zerocopy socket psock_fanout diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 9491bbaa0831..edd4ac632dc8 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -29,6 +29,7 @@ TEST_GEN_FILES += tcp_fastopen_backup_key TEST_GEN_FILES += fin_ack_lat TEST_GEN_FILES += reuseaddr_ports_exhausted TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp +TEST_GEN_FILES += ipsec TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c new file mode 100644 index 000000000000..17ced7d6ce25 --- /dev/null +++ b/tools/testing/selftests/net/ipsec.c @@ -0,0 +1,2195 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ipsec.c - Check xfrm on veth inside a net-ns. + * Copyright (c) 2018 Dmitry Safonov + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../kselftest.h" + +#define printk(fmt, ...) \ + ksft_print_msg("%d[%u] " fmt "\n", getpid(), __LINE__, ##__VA_ARGS__) + +#define pr_err(fmt, ...) printk(fmt ": %m", ##__VA_ARGS__) + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) + +#define IPV4_STR_SZ 16 /* xxx.xxx.xxx.xxx is longest + \0 */ +#define MAX_PAYLOAD 2048 +#define XFRM_ALGO_KEY_BUF_SIZE 512 +#define MAX_PROCESSES (1 << 14) /* /16 mask divided by /30 subnets */ +#define INADDR_A ((in_addr_t) 0x0a000000) /* 10.0.0.0 */ +#define INADDR_B ((in_addr_t) 0xc0a80000) /* 192.168.0.0 */ + +/* /30 mask for one veth connection */ +#define PREFIX_LEN 30 +#define child_ip(nr) (4*nr + 1) +#define grchild_ip(nr) (4*nr + 2) + +#define VETH_FMT "ktst-%d" +#define VETH_LEN 12 + +static int nsfd_parent = -1; +static int nsfd_childa = -1; +static int nsfd_childb = -1; +static long page_size; + +/* + * ksft_cnt is static in kselftest, so isn't shared with children. + * We have to send a test result back to parent and count there. + * results_fd is a pipe with test feedback from children. + */ +static int results_fd[2]; + +const unsigned int ping_delay_nsec = 50 * 1000 * 1000; +const unsigned int ping_timeout = 300; +const unsigned int ping_count = 100; +const unsigned int ping_success = 80; + +static void randomize_buffer(void *buf, size_t buflen) +{ + int *p = (int *)buf; + size_t words = buflen / sizeof(int); + size_t leftover = buflen % sizeof(int); + + if (!buflen) + return; + + while (words--) + *p++ = rand(); + + if (leftover) { + int tmp = rand(); + + memcpy(buf + buflen - leftover, &tmp, leftover); + } + + return; +} + +static int unshare_open(void) +{ + const char *netns_path = "/proc/self/ns/net"; + int fd; + + if (unshare(CLONE_NEWNET) != 0) { + pr_err("unshare()"); + return -1; + } + + fd = open(netns_path, O_RDONLY); + if (fd <= 0) { + pr_err("open(%s)", netns_path); + return -1; + } + + return fd; +} + +static int switch_ns(int fd) +{ + if (setns(fd, CLONE_NEWNET)) { + pr_err("setns()"); + return -1; + } + return 0; +} + +/* + * Running the test inside a new parent net namespace to bother less + * about cleanup on error-path. + */ +static int init_namespaces(void) +{ + nsfd_parent = unshare_open(); + if (nsfd_parent <= 0) + return -1; + + nsfd_childa = unshare_open(); + if (nsfd_childa <= 0) + return -1; + + if (switch_ns(nsfd_parent)) + return -1; + + nsfd_childb = unshare_open(); + if (nsfd_childb <= 0) + return -1; + + if (switch_ns(nsfd_parent)) + return -1; + return 0; +} + +static int netlink_sock(int *sock, uint32_t *seq_nr, int proto) +{ + if (*sock > 0) { + seq_nr++; + return 0; + } + + *sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto); + if (*sock <= 0) { + pr_err("socket(AF_NETLINK)"); + return -1; + } + + randomize_buffer(seq_nr, sizeof(*seq_nr)); + + return 0; +} + +static inline struct rtattr *rtattr_hdr(struct nlmsghdr *nh) +{ + return (struct rtattr *)((char *)(nh) + RTA_ALIGN((nh)->nlmsg_len)); +} + +static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type, const void *payload, size_t size) +{ + /* NLMSG_ALIGNTO == RTA_ALIGNTO, nlmsg_len already aligned */ + struct rtattr *attr = rtattr_hdr(nh); + size_t nl_size = RTA_ALIGN(nh->nlmsg_len) + RTA_LENGTH(size); + + if (req_sz < nl_size) { + printk("req buf is too small: %zu < %zu", req_sz, nl_size); + return -1; + } + nh->nlmsg_len = nl_size; + + attr->rta_len = RTA_LENGTH(size); + attr->rta_type = rta_type; + memcpy(RTA_DATA(attr), payload, size); + + return 0; +} + +static struct rtattr *_rtattr_begin(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type, const void *payload, size_t size) +{ + struct rtattr *ret = rtattr_hdr(nh); + + if (rtattr_pack(nh, req_sz, rta_type, payload, size)) + return 0; + + return ret; +} + +static inline struct rtattr *rtattr_begin(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type) +{ + return _rtattr_begin(nh, req_sz, rta_type, 0, 0); +} + +static inline void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr) +{ + char *nlmsg_end = (char *)nh + nh->nlmsg_len; + + attr->rta_len = nlmsg_end - (char *)attr; +} + +static int veth_pack_peerb(struct nlmsghdr *nh, size_t req_sz, + const char *peer, int ns) +{ + struct ifinfomsg pi; + struct rtattr *peer_attr; + + memset(&pi, 0, sizeof(pi)); + pi.ifi_family = AF_UNSPEC; + pi.ifi_change = 0xFFFFFFFF; + + peer_attr = _rtattr_begin(nh, req_sz, VETH_INFO_PEER, &pi, sizeof(pi)); + if (!peer_attr) + return -1; + + if (rtattr_pack(nh, req_sz, IFLA_IFNAME, peer, strlen(peer))) + return -1; + + if (rtattr_pack(nh, req_sz, IFLA_NET_NS_FD, &ns, sizeof(ns))) + return -1; + + rtattr_end(nh, peer_attr); + + return 0; +} + +static int netlink_check_answer(int sock) +{ + struct nlmsgerror { + struct nlmsghdr hdr; + int error; + struct nlmsghdr orig_msg; + } answer; + + if (recv(sock, &answer, sizeof(answer), 0) < 0) { + pr_err("recv()"); + return -1; + } else if (answer.hdr.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)answer.hdr.nlmsg_type); + return -1; + } else if (answer.error) { + printk("NLMSG_ERROR: %d: %s", + answer.error, strerror(-answer.error)); + return answer.error; + } + + return 0; +} + +static int veth_add(int sock, uint32_t seq, const char *peera, int ns_a, + const char *peerb, int ns_b) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + const char veth_type[] = "veth"; + struct rtattr *link_info, *info_data; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = flags; + req.nh.nlmsg_seq = seq; + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_change = 0xFFFFFFFF; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, peera, strlen(peera))) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, &ns_a, sizeof(ns_a))) + return -1; + + link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO); + if (!link_info) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, veth_type, sizeof(veth_type))) + return -1; + + info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA); + if (!info_data) + return -1; + + if (veth_pack_peerb(&req.nh, sizeof(req), peerb, ns_b)) + return -1; + + rtattr_end(&req.nh, info_data); + rtattr_end(&req.nh, link_info); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + return netlink_check_answer(sock); +} + +static int ip4_addr_set(int sock, uint32_t seq, const char *intf, + struct in_addr addr, uint8_t prefix) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct { + struct nlmsghdr nh; + struct ifaddrmsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWADDR; + req.nh.nlmsg_flags = flags; + req.nh.nlmsg_seq = seq; + req.info.ifa_family = AF_INET; + req.info.ifa_prefixlen = prefix; + req.info.ifa_index = if_nametoindex(intf); + +#ifdef DEBUG + { + char addr_str[IPV4_STR_SZ] = {}; + + strncpy(addr_str, inet_ntoa(addr), IPV4_STR_SZ - 1); + + printk("ip addr set %s", addr_str); + } +#endif + + if (rtattr_pack(&req.nh, sizeof(req), IFA_LOCAL, &addr, sizeof(addr))) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFA_ADDRESS, &addr, sizeof(addr))) + return -1; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + return netlink_check_answer(sock); +} + +static int link_set_up(int sock, uint32_t seq, const char *intf) +{ + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_change = 0xFFFFFFFF; + req.info.ifi_index = if_nametoindex(intf); + req.info.ifi_flags = IFF_UP; + req.info.ifi_change = IFF_UP; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + return netlink_check_answer(sock); +} + +static int ip4_route_set(int sock, uint32_t seq, const char *intf, + struct in_addr src, struct in_addr dst) +{ + struct { + struct nlmsghdr nh; + struct rtmsg rt; + char attrbuf[MAX_PAYLOAD]; + } req; + unsigned int index = if_nametoindex(intf); + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt)); + req.nh.nlmsg_type = RTM_NEWROUTE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE; + req.nh.nlmsg_seq = seq; + req.rt.rtm_family = AF_INET; + req.rt.rtm_dst_len = 32; + req.rt.rtm_table = RT_TABLE_MAIN; + req.rt.rtm_protocol = RTPROT_BOOT; + req.rt.rtm_scope = RT_SCOPE_LINK; + req.rt.rtm_type = RTN_UNICAST; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_DST, &dst, sizeof(dst))) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_PREFSRC, &src, sizeof(src))) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_OIF, &index, sizeof(index))) + return -1; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + return netlink_check_answer(sock); +} + +static int tunnel_set_route(int route_sock, uint32_t *route_seq, char *veth, + struct in_addr tunsrc, struct in_addr tundst) +{ + if (ip4_addr_set(route_sock, (*route_seq)++, "lo", + tunsrc, PREFIX_LEN)) { + printk("Failed to set ipv4 addr"); + return -1; + } + + if (ip4_route_set(route_sock, (*route_seq)++, veth, tunsrc, tundst)) { + printk("Failed to set ipv4 route"); + return -1; + } + + return 0; +} + +static int init_child(int nsfd, char *veth, unsigned int src, unsigned int dst) +{ + struct in_addr intsrc = inet_makeaddr(INADDR_B, src); + struct in_addr tunsrc = inet_makeaddr(INADDR_A, src); + struct in_addr tundst = inet_makeaddr(INADDR_A, dst); + int route_sock = -1, ret = -1; + uint32_t route_seq; + + if (switch_ns(nsfd)) + return -1; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) { + printk("Failed to open netlink route socket in child"); + return -1; + } + + if (ip4_addr_set(route_sock, route_seq++, veth, intsrc, PREFIX_LEN)) { + printk("Failed to set ipv4 addr"); + goto err; + } + + if (link_set_up(route_sock, route_seq++, veth)) { + printk("Failed to bring up %s", veth); + goto err; + } + + if (tunnel_set_route(route_sock, &route_seq, veth, tunsrc, tundst)) { + printk("Failed to add tunnel route on %s", veth); + goto err; + } + ret = 0; + +err: + close(route_sock); + return ret; +} + +#define ALGO_LEN 64 +enum desc_type { + CREATE_TUNNEL = 0, + ALLOCATE_SPI, + MONITOR_ACQUIRE, + EXPIRE_STATE, + EXPIRE_POLICY, +}; +const char *desc_name[] = { + "create tunnel", + "alloc spi", + "monitor acquire", + "expire state", + "expire policy" +}; +struct xfrm_desc { + enum desc_type type; + uint8_t proto; + char a_algo[ALGO_LEN]; + char e_algo[ALGO_LEN]; + char c_algo[ALGO_LEN]; + char ae_algo[ALGO_LEN]; + unsigned int icv_len; + /* unsigned key_len; */ +}; + +enum msg_type { + MSG_ACK = 0, + MSG_EXIT, + MSG_PING, + MSG_XFRM_PREPARE, + MSG_XFRM_ADD, + MSG_XFRM_DEL, + MSG_XFRM_CLEANUP, +}; + +struct test_desc { + enum msg_type type; + union { + struct { + in_addr_t reply_ip; + unsigned int port; + } ping; + struct xfrm_desc xfrm_desc; + } body; +}; + +struct test_result { + struct xfrm_desc desc; + unsigned int res; +}; + +static void write_test_result(unsigned int res, struct xfrm_desc *d) +{ + struct test_result tr = {}; + ssize_t ret; + + tr.desc = *d; + tr.res = res; + + ret = write(results_fd[1], &tr, sizeof(tr)); + if (ret != sizeof(tr)) + pr_err("Failed to write the result in pipe %zd", ret); +} + +static void write_msg(int fd, struct test_desc *msg, bool exit_of_fail) +{ + ssize_t bytes = write(fd, msg, sizeof(*msg)); + + /* Make sure that write/read is atomic to a pipe */ + BUILD_BUG_ON(sizeof(struct test_desc) > PIPE_BUF); + + if (bytes < 0) { + pr_err("write()"); + if (exit_of_fail) + exit(KSFT_FAIL); + } + if (bytes != sizeof(*msg)) { + pr_err("sent part of the message %zd/%zu", bytes, sizeof(*msg)); + if (exit_of_fail) + exit(KSFT_FAIL); + } +} + +static void read_msg(int fd, struct test_desc *msg, bool exit_of_fail) +{ + ssize_t bytes = read(fd, msg, sizeof(*msg)); + + if (bytes < 0) { + pr_err("read()"); + if (exit_of_fail) + exit(KSFT_FAIL); + } + if (bytes != sizeof(*msg)) { + pr_err("got incomplete message %zd/%zu", bytes, sizeof(*msg)); + if (exit_of_fail) + exit(KSFT_FAIL); + } +} + +static int udp_ping_init(struct in_addr listen_ip, unsigned int u_timeout, + unsigned int *server_port, int sock[2]) +{ + struct sockaddr_in server; + struct timeval t = { .tv_sec = 0, .tv_usec = u_timeout }; + socklen_t s_len = sizeof(server); + + sock[0] = socket(AF_INET, SOCK_DGRAM, 0); + if (sock[0] < 0) { + pr_err("socket()"); + return -1; + } + + server.sin_family = AF_INET; + server.sin_port = 0; + memcpy(&server.sin_addr.s_addr, &listen_ip, sizeof(struct in_addr)); + + if (bind(sock[0], (struct sockaddr *)&server, s_len)) { + pr_err("bind()"); + goto err_close_server; + } + + if (getsockname(sock[0], (struct sockaddr *)&server, &s_len)) { + pr_err("getsockname()"); + goto err_close_server; + } + + *server_port = ntohs(server.sin_port); + + if (setsockopt(sock[0], SOL_SOCKET, SO_RCVTIMEO, (const char *)&t, sizeof t)) { + pr_err("setsockopt()"); + goto err_close_server; + } + + sock[1] = socket(AF_INET, SOCK_DGRAM, 0); + if (sock[1] < 0) { + pr_err("socket()"); + goto err_close_server; + } + + return 0; + +err_close_server: + close(sock[0]); + return -1; +} + +static int udp_ping_send(int sock[2], in_addr_t dest_ip, unsigned int port, + char *buf, size_t buf_len) +{ + struct sockaddr_in server; + const struct sockaddr *dest_addr = (struct sockaddr *)&server; + char *sock_buf[buf_len]; + ssize_t r_bytes, s_bytes; + + server.sin_family = AF_INET; + server.sin_port = htons(port); + server.sin_addr.s_addr = dest_ip; + + s_bytes = sendto(sock[1], buf, buf_len, 0, dest_addr, sizeof(server)); + if (s_bytes < 0) { + pr_err("sendto()"); + return -1; + } else if (s_bytes != buf_len) { + printk("send part of the message: %zd/%zu", s_bytes, sizeof(server)); + return -1; + } + + r_bytes = recv(sock[0], sock_buf, buf_len, 0); + if (r_bytes < 0) { + if (errno != EAGAIN) + pr_err("recv()"); + return -1; + } else if (r_bytes == 0) { /* EOF */ + printk("EOF on reply to ping"); + return -1; + } else if (r_bytes != buf_len || memcmp(buf, sock_buf, buf_len)) { + printk("ping reply packet is corrupted %zd/%zu", r_bytes, buf_len); + return -1; + } + + return 0; +} + +static int udp_ping_reply(int sock[2], in_addr_t dest_ip, unsigned int port, + char *buf, size_t buf_len) +{ + struct sockaddr_in server; + const struct sockaddr *dest_addr = (struct sockaddr *)&server; + char *sock_buf[buf_len]; + ssize_t r_bytes, s_bytes; + + server.sin_family = AF_INET; + server.sin_port = htons(port); + server.sin_addr.s_addr = dest_ip; + + r_bytes = recv(sock[0], sock_buf, buf_len, 0); + if (r_bytes < 0) { + if (errno != EAGAIN) + pr_err("recv()"); + return -1; + } + if (r_bytes == 0) { /* EOF */ + printk("EOF on reply to ping"); + return -1; + } + if (r_bytes != buf_len || memcmp(buf, sock_buf, buf_len)) { + printk("ping reply packet is corrupted %zd/%zu", r_bytes, buf_len); + return -1; + } + + s_bytes = sendto(sock[1], buf, buf_len, 0, dest_addr, sizeof(server)); + if (s_bytes < 0) { + pr_err("sendto()"); + return -1; + } else if (s_bytes != buf_len) { + printk("send part of the message: %zd/%zu", s_bytes, sizeof(server)); + return -1; + } + + return 0; +} + +typedef int (*ping_f)(int sock[2], in_addr_t dest_ip, unsigned int port, + char *buf, size_t buf_len); +static int do_ping(int cmd_fd, char *buf, size_t buf_len, struct in_addr from, + bool init_side, int d_port, in_addr_t to, ping_f func) +{ + struct test_desc msg; + unsigned int s_port, i, ping_succeeded = 0; + int ping_sock[2]; + char to_str[IPV4_STR_SZ] = {}, from_str[IPV4_STR_SZ] = {}; + + if (udp_ping_init(from, ping_timeout, &s_port, ping_sock)) { + printk("Failed to init ping"); + return -1; + } + + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_PING; + msg.body.ping.port = s_port; + memcpy(&msg.body.ping.reply_ip, &from, sizeof(from)); + + write_msg(cmd_fd, &msg, 0); + if (init_side) { + /* The other end sends ip to ping */ + read_msg(cmd_fd, &msg, 0); + if (msg.type != MSG_PING) + return -1; + to = msg.body.ping.reply_ip; + d_port = msg.body.ping.port; + } + + for (i = 0; i < ping_count ; i++) { + struct timespec sleep_time = { + .tv_sec = 0, + .tv_nsec = ping_delay_nsec, + }; + + ping_succeeded += !func(ping_sock, to, d_port, buf, page_size); + nanosleep(&sleep_time, 0); + } + + close(ping_sock[0]); + close(ping_sock[1]); + + strncpy(to_str, inet_ntoa(*(struct in_addr *)&to), IPV4_STR_SZ - 1); + strncpy(from_str, inet_ntoa(from), IPV4_STR_SZ - 1); + + if (ping_succeeded < ping_success) { + printk("ping (%s) %s->%s failed %u/%u times", + init_side ? "send" : "reply", from_str, to_str, + ping_count - ping_succeeded, ping_count); + return -1; + } + +#ifdef DEBUG + printk("ping (%s) %s->%s succeeded %u/%u times", + init_side ? "send" : "reply", from_str, to_str, + ping_succeeded, ping_count); +#endif + + return 0; +} + +static int xfrm_fill_key(char *name, char *buf, + size_t buf_len, unsigned int *key_len) +{ + /* TODO: use set/map instead */ + if (strncmp(name, "digest_null", ALGO_LEN) == 0) + *key_len = 0; + else if (strncmp(name, "ecb(cipher_null)", ALGO_LEN) == 0) + *key_len = 0; + else if (strncmp(name, "cbc(des)", ALGO_LEN) == 0) + *key_len = 64; + else if (strncmp(name, "hmac(md5)", ALGO_LEN) == 0) + *key_len = 128; + else if (strncmp(name, "cmac(aes)", ALGO_LEN) == 0) + *key_len = 128; + else if (strncmp(name, "xcbc(aes)", ALGO_LEN) == 0) + *key_len = 128; + else if (strncmp(name, "cbc(cast5)", ALGO_LEN) == 0) + *key_len = 128; + else if (strncmp(name, "cbc(serpent)", ALGO_LEN) == 0) + *key_len = 128; + else if (strncmp(name, "hmac(sha1)", ALGO_LEN) == 0) + *key_len = 160; + else if (strncmp(name, "hmac(rmd160)", ALGO_LEN) == 0) + *key_len = 160; + else if (strncmp(name, "cbc(des3_ede)", ALGO_LEN) == 0) + *key_len = 192; + else if (strncmp(name, "hmac(sha256)", ALGO_LEN) == 0) + *key_len = 256; + else if (strncmp(name, "cbc(aes)", ALGO_LEN) == 0) + *key_len = 256; + else if (strncmp(name, "cbc(camellia)", ALGO_LEN) == 0) + *key_len = 256; + else if (strncmp(name, "cbc(twofish)", ALGO_LEN) == 0) + *key_len = 256; + else if (strncmp(name, "rfc3686(ctr(aes))", ALGO_LEN) == 0) + *key_len = 288; + else if (strncmp(name, "hmac(sha384)", ALGO_LEN) == 0) + *key_len = 384; + else if (strncmp(name, "cbc(blowfish)", ALGO_LEN) == 0) + *key_len = 448; + else if (strncmp(name, "hmac(sha512)", ALGO_LEN) == 0) + *key_len = 512; + else if (strncmp(name, "rfc4106(gcm(aes))-128", ALGO_LEN) == 0) + *key_len = 160; + else if (strncmp(name, "rfc4543(gcm(aes))-128", ALGO_LEN) == 0) + *key_len = 160; + else if (strncmp(name, "rfc4309(ccm(aes))-128", ALGO_LEN) == 0) + *key_len = 152; + else if (strncmp(name, "rfc4106(gcm(aes))-192", ALGO_LEN) == 0) + *key_len = 224; + else if (strncmp(name, "rfc4543(gcm(aes))-192", ALGO_LEN) == 0) + *key_len = 224; + else if (strncmp(name, "rfc4309(ccm(aes))-192", ALGO_LEN) == 0) + *key_len = 216; + else if (strncmp(name, "rfc4106(gcm(aes))-256", ALGO_LEN) == 0) + *key_len = 288; + else if (strncmp(name, "rfc4543(gcm(aes))-256", ALGO_LEN) == 0) + *key_len = 288; + else if (strncmp(name, "rfc4309(ccm(aes))-256", ALGO_LEN) == 0) + *key_len = 280; + else if (strncmp(name, "rfc7539(chacha20,poly1305)-128", ALGO_LEN) == 0) + *key_len = 0; + + if (*key_len > buf_len) { + printk("Can't pack a key - too big for buffer"); + return -1; + } + + randomize_buffer(buf, *key_len); + + return 0; +} + +static int xfrm_state_pack_algo(struct nlmsghdr *nh, size_t req_sz, + struct xfrm_desc *desc) +{ + struct { + union { + struct xfrm_algo alg; + struct xfrm_algo_aead aead; + struct xfrm_algo_auth auth; + } u; + char buf[XFRM_ALGO_KEY_BUF_SIZE]; + } alg = {}; + size_t alen, elen, clen, aelen; + unsigned short type; + + alen = strlen(desc->a_algo); + elen = strlen(desc->e_algo); + clen = strlen(desc->c_algo); + aelen = strlen(desc->ae_algo); + + /* Verify desc */ + switch (desc->proto) { + case IPPROTO_AH: + if (!alen || elen || clen || aelen) { + printk("BUG: buggy ah desc"); + return -1; + } + strncpy(alg.u.alg.alg_name, desc->a_algo, ALGO_LEN - 1); + if (xfrm_fill_key(desc->a_algo, alg.u.alg.alg_key, + sizeof(alg.buf), &alg.u.alg.alg_key_len)) + return -1; + type = XFRMA_ALG_AUTH; + break; + case IPPROTO_COMP: + if (!clen || elen || alen || aelen) { + printk("BUG: buggy comp desc"); + return -1; + } + strncpy(alg.u.alg.alg_name, desc->c_algo, ALGO_LEN - 1); + if (xfrm_fill_key(desc->c_algo, alg.u.alg.alg_key, + sizeof(alg.buf), &alg.u.alg.alg_key_len)) + return -1; + type = XFRMA_ALG_COMP; + break; + case IPPROTO_ESP: + if (!((alen && elen) ^ aelen) || clen) { + printk("BUG: buggy esp desc"); + return -1; + } + if (aelen) { + alg.u.aead.alg_icv_len = desc->icv_len; + strncpy(alg.u.aead.alg_name, desc->ae_algo, ALGO_LEN - 1); + if (xfrm_fill_key(desc->ae_algo, alg.u.aead.alg_key, + sizeof(alg.buf), &alg.u.aead.alg_key_len)) + return -1; + type = XFRMA_ALG_AEAD; + } else { + + strncpy(alg.u.alg.alg_name, desc->e_algo, ALGO_LEN - 1); + type = XFRMA_ALG_CRYPT; + if (xfrm_fill_key(desc->e_algo, alg.u.alg.alg_key, + sizeof(alg.buf), &alg.u.alg.alg_key_len)) + return -1; + if (rtattr_pack(nh, req_sz, type, &alg, sizeof(alg))) + return -1; + + strncpy(alg.u.alg.alg_name, desc->a_algo, ALGO_LEN); + type = XFRMA_ALG_AUTH; + if (xfrm_fill_key(desc->a_algo, alg.u.alg.alg_key, + sizeof(alg.buf), &alg.u.alg.alg_key_len)) + return -1; + } + break; + default: + printk("BUG: unknown proto in desc"); + return -1; + } + + if (rtattr_pack(nh, req_sz, type, &alg, sizeof(alg))) + return -1; + + return 0; +} + +static inline uint32_t gen_spi(struct in_addr src) +{ + return htonl(inet_lnaof(src)); +} + +static int xfrm_state_add(int xfrm_sock, uint32_t seq, uint32_t spi, + struct in_addr src, struct in_addr dst, + struct xfrm_desc *desc) +{ + struct { + struct nlmsghdr nh; + struct xfrm_usersa_info info; + char attrbuf[MAX_PAYLOAD]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = XFRM_MSG_NEWSA; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + + /* Fill selector. */ + memcpy(&req.info.sel.daddr, &dst, sizeof(dst)); + memcpy(&req.info.sel.saddr, &src, sizeof(src)); + req.info.sel.family = AF_INET; + req.info.sel.prefixlen_d = PREFIX_LEN; + req.info.sel.prefixlen_s = PREFIX_LEN; + + /* Fill id */ + memcpy(&req.info.id.daddr, &dst, sizeof(dst)); + /* Note: zero-spi cannot be deleted */ + req.info.id.spi = spi; + req.info.id.proto = desc->proto; + + memcpy(&req.info.saddr, &src, sizeof(src)); + + /* Fill lifteme_cfg */ + req.info.lft.soft_byte_limit = XFRM_INF; + req.info.lft.hard_byte_limit = XFRM_INF; + req.info.lft.soft_packet_limit = XFRM_INF; + req.info.lft.hard_packet_limit = XFRM_INF; + + req.info.family = AF_INET; + req.info.mode = XFRM_MODE_TUNNEL; + + if (xfrm_state_pack_algo(&req.nh, sizeof(req), desc)) + return -1; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + return netlink_check_answer(xfrm_sock); +} + +static bool xfrm_usersa_found(struct xfrm_usersa_info *info, uint32_t spi, + struct in_addr src, struct in_addr dst, + struct xfrm_desc *desc) +{ + if (memcmp(&info->sel.daddr, &dst, sizeof(dst))) + return false; + + if (memcmp(&info->sel.saddr, &src, sizeof(src))) + return false; + + if (info->sel.family != AF_INET || + info->sel.prefixlen_d != PREFIX_LEN || + info->sel.prefixlen_s != PREFIX_LEN) + return false; + + if (info->id.spi != spi || info->id.proto != desc->proto) + return false; + + if (memcmp(&info->id.daddr, &dst, sizeof(dst))) + return false; + + if (memcmp(&info->saddr, &src, sizeof(src))) + return false; + + if (info->lft.soft_byte_limit != XFRM_INF || + info->lft.hard_byte_limit != XFRM_INF || + info->lft.soft_packet_limit != XFRM_INF || + info->lft.hard_packet_limit != XFRM_INF) + return false; + + if (info->family != AF_INET || info->mode != XFRM_MODE_TUNNEL) + return false; + + /* XXX: check xfrm algo, see xfrm_state_pack_algo(). */ + + return true; +} + +static int xfrm_state_check(int xfrm_sock, uint32_t seq, uint32_t spi, + struct in_addr src, struct in_addr dst, + struct xfrm_desc *desc) +{ + struct { + struct nlmsghdr nh; + char attrbuf[MAX_PAYLOAD]; + } req; + struct { + struct nlmsghdr nh; + union { + struct xfrm_usersa_info info; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } answer; + struct xfrm_address_filter filter = {}; + bool found = false; + + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(0); + req.nh.nlmsg_type = XFRM_MSG_GETSA; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.nh.nlmsg_seq = seq; + + /* + * Add dump filter by source address as there may be other tunnels + * in this netns (if tests run in parallel). + */ + filter.family = AF_INET; + filter.splen = 0x1f; /* 0xffffffff mask see addr_match() */ + memcpy(&filter.saddr, &src, sizeof(src)); + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_ADDRESS_FILTER, + &filter, sizeof(filter))) + return -1; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + while (1) { + if (recv(xfrm_sock, &answer, sizeof(answer), 0) < 0) { + pr_err("recv()"); + return -1; + } + if (answer.nh.nlmsg_type == NLMSG_ERROR) { + printk("NLMSG_ERROR: %d: %s", + answer.error, strerror(-answer.error)); + return -1; + } else if (answer.nh.nlmsg_type == NLMSG_DONE) { + if (found) + return 0; + printk("didn't find allocated xfrm state in dump"); + return -1; + } else if (answer.nh.nlmsg_type == XFRM_MSG_NEWSA) { + if (xfrm_usersa_found(&answer.info, spi, src, dst, desc)) + found = true; + } + } +} + +static int xfrm_set(int xfrm_sock, uint32_t *seq, + struct in_addr src, struct in_addr dst, + struct in_addr tunsrc, struct in_addr tundst, + struct xfrm_desc *desc) +{ + int err; + + err = xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc); + if (err) { + printk("Failed to add xfrm state"); + return -1; + } + + err = xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), dst, src, desc); + if (err) { + printk("Failed to add xfrm state"); + return -1; + } + + /* Check dumps for XFRM_MSG_GETSA */ + err = xfrm_state_check(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc); + err |= xfrm_state_check(xfrm_sock, (*seq)++, gen_spi(src), dst, src, desc); + if (err) { + printk("Failed to check xfrm state"); + return -1; + } + + return 0; +} + +static int xfrm_policy_add(int xfrm_sock, uint32_t seq, uint32_t spi, + struct in_addr src, struct in_addr dst, uint8_t dir, + struct in_addr tunsrc, struct in_addr tundst, uint8_t proto) +{ + struct { + struct nlmsghdr nh; + struct xfrm_userpolicy_info info; + char attrbuf[MAX_PAYLOAD]; + } req; + struct xfrm_user_tmpl tmpl; + + memset(&req, 0, sizeof(req)); + memset(&tmpl, 0, sizeof(tmpl)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = XFRM_MSG_NEWPOLICY; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + + /* Fill selector. */ + memcpy(&req.info.sel.daddr, &dst, sizeof(tundst)); + memcpy(&req.info.sel.saddr, &src, sizeof(tunsrc)); + req.info.sel.family = AF_INET; + req.info.sel.prefixlen_d = PREFIX_LEN; + req.info.sel.prefixlen_s = PREFIX_LEN; + + /* Fill lifteme_cfg */ + req.info.lft.soft_byte_limit = XFRM_INF; + req.info.lft.hard_byte_limit = XFRM_INF; + req.info.lft.soft_packet_limit = XFRM_INF; + req.info.lft.hard_packet_limit = XFRM_INF; + + req.info.dir = dir; + + /* Fill tmpl */ + memcpy(&tmpl.id.daddr, &dst, sizeof(dst)); + /* Note: zero-spi cannot be deleted */ + tmpl.id.spi = spi; + tmpl.id.proto = proto; + tmpl.family = AF_INET; + memcpy(&tmpl.saddr, &src, sizeof(src)); + tmpl.mode = XFRM_MODE_TUNNEL; + tmpl.aalgos = (~(uint32_t)0); + tmpl.ealgos = (~(uint32_t)0); + tmpl.calgos = (~(uint32_t)0); + + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_TMPL, &tmpl, sizeof(tmpl))) + return -1; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + return netlink_check_answer(xfrm_sock); +} + +static int xfrm_prepare(int xfrm_sock, uint32_t *seq, + struct in_addr src, struct in_addr dst, + struct in_addr tunsrc, struct in_addr tundst, uint8_t proto) +{ + if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, + XFRM_POLICY_OUT, tunsrc, tundst, proto)) { + printk("Failed to add xfrm policy"); + return -1; + } + + if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), dst, src, + XFRM_POLICY_IN, tunsrc, tundst, proto)) { + printk("Failed to add xfrm policy"); + return -1; + } + + return 0; +} + +static int xfrm_policy_del(int xfrm_sock, uint32_t seq, + struct in_addr src, struct in_addr dst, uint8_t dir, + struct in_addr tunsrc, struct in_addr tundst) +{ + struct { + struct nlmsghdr nh; + struct xfrm_userpolicy_id id; + char attrbuf[MAX_PAYLOAD]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.id)); + req.nh.nlmsg_type = XFRM_MSG_DELPOLICY; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + + /* Fill id */ + memcpy(&req.id.sel.daddr, &dst, sizeof(tundst)); + memcpy(&req.id.sel.saddr, &src, sizeof(tunsrc)); + req.id.sel.family = AF_INET; + req.id.sel.prefixlen_d = PREFIX_LEN; + req.id.sel.prefixlen_s = PREFIX_LEN; + req.id.dir = dir; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + return netlink_check_answer(xfrm_sock); +} + +static int xfrm_cleanup(int xfrm_sock, uint32_t *seq, + struct in_addr src, struct in_addr dst, + struct in_addr tunsrc, struct in_addr tundst) +{ + if (xfrm_policy_del(xfrm_sock, (*seq)++, src, dst, + XFRM_POLICY_OUT, tunsrc, tundst)) { + printk("Failed to add xfrm policy"); + return -1; + } + + if (xfrm_policy_del(xfrm_sock, (*seq)++, dst, src, + XFRM_POLICY_IN, tunsrc, tundst)) { + printk("Failed to add xfrm policy"); + return -1; + } + + return 0; +} + +static int xfrm_state_del(int xfrm_sock, uint32_t seq, uint32_t spi, + struct in_addr src, struct in_addr dst, uint8_t proto) +{ + struct { + struct nlmsghdr nh; + struct xfrm_usersa_id id; + char attrbuf[MAX_PAYLOAD]; + } req; + xfrm_address_t saddr = {}; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.id)); + req.nh.nlmsg_type = XFRM_MSG_DELSA; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + + memcpy(&req.id.daddr, &dst, sizeof(dst)); + req.id.family = AF_INET; + req.id.proto = proto; + /* Note: zero-spi cannot be deleted */ + req.id.spi = spi; + + memcpy(&saddr, &src, sizeof(src)); + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SRCADDR, &saddr, sizeof(saddr))) + return -1; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + return netlink_check_answer(xfrm_sock); +} + +static int xfrm_delete(int xfrm_sock, uint32_t *seq, + struct in_addr src, struct in_addr dst, + struct in_addr tunsrc, struct in_addr tundst, uint8_t proto) +{ + if (xfrm_state_del(xfrm_sock, (*seq)++, gen_spi(src), src, dst, proto)) { + printk("Failed to remove xfrm state"); + return -1; + } + + if (xfrm_state_del(xfrm_sock, (*seq)++, gen_spi(src), dst, src, proto)) { + printk("Failed to remove xfrm state"); + return -1; + } + + return 0; +} + +static int xfrm_state_allocspi(int xfrm_sock, uint32_t *seq, + uint32_t spi, uint8_t proto) +{ + struct { + struct nlmsghdr nh; + struct xfrm_userspi_info spi; + } req; + struct { + struct nlmsghdr nh; + union { + struct xfrm_usersa_info info; + int error; + }; + } answer; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.spi)); + req.nh.nlmsg_type = XFRM_MSG_ALLOCSPI; + req.nh.nlmsg_flags = NLM_F_REQUEST; + req.nh.nlmsg_seq = (*seq)++; + + req.spi.info.family = AF_INET; + req.spi.min = spi; + req.spi.max = spi; + req.spi.info.id.proto = proto; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return KSFT_FAIL; + } + + if (recv(xfrm_sock, &answer, sizeof(answer), 0) < 0) { + pr_err("recv()"); + return KSFT_FAIL; + } else if (answer.nh.nlmsg_type == XFRM_MSG_NEWSA) { + uint32_t new_spi = htonl(answer.info.id.spi); + + if (new_spi != spi) { + printk("allocated spi is different from requested: %#x != %#x", + new_spi, spi); + return KSFT_FAIL; + } + return KSFT_PASS; + } else if (answer.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)answer.nh.nlmsg_type); + return KSFT_FAIL; + } + + printk("NLMSG_ERROR: %d: %s", answer.error, strerror(-answer.error)); + return (answer.error) ? KSFT_FAIL : KSFT_PASS; +} + +static int netlink_sock_bind(int *sock, uint32_t *seq, int proto, uint32_t groups) +{ + struct sockaddr_nl snl = {}; + socklen_t addr_len; + int ret = -1; + + snl.nl_family = AF_NETLINK; + snl.nl_groups = groups; + + if (netlink_sock(sock, seq, proto)) { + printk("Failed to open xfrm netlink socket"); + return -1; + } + + if (bind(*sock, (struct sockaddr *)&snl, sizeof(snl)) < 0) { + pr_err("bind()"); + goto out_close; + } + + addr_len = sizeof(snl); + if (getsockname(*sock, (struct sockaddr *)&snl, &addr_len) < 0) { + pr_err("getsockname()"); + goto out_close; + } + if (addr_len != sizeof(snl)) { + printk("Wrong address length %d", addr_len); + goto out_close; + } + if (snl.nl_family != AF_NETLINK) { + printk("Wrong address family %d", snl.nl_family); + goto out_close; + } + return 0; + +out_close: + close(*sock); + return ret; +} + +static int xfrm_monitor_acquire(int xfrm_sock, uint32_t *seq, unsigned int nr) +{ + struct { + struct nlmsghdr nh; + union { + struct xfrm_user_acquire acq; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } req; + struct xfrm_user_tmpl xfrm_tmpl = {}; + int xfrm_listen = -1, ret = KSFT_FAIL; + uint32_t seq_listen; + + if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_ACQUIRE)) + return KSFT_FAIL; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.acq)); + req.nh.nlmsg_type = XFRM_MSG_ACQUIRE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = (*seq)++; + + req.acq.policy.sel.family = AF_INET; + req.acq.aalgos = 0xfeed; + req.acq.ealgos = 0xbaad; + req.acq.calgos = 0xbabe; + + xfrm_tmpl.family = AF_INET; + xfrm_tmpl.id.proto = IPPROTO_ESP; + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_TMPL, &xfrm_tmpl, sizeof(xfrm_tmpl))) + goto out_close; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + goto out_close; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + goto out_close; + } + + if (req.error) { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + ret = req.error; + goto out_close; + } + + if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } + + if (req.acq.aalgos != 0xfeed || req.acq.ealgos != 0xbaad + || req.acq.calgos != 0xbabe) { + printk("xfrm_user_acquire has changed %x %x %x", + req.acq.aalgos, req.acq.ealgos, req.acq.calgos); + goto out_close; + } + + ret = KSFT_PASS; +out_close: + close(xfrm_listen); + return ret; +} + +static int xfrm_expire_state(int xfrm_sock, uint32_t *seq, + unsigned int nr, struct xfrm_desc *desc) +{ + struct { + struct nlmsghdr nh; + union { + struct xfrm_user_expire expire; + int error; + }; + } req; + struct in_addr src, dst; + int xfrm_listen = -1, ret = KSFT_FAIL; + uint32_t seq_listen; + + src = inet_makeaddr(INADDR_B, child_ip(nr)); + dst = inet_makeaddr(INADDR_B, grchild_ip(nr)); + + if (xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc)) { + printk("Failed to add xfrm state"); + return KSFT_FAIL; + } + + if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_EXPIRE)) + return KSFT_FAIL; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.expire)); + req.nh.nlmsg_type = XFRM_MSG_EXPIRE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = (*seq)++; + + memcpy(&req.expire.state.id.daddr, &dst, sizeof(dst)); + req.expire.state.id.spi = gen_spi(src); + req.expire.state.id.proto = desc->proto; + req.expire.state.family = AF_INET; + req.expire.hard = 0xff; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + goto out_close; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + goto out_close; + } + + if (req.error) { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + ret = req.error; + goto out_close; + } + + if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } + + if (req.expire.hard != 0x1) { + printk("expire.hard is not set: %x", req.expire.hard); + goto out_close; + } + + ret = KSFT_PASS; +out_close: + close(xfrm_listen); + return ret; +} + +static int xfrm_expire_policy(int xfrm_sock, uint32_t *seq, + unsigned int nr, struct xfrm_desc *desc) +{ + struct { + struct nlmsghdr nh; + union { + struct xfrm_user_polexpire expire; + int error; + }; + } req; + struct in_addr src, dst, tunsrc, tundst; + int xfrm_listen = -1, ret = KSFT_FAIL; + uint32_t seq_listen; + + src = inet_makeaddr(INADDR_B, child_ip(nr)); + dst = inet_makeaddr(INADDR_B, grchild_ip(nr)); + tunsrc = inet_makeaddr(INADDR_A, child_ip(nr)); + tundst = inet_makeaddr(INADDR_A, grchild_ip(nr)); + + if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, + XFRM_POLICY_OUT, tunsrc, tundst, desc->proto)) { + printk("Failed to add xfrm policy"); + return KSFT_FAIL; + } + + if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_EXPIRE)) + return KSFT_FAIL; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.expire)); + req.nh.nlmsg_type = XFRM_MSG_POLEXPIRE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = (*seq)++; + + /* Fill selector. */ + memcpy(&req.expire.pol.sel.daddr, &dst, sizeof(tundst)); + memcpy(&req.expire.pol.sel.saddr, &src, sizeof(tunsrc)); + req.expire.pol.sel.family = AF_INET; + req.expire.pol.sel.prefixlen_d = PREFIX_LEN; + req.expire.pol.sel.prefixlen_s = PREFIX_LEN; + req.expire.pol.dir = XFRM_POLICY_OUT; + req.expire.hard = 0xff; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + goto out_close; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + goto out_close; + } + + if (req.error) { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + ret = req.error; + goto out_close; + } + + if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } + + if (req.expire.hard != 0x1) { + printk("expire.hard is not set: %x", req.expire.hard); + goto out_close; + } + + ret = KSFT_PASS; +out_close: + close(xfrm_listen); + return ret; +} + +static int child_serv(int xfrm_sock, uint32_t *seq, + unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc) +{ + struct in_addr src, dst, tunsrc, tundst; + struct test_desc msg; + int ret = KSFT_FAIL; + + src = inet_makeaddr(INADDR_B, child_ip(nr)); + dst = inet_makeaddr(INADDR_B, grchild_ip(nr)); + tunsrc = inet_makeaddr(INADDR_A, child_ip(nr)); + tundst = inet_makeaddr(INADDR_A, grchild_ip(nr)); + + /* UDP pinging without xfrm */ + if (do_ping(cmd_fd, buf, page_size, src, true, 0, 0, udp_ping_send)) { + printk("ping failed before setting xfrm"); + return KSFT_FAIL; + } + + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_XFRM_PREPARE; + memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc)); + write_msg(cmd_fd, &msg, 1); + + if (xfrm_prepare(xfrm_sock, seq, src, dst, tunsrc, tundst, desc->proto)) { + printk("failed to prepare xfrm"); + goto cleanup; + } + + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_XFRM_ADD; + memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc)); + write_msg(cmd_fd, &msg, 1); + if (xfrm_set(xfrm_sock, seq, src, dst, tunsrc, tundst, desc)) { + printk("failed to set xfrm"); + goto delete; + } + + /* UDP pinging with xfrm tunnel */ + if (do_ping(cmd_fd, buf, page_size, tunsrc, + true, 0, 0, udp_ping_send)) { + printk("ping failed for xfrm"); + goto delete; + } + + ret = KSFT_PASS; +delete: + /* xfrm delete */ + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_XFRM_DEL; + memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc)); + write_msg(cmd_fd, &msg, 1); + + if (xfrm_delete(xfrm_sock, seq, src, dst, tunsrc, tundst, desc->proto)) { + printk("failed ping to remove xfrm"); + ret = KSFT_FAIL; + } + +cleanup: + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_XFRM_CLEANUP; + memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc)); + write_msg(cmd_fd, &msg, 1); + if (xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst)) { + printk("failed ping to cleanup xfrm"); + ret = KSFT_FAIL; + } + return ret; +} + +static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf) +{ + struct xfrm_desc desc; + struct test_desc msg; + int xfrm_sock = -1; + uint32_t seq; + + if (switch_ns(nsfd_childa)) + exit(KSFT_FAIL); + + if (netlink_sock(&xfrm_sock, &seq, NETLINK_XFRM)) { + printk("Failed to open xfrm netlink socket"); + exit(KSFT_FAIL); + } + + /* Check that seq sock is ready, just for sure. */ + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_ACK; + write_msg(cmd_fd, &msg, 1); + read_msg(cmd_fd, &msg, 1); + if (msg.type != MSG_ACK) { + printk("Ack failed"); + exit(KSFT_FAIL); + } + + for (;;) { + ssize_t received = read(test_desc_fd, &desc, sizeof(desc)); + int ret; + + if (received == 0) /* EOF */ + break; + + if (received != sizeof(desc)) { + pr_err("read() returned %zd", received); + exit(KSFT_FAIL); + } + + switch (desc.type) { + case CREATE_TUNNEL: + ret = child_serv(xfrm_sock, &seq, nr, + cmd_fd, buf, &desc); + break; + case ALLOCATE_SPI: + ret = xfrm_state_allocspi(xfrm_sock, &seq, + -1, desc.proto); + break; + case MONITOR_ACQUIRE: + ret = xfrm_monitor_acquire(xfrm_sock, &seq, nr); + break; + case EXPIRE_STATE: + ret = xfrm_expire_state(xfrm_sock, &seq, nr, &desc); + break; + case EXPIRE_POLICY: + ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc); + break; + default: + printk("Unknown desc type %d", desc.type); + exit(KSFT_FAIL); + } + write_test_result(ret, &desc); + } + + close(xfrm_sock); + + msg.type = MSG_EXIT; + write_msg(cmd_fd, &msg, 1); + exit(KSFT_PASS); +} + +static void grand_child_serv(unsigned int nr, int cmd_fd, void *buf, + struct test_desc *msg, int xfrm_sock, uint32_t *seq) +{ + struct in_addr src, dst, tunsrc, tundst; + bool tun_reply; + struct xfrm_desc *desc = &msg->body.xfrm_desc; + + src = inet_makeaddr(INADDR_B, grchild_ip(nr)); + dst = inet_makeaddr(INADDR_B, child_ip(nr)); + tunsrc = inet_makeaddr(INADDR_A, grchild_ip(nr)); + tundst = inet_makeaddr(INADDR_A, child_ip(nr)); + + switch (msg->type) { + case MSG_EXIT: + exit(KSFT_PASS); + case MSG_ACK: + write_msg(cmd_fd, msg, 1); + break; + case MSG_PING: + tun_reply = memcmp(&dst, &msg->body.ping.reply_ip, sizeof(in_addr_t)); + /* UDP pinging without xfrm */ + if (do_ping(cmd_fd, buf, page_size, tun_reply ? tunsrc : src, + false, msg->body.ping.port, + msg->body.ping.reply_ip, udp_ping_reply)) { + printk("ping failed before setting xfrm"); + } + break; + case MSG_XFRM_PREPARE: + if (xfrm_prepare(xfrm_sock, seq, src, dst, tunsrc, tundst, + desc->proto)) { + xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst); + printk("failed to prepare xfrm"); + } + break; + case MSG_XFRM_ADD: + if (xfrm_set(xfrm_sock, seq, src, dst, tunsrc, tundst, desc)) { + xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst); + printk("failed to set xfrm"); + } + break; + case MSG_XFRM_DEL: + if (xfrm_delete(xfrm_sock, seq, src, dst, tunsrc, tundst, + desc->proto)) { + xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst); + printk("failed to remove xfrm"); + } + break; + case MSG_XFRM_CLEANUP: + if (xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst)) { + printk("failed to cleanup xfrm"); + } + break; + default: + printk("got unknown msg type %d", msg->type); + }; +} + +static int grand_child_f(unsigned int nr, int cmd_fd, void *buf) +{ + struct test_desc msg; + int xfrm_sock = -1; + uint32_t seq; + + if (switch_ns(nsfd_childb)) + exit(KSFT_FAIL); + + if (netlink_sock(&xfrm_sock, &seq, NETLINK_XFRM)) { + printk("Failed to open xfrm netlink socket"); + exit(KSFT_FAIL); + } + + do { + read_msg(cmd_fd, &msg, 1); + grand_child_serv(nr, cmd_fd, buf, &msg, xfrm_sock, &seq); + } while (1); + + close(xfrm_sock); + exit(KSFT_FAIL); +} + +static int start_child(unsigned int nr, char *veth, int test_desc_fd[2]) +{ + int cmd_sock[2]; + void *data_map; + pid_t child; + + if (init_child(nsfd_childa, veth, child_ip(nr), grchild_ip(nr))) + return -1; + + if (init_child(nsfd_childb, veth, grchild_ip(nr), child_ip(nr))) + return -1; + + child = fork(); + if (child < 0) { + pr_err("fork()"); + return -1; + } else if (child) { + /* in parent - selftest */ + return switch_ns(nsfd_parent); + } + + if (close(test_desc_fd[1])) { + pr_err("close()"); + return -1; + } + + /* child */ + data_map = mmap(0, page_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (data_map == MAP_FAILED) { + pr_err("mmap()"); + return -1; + } + + randomize_buffer(data_map, page_size); + + if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, cmd_sock)) { + pr_err("socketpair()"); + return -1; + } + + child = fork(); + if (child < 0) { + pr_err("fork()"); + return -1; + } else if (child) { + if (close(cmd_sock[0])) { + pr_err("close()"); + return -1; + } + return child_f(nr, test_desc_fd[0], cmd_sock[1], data_map); + } + if (close(cmd_sock[1])) { + pr_err("close()"); + return -1; + } + return grand_child_f(nr, cmd_sock[0], data_map); +} + +static void exit_usage(char **argv) +{ + printk("Usage: %s [nr_process]", argv[0]); + exit(KSFT_FAIL); +} + +static int __write_desc(int test_desc_fd, struct xfrm_desc *desc) +{ + ssize_t ret; + + ret = write(test_desc_fd, desc, sizeof(*desc)); + + if (ret == sizeof(*desc)) + return 0; + + pr_err("Writing test's desc failed %ld", ret); + + return -1; +} + +static int write_desc(int proto, int test_desc_fd, + char *a, char *e, char *c, char *ae) +{ + struct xfrm_desc desc = {}; + + desc.type = CREATE_TUNNEL; + desc.proto = proto; + + if (a) + strncpy(desc.a_algo, a, ALGO_LEN - 1); + if (e) + strncpy(desc.e_algo, e, ALGO_LEN - 1); + if (c) + strncpy(desc.c_algo, c, ALGO_LEN - 1); + if (ae) + strncpy(desc.ae_algo, ae, ALGO_LEN - 1); + + return __write_desc(test_desc_fd, &desc); +} + +int proto_list[] = { IPPROTO_AH, IPPROTO_COMP, IPPROTO_ESP }; +char *ah_list[] = { + "digest_null", "hmac(md5)", "hmac(sha1)", "hmac(sha256)", + "hmac(sha384)", "hmac(sha512)", "hmac(rmd160)", + "xcbc(aes)", "cmac(aes)" +}; +char *comp_list[] = { + "deflate", +#if 0 + /* No compression backend realization */ + "lzs", "lzjh" +#endif +}; +char *e_list[] = { + "ecb(cipher_null)", "cbc(des)", "cbc(des3_ede)", "cbc(cast5)", + "cbc(blowfish)", "cbc(aes)", "cbc(serpent)", "cbc(camellia)", + "cbc(twofish)", "rfc3686(ctr(aes))" +}; +char *ae_list[] = { +#if 0 + /* not implemented */ + "rfc4106(gcm(aes))", "rfc4309(ccm(aes))", "rfc4543(gcm(aes))", + "rfc7539esp(chacha20,poly1305)" +#endif +}; + +const unsigned int proto_plan = ARRAY_SIZE(ah_list) + ARRAY_SIZE(comp_list) \ + + (ARRAY_SIZE(ah_list) * ARRAY_SIZE(e_list)) \ + + ARRAY_SIZE(ae_list); + +static int write_proto_plan(int fd, int proto) +{ + unsigned int i; + + switch (proto) { + case IPPROTO_AH: + for (i = 0; i < ARRAY_SIZE(ah_list); i++) { + if (write_desc(proto, fd, ah_list[i], 0, 0, 0)) + return -1; + } + break; + case IPPROTO_COMP: + for (i = 0; i < ARRAY_SIZE(comp_list); i++) { + if (write_desc(proto, fd, 0, 0, comp_list[i], 0)) + return -1; + } + break; + case IPPROTO_ESP: + for (i = 0; i < ARRAY_SIZE(ah_list); i++) { + int j; + + for (j = 0; j < ARRAY_SIZE(e_list); j++) { + if (write_desc(proto, fd, ah_list[i], + e_list[j], 0, 0)) + return -1; + } + } + for (i = 0; i < ARRAY_SIZE(ae_list); i++) { + if (write_desc(proto, fd, 0, 0, 0, ae_list[i])) + return -1; + } + break; + default: + printk("BUG: Specified unknown proto %d", proto); + return -1; + } + + return 0; +} + +/* + * Some structures in xfrm uapi header differ in size between + * 64-bit and 32-bit ABI: + * + * 32-bit UABI | 64-bit UABI + * -------------------------------------|------------------------------------- + * sizeof(xfrm_usersa_info) = 220 | sizeof(xfrm_usersa_info) = 224 + * sizeof(xfrm_userpolicy_info) = 164 | sizeof(xfrm_userpolicy_info) = 168 + * sizeof(xfrm_userspi_info) = 228 | sizeof(xfrm_userspi_info) = 232 + * sizeof(xfrm_user_acquire) = 276 | sizeof(xfrm_user_acquire) = 280 + * sizeof(xfrm_user_expire) = 224 | sizeof(xfrm_user_expire) = 232 + * sizeof(xfrm_user_polexpire) = 168 | sizeof(xfrm_user_polexpire) = 176 + * + * Check the affected by the UABI difference structures. + */ +const unsigned int compat_plan = 4; +static int write_compat_struct_tests(int test_desc_fd) +{ + struct xfrm_desc desc = {}; + + desc.type = ALLOCATE_SPI; + desc.proto = IPPROTO_AH; + strncpy(desc.a_algo, ah_list[0], ALGO_LEN - 1); + + if (__write_desc(test_desc_fd, &desc)) + return -1; + + desc.type = MONITOR_ACQUIRE; + if (__write_desc(test_desc_fd, &desc)) + return -1; + + desc.type = EXPIRE_STATE; + if (__write_desc(test_desc_fd, &desc)) + return -1; + + desc.type = EXPIRE_POLICY; + if (__write_desc(test_desc_fd, &desc)) + return -1; + + return 0; +} + +static int write_test_plan(int test_desc_fd) +{ + unsigned int i; + pid_t child; + + child = fork(); + if (child < 0) { + pr_err("fork()"); + return -1; + } + if (child) { + if (close(test_desc_fd)) + printk("close(): %m"); + return 0; + } + + if (write_compat_struct_tests(test_desc_fd)) + exit(KSFT_FAIL); + + for (i = 0; i < ARRAY_SIZE(proto_list); i++) { + if (write_proto_plan(test_desc_fd, proto_list[i])) + exit(KSFT_FAIL); + } + + exit(KSFT_PASS); +} + +static int children_cleanup(void) +{ + unsigned ret = KSFT_PASS; + + while (1) { + int status; + pid_t p = wait(&status); + + if ((p < 0) && errno == ECHILD) + break; + + if (p < 0) { + pr_err("wait()"); + return KSFT_FAIL; + } + + if (!WIFEXITED(status)) { + ret = KSFT_FAIL; + continue; + } + + if (WEXITSTATUS(status) == KSFT_FAIL) + ret = KSFT_FAIL; + } + + return ret; +} + +typedef void (*print_res)(const char *, ...); + +static int check_results(void) +{ + struct test_result tr = {}; + struct xfrm_desc *d = &tr.desc; + int ret = KSFT_PASS; + + while (1) { + ssize_t received = read(results_fd[0], &tr, sizeof(tr)); + print_res result; + + if (received == 0) /* EOF */ + break; + + if (received != sizeof(tr)) { + pr_err("read() returned %zd", received); + return KSFT_FAIL; + } + + switch (tr.res) { + case KSFT_PASS: + result = ksft_test_result_pass; + break; + case KSFT_FAIL: + default: + result = ksft_test_result_fail; + ret = KSFT_FAIL; + } + + result(" %s: [%u, '%s', '%s', '%s', '%s', %u]\n", + desc_name[d->type], (unsigned int)d->proto, d->a_algo, + d->e_algo, d->c_algo, d->ae_algo, d->icv_len); + } + + return ret; +} + +int main(int argc, char **argv) +{ + unsigned int nr_process = 1; + int route_sock = -1, ret = KSFT_SKIP; + int test_desc_fd[2]; + uint32_t route_seq; + unsigned int i; + + if (argc > 2) + exit_usage(argv); + + if (argc > 1) { + char *endptr; + + errno = 0; + nr_process = strtol(argv[1], &endptr, 10); + if ((errno == ERANGE && (nr_process == LONG_MAX || nr_process == LONG_MIN)) + || (errno != 0 && nr_process == 0) + || (endptr == argv[1]) || (*endptr != '\0')) { + printk("Failed to parse [nr_process]"); + exit_usage(argv); + } + + if (nr_process > MAX_PROCESSES || !nr_process) { + printk("nr_process should be between [1; %u]", + MAX_PROCESSES); + exit_usage(argv); + } + } + + srand(time(NULL)); + page_size = sysconf(_SC_PAGESIZE); + if (page_size < 1) + ksft_exit_skip("sysconf(): %m\n"); + + if (pipe2(test_desc_fd, O_DIRECT) < 0) + ksft_exit_skip("pipe(): %m\n"); + + if (pipe2(results_fd, O_DIRECT) < 0) + ksft_exit_skip("pipe(): %m\n"); + + if (init_namespaces()) + ksft_exit_skip("Failed to create namespaces\n"); + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + ksft_exit_skip("Failed to open netlink route socket\n"); + + for (i = 0; i < nr_process; i++) { + char veth[VETH_LEN]; + + snprintf(veth, VETH_LEN, VETH_FMT, i); + + if (veth_add(route_sock, route_seq++, veth, nsfd_childa, veth, nsfd_childb)) { + close(route_sock); + ksft_exit_fail_msg("Failed to create veth device"); + } + + if (start_child(i, veth, test_desc_fd)) { + close(route_sock); + ksft_exit_fail_msg("Child %u failed to start", i); + } + } + + if (close(route_sock) || close(test_desc_fd[0]) || close(results_fd[1])) + ksft_exit_fail_msg("close(): %m"); + + ksft_set_plan(proto_plan + compat_plan); + + if (write_test_plan(test_desc_fd[1])) + ksft_exit_fail_msg("Failed to write test plan to pipe"); + + ret = check_results(); + + if (children_cleanup() == KSFT_FAIL) + exit(KSFT_FAIL); + + exit(ret); +} -- cgit v1.3.1 From be61316003d962cece23cca867aafd8e67e48a90 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 24 Sep 2020 08:29:52 +0800 Subject: selftests: mptcp: add ADD_ADDR mibs check function This patch added the ADD_ADDR related mibs counter check function chk_add_nr(). This function check both ADD_ADDR and ADD_ADDR with echo flag. The output looks like this: 07 unused signal address syn[ ok ] - synack[ ok ] - ack[ ok ] add[ ok ] - echo [ ok ] 08 signal address syn[ ok ] - synack[ ok ] - ack[ ok ] add[ ok ] - echo [ ok ] 09 subflow and signal syn[ ok ] - synack[ ok ] - ack[ ok ] add[ ok ] - echo [ ok ] 10 multiple subflows and signal syn[ ok ] - synack[ ok ] - ack[ ok ] add[ ok ] - echo [ ok ] 11 remove subflow and signal syn[ ok ] - synack[ ok ] - ack[ ok ] add[ ok ] - echo [ ok ] Reviewed-by: Mat Martineau Signed-off-by: Geliang Tang Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 44 +++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index c2943e4dfcfe..9d64abdde146 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -276,6 +276,43 @@ chk_join_nr() fi } +chk_add_nr() +{ + local add_nr=$1 + local echo_nr=$2 + local count + local dump_stats + + printf "%-39s %s" " " "add" + count=`ip netns exec $ns2 nstat -as | grep MPTcpExtAddAddr | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$add_nr" ]; then + echo "[fail] got $count ADD_ADDR[s] expected $add_nr" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - echo " + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtEchoAdd | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$echo_nr" ]; then + echo "[fail] got $count ADD_ADDR echo[s] expected $echo_nr" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + + if [ "${dump_stats}" = 1 ]; then + echo Server ns stats + ip netns exec $ns1 nstat -as | grep MPTcp + echo Client ns stats + ip netns exec $ns2 nstat -as | grep MPTcp + fi +} + sin=$(mktemp) sout=$(mktemp) cin=$(mktemp) @@ -332,6 +369,7 @@ reset ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "unused signal address" 0 0 0 +chk_add_nr 1 1 # accept and use add_addr reset @@ -340,6 +378,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal address" 1 1 1 +chk_add_nr 1 1 # accept and use add_addr with an additional subflow # note: signal address in server ns and local addresses in client ns must @@ -352,6 +391,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 2 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflow and signal" 2 2 2 +chk_add_nr 1 1 # accept and use add_addr with additional subflows reset @@ -362,6 +402,7 @@ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows and signal" 3 3 3 +chk_add_nr 1 1 # single subflow, syncookies reset_with_cookies @@ -396,6 +437,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal address with syn cookies" 1 1 1 +chk_add_nr 1 1 # test cookie with subflow and signal reset_with_cookies @@ -405,6 +447,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 2 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflow and signal w cookies" 2 2 2 +chk_add_nr 1 1 # accept and use add_addr with additional subflows reset_with_cookies @@ -415,5 +458,6 @@ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflows and signal w. cookies" 3 3 3 +chk_add_nr 1 1 exit $ret -- cgit v1.3.1 From 1315332409fe4242641c2c3e0c20cd8b59f52d85 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 24 Sep 2020 08:29:58 +0800 Subject: selftests: mptcp: add remove cfg in mptcp_connect This patch added a new cfg, named cfg_remove in mptcp_connect. This new cfg_remove is copied from cfg_join. The only difference between them is in the do_rnd_write function. Here we slow down the transfer process of all data to let the RM_ADDR suboption can be sent and received completely. Otherwise the remove address and subflow test cases don't work. Suggested-by: Matthieu Baerts Suggested-by: Paolo Abeni Suggested-by: Mat Martineau Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index a54966531a64..77bb62feb872 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -54,6 +54,7 @@ static int pf = AF_INET; static int cfg_sndbuf; static int cfg_rcvbuf; static bool cfg_join; +static bool cfg_remove; static int cfg_wait; static void die_usage(void) @@ -271,6 +272,9 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len) if (cfg_join && first && do_w > 100) do_w = 100; + if (cfg_remove && do_w > 50) + do_w = 50; + bw = write(fd, buf, do_w); if (bw < 0) perror("write"); @@ -281,6 +285,9 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len) first = false; } + if (cfg_remove) + usleep(200000); + return bw; } @@ -428,7 +435,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) } /* leave some time for late join/announce */ - if (cfg_join) + if (cfg_join || cfg_remove) usleep(cfg_wait); close(peerfd); @@ -686,7 +693,7 @@ static void maybe_close(int fd) { unsigned int r = rand(); - if (!cfg_join && (r & 1)) + if (!(cfg_join || cfg_remove) && (r & 1)) close(fd); } @@ -822,13 +829,18 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:w:")) != -1) { + while ((c = getopt(argc, argv, "6jrlp:s:hut:m:S:R:w:")) != -1) { switch (c) { case 'j': cfg_join = true; cfg_mode = CFG_MODE_POLL; cfg_wait = 400000; break; + case 'r': + cfg_remove = true; + cfg_mode = CFG_MODE_POLL; + cfg_wait = 400000; + break; case 'l': listen_mode = true; break; -- cgit v1.3.1 From dd72b0fedee1747698160cf3f839f332a4788059 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 24 Sep 2020 08:29:59 +0800 Subject: selftests: mptcp: add remove addr and subflow test cases This patch added the remove addr and subflow test cases and two new functions. The first function run_remove_tests calls do_transfer with two new arguments, rm_nr_ns1 and rm_nr_ns2, for the numbers of addresses should be removed during the transfer process in namespace 1 and namespace 2. If both these two arguments are 0, we do the join test cases with "mptcp_connect -j" command. Otherwise, do the remove test cases with "mptcp_connect -r" command. The second function chk_rm_nr checks the RM_ADDR related mibs's counters. The output of the test cases looks like this: 11 remove single subflow syn[ ok ] - synack[ ok ] - ack[ ok ] rm [ ok ] - sf [ ok ] 12 remove multiple subflows syn[ ok ] - synack[ ok ] - ack[ ok ] rm [ ok ] - sf [ ok ] 13 remove single address syn[ ok ] - synack[ ok ] - ack[ ok ] add[ ok ] - echo [ ok ] rm [ ok ] - sf [ ok ] 14 remove subflow and signal syn[ ok ] - synack[ ok ] - ack[ ok ] add[ ok ] - echo [ ok ] rm [ ok ] - sf [ ok ] 15 remove subflows and signal syn[ ok ] - synack[ ok ] - ack[ ok ] add[ ok ] - echo [ ok ] rm [ ok ] - sf [ ok ] Suggested-by: Matthieu Baerts Suggested-by: Paolo Abeni Suggested-by: Mat Martineau Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 145 +++++++++++++++++++++++- 1 file changed, 142 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 9d64abdde146..08f53d86dedc 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -8,6 +8,7 @@ cin="" cout="" ksft_skip=4 timeout=30 +mptcp_connect="" capture=0 TEST_COUNT=0 @@ -132,6 +133,8 @@ do_transfer() cl_proto="$3" srv_proto="$4" connect_addr="$5" + rm_nr_ns1="$6" + rm_nr_ns2="$7" port=$((10000+$TEST_COUNT)) TEST_COUNT=$((TEST_COUNT+1)) @@ -156,14 +159,44 @@ do_transfer() sleep 1 fi - ip netns exec ${listener_ns} ./mptcp_connect -j -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" & + if [[ $rm_nr_ns1 -eq 0 && $rm_nr_ns2 -eq 0 ]]; then + mptcp_connect="./mptcp_connect -j" + else + mptcp_connect="./mptcp_connect -r" + fi + + ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" & spid=$! sleep 1 - ip netns exec ${connector_ns} ./mptcp_connect -j -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" & + ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" & cpid=$! + if [ $rm_nr_ns1 -gt 0 ]; then + counter=1 + sleep 1 + + while [ $counter -le $rm_nr_ns1 ] + do + ip netns exec ${listener_ns} ./pm_nl_ctl del $counter + sleep 1 + let counter+=1 + done + fi + + if [ $rm_nr_ns2 -gt 0 ]; then + counter=1 + sleep 1 + + while [ $counter -le $rm_nr_ns2 ] + do + ip netns exec ${connector_ns} ./pm_nl_ctl del $counter + sleep 1 + let counter+=1 + done + fi + wait $cpid retc=$? wait $spid @@ -219,7 +252,24 @@ run_tests() connect_addr="$3" lret=0 - do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} 0 0 + lret=$? + if [ $lret -ne 0 ]; then + ret=$lret + return + fi +} + +run_remove_tests() +{ + listener_ns="$1" + connector_ns="$2" + connect_addr="$3" + rm_nr_ns1="$4" + rm_nr_ns2="$5" + lret=0 + + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${rm_nr_ns1} ${rm_nr_ns2} lret=$? if [ $lret -ne 0 ]; then ret=$lret @@ -313,6 +363,43 @@ chk_add_nr() fi } +chk_rm_nr() +{ + local rm_addr_nr=$1 + local rm_subflow_nr=$2 + local count + local dump_stats + + printf "%-39s %s" " " "rm " + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$rm_addr_nr" ]; then + echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - sf " + count=`ip netns exec $ns2 nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$rm_subflow_nr" ]; then + echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + + if [ "${dump_stats}" = 1 ]; then + echo Server ns stats + ip netns exec $ns1 nstat -as | grep MPTcp + echo Client ns stats + ip netns exec $ns2 nstat -as | grep MPTcp + fi +} + sin=$(mktemp) sout=$(mktemp) cin=$(mktemp) @@ -404,6 +491,58 @@ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows and signal" 3 3 3 chk_add_nr 1 1 +# single subflow, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_remove_tests $ns1 $ns2 10.0.1.1 0 1 +chk_join_nr "remove single subflow" 1 1 1 +chk_rm_nr 1 1 + +# multiple subflows, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_remove_tests $ns1 $ns2 10.0.1.1 0 2 +chk_join_nr "remove multiple subflows" 2 2 2 +chk_rm_nr 2 2 + +# single address, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +run_remove_tests $ns1 $ns2 10.0.1.1 1 0 +chk_join_nr "remove single address" 1 1 1 +chk_add_nr 1 1 +chk_rm_nr 0 0 + +# subflow and signal, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_remove_tests $ns1 $ns2 10.0.1.1 1 1 +chk_join_nr "remove subflow and signal" 2 2 2 +chk_add_nr 1 1 +chk_rm_nr 1 1 + +# subflows and signal, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 3 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 3 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow +run_remove_tests $ns1 $ns2 10.0.1.1 1 2 +chk_join_nr "remove subflows and signal" 3 3 3 +chk_add_nr 1 1 +chk_rm_nr 2 2 + # single subflow, syncookies reset_with_cookies ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -- cgit v1.3.1 From ea366dd79c05fcd4cf5e225d2de8a3a7c293160c Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 23 Sep 2020 16:36:17 -0700 Subject: rseq/selftests,x86_64: Add rseq_offset_deref_addv() This patch adds rseq_offset_deref_addv() function to tools/testing/selftests/rseq/rseq-x86.h, to be used in a selftest in the next patch in the patchset. Once an architecture adds support for this function they should define "RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV". Signed-off-by: Peter Oskolkov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mathieu Desnoyers Link: https://lkml.kernel.org/r/20200923233618.2572849-2-posk@google.com --- tools/testing/selftests/rseq/rseq-x86.h | 57 +++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h index b2da6004fe30..640411518e46 100644 --- a/tools/testing/selftests/rseq/rseq-x86.h +++ b/tools/testing/selftests/rseq/rseq-x86.h @@ -279,6 +279,63 @@ error1: #endif } +#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV + +/* + * pval = *(ptr+off) + * *pval += inc; + */ +static inline __attribute__((always_inline)) +int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) +#endif + /* get p+v */ + "movq %[ptr], %%rbx\n\t" + "addq %[off], %%rbx\n\t" + /* get pv */ + "movq (%%rbx), %%rcx\n\t" + /* *pv += inc */ + "addq %[inc], (%%rcx)\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [rseq_abi] "r" (&__rseq_abi), + /* final store input */ + [ptr] "m" (*ptr), + [off] "er" (off), + [inc] "er" (inc) + : "memory", "cc", "rax", "rbx", "rcx" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + static inline __attribute__((always_inline)) int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, intptr_t *v2, intptr_t newv2, -- cgit v1.3.1 From f166b111e0491486fca0d105f09655ab718bd1c8 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 23 Sep 2020 16:36:18 -0700 Subject: rseq/selftests: Test MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ Based on Google-internal RSEQ work done by Paul Turner and Andrew Hunter. This patch adds a selftest for MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ. The test quite often fails without the previous patch in this patchset, but consistently passes with it. Signed-off-by: Peter Oskolkov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mathieu Desnoyers Link: https://lkml.kernel.org/r/20200923233618.2572849-3-posk@google.com --- tools/testing/selftests/rseq/param_test.c | 223 ++++++++++++++++++++++++- tools/testing/selftests/rseq/run_param_test.sh | 2 + 2 files changed, 224 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index e8a657a5f48a..384589095864 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: LGPL-2.1 #define _GNU_SOURCE #include +#include #include #include +#include #include #include #include @@ -1131,6 +1133,220 @@ static int set_signal_handler(void) return ret; } +struct test_membarrier_thread_args { + int stop; + intptr_t percpu_list_ptr; +}; + +/* Worker threads modify data in their "active" percpu lists. */ +void *test_membarrier_worker_thread(void *arg) +{ + struct test_membarrier_thread_args *args = + (struct test_membarrier_thread_args *)arg; + const int iters = opt_reps; + int i; + + if (rseq_register_current_thread()) { + fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); + } + + /* Wait for initialization. */ + while (!atomic_load(&args->percpu_list_ptr)) {} + + for (i = 0; i < iters; ++i) { + int ret; + + do { + int cpu = rseq_cpu_start(); + + ret = rseq_offset_deref_addv(&args->percpu_list_ptr, + sizeof(struct percpu_list_entry) * cpu, 1, cpu); + } while (rseq_unlikely(ret)); + } + + if (rseq_unregister_current_thread()) { + fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); + } + return NULL; +} + +void test_membarrier_init_percpu_list(struct percpu_list *list) +{ + int i; + + memset(list, 0, sizeof(*list)); + for (i = 0; i < CPU_SETSIZE; i++) { + struct percpu_list_node *node; + + node = malloc(sizeof(*node)); + assert(node); + node->data = 0; + node->next = NULL; + list->c[i].head = node; + } +} + +void test_membarrier_free_percpu_list(struct percpu_list *list) +{ + int i; + + for (i = 0; i < CPU_SETSIZE; i++) + free(list->c[i].head); +} + +static int sys_membarrier(int cmd, int flags, int cpu_id) +{ + return syscall(__NR_membarrier, cmd, flags, cpu_id); +} + +/* + * The manager thread swaps per-cpu lists that worker threads see, + * and validates that there are no unexpected modifications. + */ +void *test_membarrier_manager_thread(void *arg) +{ + struct test_membarrier_thread_args *args = + (struct test_membarrier_thread_args *)arg; + struct percpu_list list_a, list_b; + intptr_t expect_a = 0, expect_b = 0; + int cpu_a = 0, cpu_b = 0; + + if (rseq_register_current_thread()) { + fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); + } + + /* Init lists. */ + test_membarrier_init_percpu_list(&list_a); + test_membarrier_init_percpu_list(&list_b); + + atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a); + + while (!atomic_load(&args->stop)) { + /* list_a is "active". */ + cpu_a = rand() % CPU_SETSIZE; + /* + * As list_b is "inactive", we should never see changes + * to list_b. + */ + if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) { + fprintf(stderr, "Membarrier test failed\n"); + abort(); + } + + /* Make list_b "active". */ + atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b); + if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, + MEMBARRIER_CMD_FLAG_CPU, cpu_a) && + errno != ENXIO /* missing CPU */) { + perror("sys_membarrier"); + abort(); + } + /* + * Cpu A should now only modify list_b, so the values + * in list_a should be stable. + */ + expect_a = atomic_load(&list_a.c[cpu_a].head->data); + + cpu_b = rand() % CPU_SETSIZE; + /* + * As list_a is "inactive", we should never see changes + * to list_a. + */ + if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) { + fprintf(stderr, "Membarrier test failed\n"); + abort(); + } + + /* Make list_a "active". */ + atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a); + if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, + MEMBARRIER_CMD_FLAG_CPU, cpu_b) && + errno != ENXIO /* missing CPU*/) { + perror("sys_membarrier"); + abort(); + } + /* Remember a value from list_b. */ + expect_b = atomic_load(&list_b.c[cpu_b].head->data); + } + + test_membarrier_free_percpu_list(&list_a); + test_membarrier_free_percpu_list(&list_b); + + if (rseq_unregister_current_thread()) { + fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); + } + return NULL; +} + +/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */ +#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV +void test_membarrier(void) +{ + const int num_threads = opt_threads; + struct test_membarrier_thread_args thread_args; + pthread_t worker_threads[num_threads]; + pthread_t manager_thread; + int i, ret; + + if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) { + perror("sys_membarrier"); + abort(); + } + + thread_args.stop = 0; + thread_args.percpu_list_ptr = 0; + ret = pthread_create(&manager_thread, NULL, + test_membarrier_manager_thread, &thread_args); + if (ret) { + errno = ret; + perror("pthread_create"); + abort(); + } + + for (i = 0; i < num_threads; i++) { + ret = pthread_create(&worker_threads[i], NULL, + test_membarrier_worker_thread, &thread_args); + if (ret) { + errno = ret; + perror("pthread_create"); + abort(); + } + } + + + for (i = 0; i < num_threads; i++) { + ret = pthread_join(worker_threads[i], NULL); + if (ret) { + errno = ret; + perror("pthread_join"); + abort(); + } + } + + atomic_store(&thread_args.stop, 1); + ret = pthread_join(manager_thread, NULL); + if (ret) { + errno = ret; + perror("pthread_join"); + abort(); + } +} +#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */ +void test_membarrier(void) +{ + fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. " + "Skipping membarrier test.\n"); +} +#endif + static void show_usage(int argc, char **argv) { printf("Usage : %s \n", @@ -1153,7 +1369,7 @@ static void show_usage(int argc, char **argv) printf(" [-r N] Number of repetitions per thread (default 5000)\n"); printf(" [-d] Disable rseq system call (no initialization)\n"); printf(" [-D M] Disable rseq for each M threads\n"); - printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n"); + printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n"); printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n"); printf(" [-v] Verbose output.\n"); printf(" [-h] Show this help.\n"); @@ -1268,6 +1484,7 @@ int main(int argc, char **argv) case 'i': case 'b': case 'm': + case 'r': break; default: show_usage(argc, argv); @@ -1320,6 +1537,10 @@ int main(int argc, char **argv) printf_verbose("counter increment\n"); test_percpu_inc(); break; + case 'r': + printf_verbose("membarrier\n"); + test_membarrier(); + break; } if (!opt_disable_rseq && rseq_unregister_current_thread()) abort(); diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh index e426304fd4a0..f51bc83c9e41 100755 --- a/tools/testing/selftests/rseq/run_param_test.sh +++ b/tools/testing/selftests/rseq/run_param_test.sh @@ -15,6 +15,7 @@ TEST_LIST=( "-T m" "-T m -M" "-T i" + "-T r" ) TEST_NAME=( @@ -25,6 +26,7 @@ TEST_NAME=( "memcpy" "memcpy with barrier" "increment" + "membarrier" ) IFS="$OLDIFS" -- cgit v1.3.1 From a5fa25adf03d4b063aece74ba70ccbb3a71af122 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 24 Sep 2020 17:03:56 -0700 Subject: bpf: Change bpf_sk_release and bpf_sk_*cgroup_id to accept ARG_PTR_TO_BTF_ID_SOCK_COMMON The previous patch allows the networking bpf prog to use the bpf_skc_to_*() helpers to get a PTR_TO_BTF_ID socket pointer, e.g. "struct tcp_sock *". It allows the bpf prog to read all the fields of the tcp_sock. This patch changes the bpf_sk_release() and bpf_sk_*cgroup_id() to take ARG_PTR_TO_BTF_ID_SOCK_COMMON such that they will work with the pointer returned by the bpf_skc_to_*() helpers also. For example, the following will work: sk = bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0); if (!sk) return; tp = bpf_skc_to_tcp_sock(sk); if (!tp) { bpf_sk_release(sk); return; } lsndtime = tp->lsndtime; /* Pass tp to bpf_sk_release() will also work */ bpf_sk_release(tp); Since PTR_TO_BTF_ID could be NULL, the helper taking ARG_PTR_TO_BTF_ID_SOCK_COMMON has to check for NULL at runtime. A btf_id of "struct sock" may not always mean a fullsock. Regardless the helper's running context may get a non-fullsock or not, considering fullsock check/handling is pretty cheap, it is better to keep the same verifier expectation on helper that takes ARG_PTR_TO_BTF_ID* will be able to handle the minisock situation. In the bpf_sk_*cgroup_id() case, it will try to get a fullsock by using sk_to_full_sk() as its skb variant bpf_sk"b"_*cgroup_id() has already been doing. bpf_sk_release can already handle minisock, so nothing special has to be done. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200925000356.3856047-1-kafai@fb.com --- include/uapi/linux/bpf.h | 8 ++++---- net/core/filter.c | 30 ++++++++++++++---------------- tools/include/uapi/linux/bpf.h | 8 ++++---- 3 files changed, 22 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a22812561064..c96a56d9c3be 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2512,7 +2512,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * long bpf_sk_release(struct bpf_sock *sock) + * long bpf_sk_release(void *sock) * Description * Release the reference held by *sock*. *sock* must be a * non-**NULL** pointer that was returned from @@ -3234,11 +3234,11 @@ union bpf_attr { * * **-EOVERFLOW** if an overflow happened: The same object will be tried again. * - * u64 bpf_sk_cgroup_id(struct bpf_sock *sk) + * u64 bpf_sk_cgroup_id(void *sk) * Description * Return the cgroup v2 id of the socket *sk*. * - * *sk* must be a non-**NULL** pointer to a full socket, e.g. one + * *sk* must be a non-**NULL** pointer to a socket, e.g. one * returned from **bpf_sk_lookup_xxx**\ (), * **bpf_sk_fullsock**\ (), etc. The format of returned id is * same as in **bpf_skb_cgroup_id**\ (). @@ -3248,7 +3248,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level) + * u64 bpf_sk_ancestor_cgroup_id(void *sk, int ancestor_level) * Description * Return id of cgroup v2 that is ancestor of cgroup associated * with the *sk* at the *ancestor_level*. The root cgroup is at diff --git a/net/core/filter.c b/net/core/filter.c index 6d1864f2bd51..06d397eeef2a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4088,18 +4088,17 @@ static inline u64 __bpf_sk_cgroup_id(struct sock *sk) { struct cgroup *cgrp; + sk = sk_to_full_sk(sk); + if (!sk || !sk_fullsock(sk)) + return 0; + cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); return cgroup_id(cgrp); } BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb) { - struct sock *sk = skb_to_full_sk(skb); - - if (!sk || !sk_fullsock(sk)) - return 0; - - return __bpf_sk_cgroup_id(sk); + return __bpf_sk_cgroup_id(skb->sk); } static const struct bpf_func_proto bpf_skb_cgroup_id_proto = { @@ -4115,6 +4114,10 @@ static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk, struct cgroup *ancestor; struct cgroup *cgrp; + sk = sk_to_full_sk(sk); + if (!sk || !sk_fullsock(sk)) + return 0; + cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); ancestor = cgroup_ancestor(cgrp, ancestor_level); if (!ancestor) @@ -4126,12 +4129,7 @@ static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk, BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int, ancestor_level) { - struct sock *sk = skb_to_full_sk(skb); - - if (!sk || !sk_fullsock(sk)) - return 0; - - return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level); + return __bpf_sk_ancestor_cgroup_id(skb->sk, ancestor_level); } static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = { @@ -4151,7 +4149,7 @@ static const struct bpf_func_proto bpf_sk_cgroup_id_proto = { .func = bpf_sk_cgroup_id, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_SOCKET, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, }; BPF_CALL_2(bpf_sk_ancestor_cgroup_id, struct sock *, sk, int, ancestor_level) @@ -4163,7 +4161,7 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = { .func = bpf_sk_ancestor_cgroup_id, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_SOCKET, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .arg2_type = ARG_ANYTHING, }; #endif @@ -5697,7 +5695,7 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = { BPF_CALL_1(bpf_sk_release, struct sock *, sk) { - if (sk_is_refcounted(sk)) + if (sk && sk_is_refcounted(sk)) sock_gen_put(sk); return 0; } @@ -5706,7 +5704,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = { .func = bpf_sk_release, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_SOCK_COMMON, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, }; BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a22812561064..c96a56d9c3be 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2512,7 +2512,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * long bpf_sk_release(struct bpf_sock *sock) + * long bpf_sk_release(void *sock) * Description * Release the reference held by *sock*. *sock* must be a * non-**NULL** pointer that was returned from @@ -3234,11 +3234,11 @@ union bpf_attr { * * **-EOVERFLOW** if an overflow happened: The same object will be tried again. * - * u64 bpf_sk_cgroup_id(struct bpf_sock *sk) + * u64 bpf_sk_cgroup_id(void *sk) * Description * Return the cgroup v2 id of the socket *sk*. * - * *sk* must be a non-**NULL** pointer to a full socket, e.g. one + * *sk* must be a non-**NULL** pointer to a socket, e.g. one * returned from **bpf_sk_lookup_xxx**\ (), * **bpf_sk_fullsock**\ (), etc. The format of returned id is * same as in **bpf_skb_cgroup_id**\ (). @@ -3248,7 +3248,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level) + * u64 bpf_sk_ancestor_cgroup_id(void *sk, int ancestor_level) * Description * Return id of cgroup v2 that is ancestor of cgroup associated * with the *sk* at the *ancestor_level*. The root cgroup is at -- cgit v1.3.1 From 592a3498648af000e93dff2d36229ab11cd8c7f6 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 24 Sep 2020 17:04:02 -0700 Subject: bpf: Change bpf_sk_storage_*() to accept ARG_PTR_TO_BTF_ID_SOCK_COMMON This patch changes the bpf_sk_storage_*() to take ARG_PTR_TO_BTF_ID_SOCK_COMMON such that they will work with the pointer returned by the bpf_skc_to_*() helpers also. A micro benchmark has been done on a "cgroup_skb/egress" bpf program which does a bpf_sk_storage_get(). It was driven by netperf doing a 4096 connected UDP_STREAM test with 64bytes packet. The stats from "kernel.bpf_stats_enabled" shows no meaningful difference. The sk_storage_get_btf_proto, sk_storage_delete_btf_proto, btf_sk_storage_get_proto, and btf_sk_storage_delete_proto are no longer needed, so they are removed. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20200925000402.3856307-1-kafai@fb.com --- include/net/bpf_sk_storage.h | 2 -- include/uapi/linux/bpf.h | 1 + kernel/bpf/bpf_lsm.c | 4 ++-- net/core/bpf_sk_storage.c | 29 ++++++----------------------- net/ipv4/bpf_tcp_ca.c | 23 ++--------------------- tools/include/uapi/linux/bpf.h | 1 + 6 files changed, 12 insertions(+), 48 deletions(-) (limited to 'tools') diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h index 119f4c9c3a9c..3c516dd07caf 100644 --- a/include/net/bpf_sk_storage.h +++ b/include/net/bpf_sk_storage.h @@ -20,8 +20,6 @@ void bpf_sk_storage_free(struct sock *sk); extern const struct bpf_func_proto bpf_sk_storage_get_proto; extern const struct bpf_func_proto bpf_sk_storage_delete_proto; -extern const struct bpf_func_proto sk_storage_get_btf_proto; -extern const struct bpf_func_proto sk_storage_delete_btf_proto; struct bpf_local_storage_elem; struct bpf_sk_storage_diag; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c96a56d9c3be..0ec6dbeb17a5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2861,6 +2861,7 @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf-local-storage cannot be found. + * **-EINVAL** if sk is not a fullsock (e.g. a request_sock). * * long bpf_send_signal(u32 sig) * Description diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 9cd1428c7199..78ea8a7bd27f 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -56,9 +56,9 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_inode_storage_delete: return &bpf_inode_storage_delete_proto; case BPF_FUNC_sk_storage_get: - return &sk_storage_get_btf_proto; + return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: - return &sk_storage_delete_btf_proto; + return &bpf_sk_storage_delete_proto; default: return tracing_prog_func_proto(func_id, prog); } diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 838efc682cff..c907f0dc7f87 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -269,7 +269,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk, { struct bpf_local_storage_data *sdata; - if (flags > BPF_SK_STORAGE_GET_F_CREATE) + if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE) return (unsigned long)NULL; sdata = sk_storage_lookup(sk, map, true); @@ -299,6 +299,9 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk, BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) { + if (!sk || !sk_fullsock(sk)) + return -EINVAL; + if (refcount_inc_not_zero(&sk->sk_refcnt)) { int err; @@ -355,7 +358,7 @@ const struct bpf_func_proto bpf_sk_storage_get_proto = { .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_SOCKET, + .arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, }; @@ -375,27 +378,7 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_SOCKET, -}; - -const struct bpf_func_proto sk_storage_get_btf_proto = { - .func = bpf_sk_storage_get, - .gpl_only = false, - .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, - .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID, - .arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK], - .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, - .arg4_type = ARG_ANYTHING, -}; - -const struct bpf_func_proto sk_storage_delete_btf_proto = { - .func = bpf_sk_storage_delete, - .gpl_only = false, - .ret_type = RET_INTEGER, - .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID, - .arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK], + .arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, }; struct bpf_sk_storage_diag { diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 74a2ef598c31..618954f82764 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -28,22 +28,6 @@ static u32 unsupported_ops[] = { static const struct btf_type *tcp_sock_type; static u32 tcp_sock_id, sock_id; -static struct bpf_func_proto btf_sk_storage_get_proto __read_mostly; -static struct bpf_func_proto btf_sk_storage_delete_proto __read_mostly; - -static void convert_sk_func_proto(struct bpf_func_proto *to, const struct bpf_func_proto *from) -{ - int i; - - *to = *from; - for (i = 0; i < ARRAY_SIZE(to->arg_type); i++) { - if (to->arg_type[i] == ARG_PTR_TO_SOCKET) { - to->arg_type[i] = ARG_PTR_TO_BTF_ID; - to->arg_btf_id[i] = &tcp_sock_id; - } - } -} - static int bpf_tcp_ca_init(struct btf *btf) { s32 type_id; @@ -59,9 +43,6 @@ static int bpf_tcp_ca_init(struct btf *btf) tcp_sock_id = type_id; tcp_sock_type = btf_type_by_id(btf, tcp_sock_id); - convert_sk_func_proto(&btf_sk_storage_get_proto, &bpf_sk_storage_get_proto); - convert_sk_func_proto(&btf_sk_storage_delete_proto, &bpf_sk_storage_delete_proto); - return 0; } @@ -188,9 +169,9 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, case BPF_FUNC_tcp_send_ack: return &bpf_tcp_send_ack_proto; case BPF_FUNC_sk_storage_get: - return &btf_sk_storage_get_proto; + return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: - return &btf_sk_storage_delete_proto; + return &bpf_sk_storage_delete_proto; default: return bpf_base_func_proto(func_id); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c96a56d9c3be..0ec6dbeb17a5 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2861,6 +2861,7 @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf-local-storage cannot be found. + * **-EINVAL** if sk is not a fullsock (e.g. a request_sock). * * long bpf_send_signal(u32 sig) * Description -- cgit v1.3.1 From c0df236e1394970f3503a8fb103de95d000014ca Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 24 Sep 2020 17:04:09 -0700 Subject: bpf: Change bpf_tcp_*_syncookie to accept ARG_PTR_TO_BTF_ID_SOCK_COMMON This patch changes the bpf_tcp_*_syncookie() to take ARG_PTR_TO_BTF_ID_SOCK_COMMON such that they will work with the pointer returned by the bpf_skc_to_*() helpers also. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20200925000409.3856725-1-kafai@fb.com --- include/uapi/linux/bpf.h | 4 ++-- net/core/filter.c | 8 ++++---- tools/include/uapi/linux/bpf.h | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0ec6dbeb17a5..69b9e30375bc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2692,7 +2692,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * long bpf_tcp_check_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) * Description * Check whether *iph* and *th* contain a valid SYN cookie ACK for * the listening socket in *sk*. @@ -2878,7 +2878,7 @@ union bpf_attr { * * **-EAGAIN** if bpf program can try again. * - * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * s64 bpf_tcp_gen_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) * Description * Try to issue a SYN cookie for the packet with corresponding * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. diff --git a/net/core/filter.c b/net/core/filter.c index 06d397eeef2a..1d88e9b498eb 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6086,7 +6086,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len u32 cookie; int ret; - if (unlikely(th_len < sizeof(*th))) + if (unlikely(!sk || th_len < sizeof(*th))) return -EINVAL; /* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */ @@ -6139,7 +6139,7 @@ static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = { .gpl_only = true, .pkt_access = true, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_SOCK_COMMON, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_PTR_TO_MEM, @@ -6153,7 +6153,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, u32 cookie; u16 mss; - if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4)) + if (unlikely(!sk || th_len < sizeof(*th) || th_len != th->doff * 4)) return -EINVAL; if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) @@ -6208,7 +6208,7 @@ static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = { .gpl_only = true, /* __cookie_v*_init_sequence() is GPL */ .pkt_access = true, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_SOCK_COMMON, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_PTR_TO_MEM, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0ec6dbeb17a5..69b9e30375bc 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2692,7 +2692,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * long bpf_tcp_check_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) * Description * Check whether *iph* and *th* contain a valid SYN cookie ACK for * the listening socket in *sk*. @@ -2878,7 +2878,7 @@ union bpf_attr { * * **-EAGAIN** if bpf program can try again. * - * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * s64 bpf_tcp_gen_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) * Description * Try to issue a SYN cookie for the packet with corresponding * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. -- cgit v1.3.1 From 27e5203bd9c5cc6d54dcac48c3027f3f04522b8b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 24 Sep 2020 17:04:15 -0700 Subject: bpf: Change bpf_sk_assign to accept ARG_PTR_TO_BTF_ID_SOCK_COMMON This patch changes the bpf_sk_assign() to take ARG_PTR_TO_BTF_ID_SOCK_COMMON such that they will work with the pointer returned by the bpf_skc_to_*() helpers also. The bpf_sk_lookup_assign() is taking ARG_PTR_TO_SOCKET_"OR_NULL". Meaning it specifically takes a literal NULL. ARG_PTR_TO_BTF_ID_SOCK_COMMON does not allow a literal NULL, so another ARG type is required for this purpose and another follow-up patch can be used if there is such need. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200925000415.3857374-1-kafai@fb.com --- include/uapi/linux/bpf.h | 2 +- net/core/filter.c | 4 ++-- tools/include/uapi/linux/bpf.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 69b9e30375bc..2d6519a2ed77 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3107,7 +3107,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) + * long bpf_sk_assign(struct sk_buff *skb, void *sk, u64 flags) * Description * Helper is overloaded depending on BPF program type. This * description applies to **BPF_PROG_TYPE_SCHED_CLS** and diff --git a/net/core/filter.c b/net/core/filter.c index 1d88e9b498eb..af88935e24b1 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6217,7 +6217,7 @@ static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = { BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags) { - if (flags != 0) + if (!sk || flags != 0) return -EINVAL; if (!skb_at_tc_ingress(skb)) return -EOPNOTSUPP; @@ -6241,7 +6241,7 @@ static const struct bpf_func_proto bpf_sk_assign_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_SOCK_COMMON, + .arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .arg3_type = ARG_ANYTHING, }; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 69b9e30375bc..2d6519a2ed77 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3107,7 +3107,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) + * long bpf_sk_assign(struct sk_buff *skb, void *sk, u64 flags) * Description * Helper is overloaded depending on BPF program type. This * description applies to **BPF_PROG_TYPE_SCHED_CLS** and -- cgit v1.3.1 From 5d13746dd8359e1d0b354b557ebb36e8e1939a6c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 24 Sep 2020 17:04:21 -0700 Subject: bpf: selftest: Add ref_tracking verifier test for bpf_skc casting The patch tests for: 1. bpf_sk_release() can be called on a tcp_sock btf_id ptr. 2. Ensure the tcp_sock btf_id pointer cannot be used after bpf_sk_release(). Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20200925000421.3857616-1-kafai@fb.com --- .../testing/selftests/bpf/verifier/ref_tracking.c | 47 ++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index 056e0273bf12..006b5bd99c08 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -854,3 +854,50 @@ .errstr = "Unreleased reference", .result = REJECT, }, +{ + "reference tracking: bpf_sk_release(btf_tcp_sock)", + .insns = { + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "unknown func", +}, +{ + "reference tracking: use ptr from bpf_skc_to_tcp_sock() after release", + .insns = { + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", + .result_unpriv = REJECT, + .errstr_unpriv = "unknown func", +}, -- cgit v1.3.1 From 6f521a2bd26870bcd14aac42ff2ce0b1a22ae477 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 24 Sep 2020 17:04:27 -0700 Subject: bpf: selftest: Move sock_fields test into test_progs This is a mechanical change to 1. move test_sock_fields.c to prog_tests/sock_fields.c 2. rename progs/test_sock_fields_kern.c to progs/test_sock_fields.c Minimal change is made to the code itself. Next patch will make changes to use new ways of writing test, e.g. use skel and global variables. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200925000427.3857814-1-kafai@fb.com --- tools/testing/selftests/bpf/.gitignore | 1 - tools/testing/selftests/bpf/Makefile | 2 +- .../testing/selftests/bpf/prog_tests/sock_fields.c | 480 ++++++++++++++++++++ .../testing/selftests/bpf/progs/test_sock_fields.c | 255 +++++++++++ .../selftests/bpf/progs/test_sock_fields_kern.c | 255 ----------- tools/testing/selftests/bpf/test_sock_fields.c | 482 --------------------- 6 files changed, 736 insertions(+), 739 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/sock_fields.c create mode 100644 tools/testing/selftests/bpf/progs/test_sock_fields.c delete mode 100644 tools/testing/selftests/bpf/progs/test_sock_fields_kern.c delete mode 100644 tools/testing/selftests/bpf/test_sock_fields.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index e8fed558b8b8..3ab1200e172f 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -13,7 +13,6 @@ test_verifier_log feature test_sock test_sock_addr -test_sock_fields urandom_read test_sockmap test_lirc_mode2_user diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 59a5fa5fe837..bdbeafec371b 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -35,7 +35,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_verifier_log test_dev_cgroup test_tcpbpf_user \ test_sock test_sockmap get_cgroup_id_user test_socket_cookie \ test_cgroup_storage \ - test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \ + test_netcnt test_tcpnotify_user test_sysctl \ test_progs-no_alu32 \ test_current_pid_tgid_new_ns diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c new file mode 100644 index 000000000000..1138223780fc --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -0,0 +1,480 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "cgroup_helpers.h" +#include "bpf_rlimit.h" + +enum bpf_addr_array_idx { + ADDR_SRV_IDX, + ADDR_CLI_IDX, + __NR_BPF_ADDR_ARRAY_IDX, +}; + +enum bpf_result_array_idx { + EGRESS_SRV_IDX, + EGRESS_CLI_IDX, + INGRESS_LISTEN_IDX, + __NR_BPF_RESULT_ARRAY_IDX, +}; + +enum bpf_linum_array_idx { + EGRESS_LINUM_IDX, + INGRESS_LINUM_IDX, + __NR_BPF_LINUM_ARRAY_IDX, +}; + +struct bpf_spinlock_cnt { + struct bpf_spin_lock lock; + __u32 cnt; +}; + +#define CHECK(condition, tag, format...) ({ \ + int __ret = !!(condition); \ + if (__ret) { \ + printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \ + printf(format); \ + printf("\n"); \ + exit(-1); \ + } \ +}) + +#define TEST_CGROUP "/test-bpf-sock-fields" +#define DATA "Hello BPF!" +#define DATA_LEN sizeof(DATA) + +static struct sockaddr_in6 srv_sa6, cli_sa6; +static int sk_pkt_out_cnt10_fd; +static int sk_pkt_out_cnt_fd; +static int linum_map_fd; +static int addr_map_fd; +static int tp_map_fd; +static int sk_map_fd; + +static __u32 addr_srv_idx = ADDR_SRV_IDX; +static __u32 addr_cli_idx = ADDR_CLI_IDX; + +static __u32 egress_srv_idx = EGRESS_SRV_IDX; +static __u32 egress_cli_idx = EGRESS_CLI_IDX; +static __u32 ingress_listen_idx = INGRESS_LISTEN_IDX; + +static __u32 egress_linum_idx = EGRESS_LINUM_IDX; +static __u32 ingress_linum_idx = INGRESS_LINUM_IDX; + +static void init_loopback6(struct sockaddr_in6 *sa6) +{ + memset(sa6, 0, sizeof(*sa6)); + sa6->sin6_family = AF_INET6; + sa6->sin6_addr = in6addr_loopback; +} + +static void print_sk(const struct bpf_sock *sk) +{ + char src_ip4[24], dst_ip4[24]; + char src_ip6[64], dst_ip6[64]; + + inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4)); + inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6)); + inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4)); + inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6)); + + printf("state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u " + "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u " + "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n", + sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol, + sk->mark, sk->priority, + sk->src_ip4, src_ip4, + sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3], + src_ip6, sk->src_port, + sk->dst_ip4, dst_ip4, + sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3], + dst_ip6, ntohs(sk->dst_port)); +} + +static void print_tp(const struct bpf_tcp_sock *tp) +{ + printf("snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u " + "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u " + "rate_delivered:%u rate_interval_us:%u packets_out:%u " + "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u " + "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u " + "bytes_received:%llu bytes_acked:%llu\n", + tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh, + tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache, + tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us, + tp->packets_out, tp->retrans_out, tp->total_retrans, + tp->segs_in, tp->data_segs_in, tp->segs_out, + tp->data_segs_out, tp->lost_out, tp->sacked_out, + tp->bytes_received, tp->bytes_acked); +} + +static void check_result(void) +{ + struct bpf_tcp_sock srv_tp, cli_tp, listen_tp; + struct bpf_sock srv_sk, cli_sk, listen_sk; + __u32 ingress_linum, egress_linum; + int err; + + err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx, + &egress_linum); + CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", + "err:%d errno:%d", err, errno); + + err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx, + &ingress_linum); + CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", + "err:%d errno:%d", err, errno); + + err = bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx, &srv_sk); + CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx)", + "err:%d errno:%d", err, errno); + err = bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx, &srv_tp); + CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx)", + "err:%d errno:%d", err, errno); + + err = bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx, &cli_sk); + CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx)", + "err:%d errno:%d", err, errno); + err = bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx, &cli_tp); + CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx)", + "err:%d errno:%d", err, errno); + + err = bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx, &listen_sk); + CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx)", + "err:%d errno:%d", err, errno); + err = bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx, &listen_tp); + CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx)", + "err:%d errno:%d", err, errno); + + printf("listen_sk: "); + print_sk(&listen_sk); + printf("\n"); + + printf("srv_sk: "); + print_sk(&srv_sk); + printf("\n"); + + printf("cli_sk: "); + print_sk(&cli_sk); + printf("\n"); + + printf("listen_tp: "); + print_tp(&listen_tp); + printf("\n"); + + printf("srv_tp: "); + print_tp(&srv_tp); + printf("\n"); + + printf("cli_tp: "); + print_tp(&cli_tp); + printf("\n"); + + CHECK(listen_sk.state != 10 || + listen_sk.family != AF_INET6 || + listen_sk.protocol != IPPROTO_TCP || + memcmp(listen_sk.src_ip6, &in6addr_loopback, + sizeof(listen_sk.src_ip6)) || + listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] || + listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] || + listen_sk.src_port != ntohs(srv_sa6.sin6_port) || + listen_sk.dst_port, + "Unexpected listen_sk", + "Check listen_sk output. ingress_linum:%u", + ingress_linum); + + CHECK(srv_sk.state == 10 || + !srv_sk.state || + srv_sk.family != AF_INET6 || + srv_sk.protocol != IPPROTO_TCP || + memcmp(srv_sk.src_ip6, &in6addr_loopback, + sizeof(srv_sk.src_ip6)) || + memcmp(srv_sk.dst_ip6, &in6addr_loopback, + sizeof(srv_sk.dst_ip6)) || + srv_sk.src_port != ntohs(srv_sa6.sin6_port) || + srv_sk.dst_port != cli_sa6.sin6_port, + "Unexpected srv_sk", "Check srv_sk output. egress_linum:%u", + egress_linum); + + CHECK(cli_sk.state == 10 || + !cli_sk.state || + cli_sk.family != AF_INET6 || + cli_sk.protocol != IPPROTO_TCP || + memcmp(cli_sk.src_ip6, &in6addr_loopback, + sizeof(cli_sk.src_ip6)) || + memcmp(cli_sk.dst_ip6, &in6addr_loopback, + sizeof(cli_sk.dst_ip6)) || + cli_sk.src_port != ntohs(cli_sa6.sin6_port) || + cli_sk.dst_port != srv_sa6.sin6_port, + "Unexpected cli_sk", "Check cli_sk output. egress_linum:%u", + egress_linum); + + CHECK(listen_tp.data_segs_out || + listen_tp.data_segs_in || + listen_tp.total_retrans || + listen_tp.bytes_acked, + "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u", + ingress_linum); + + CHECK(srv_tp.data_segs_out != 2 || + srv_tp.data_segs_in || + srv_tp.snd_cwnd != 10 || + srv_tp.total_retrans || + srv_tp.bytes_acked != 2 * DATA_LEN, + "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u", + egress_linum); + + CHECK(cli_tp.data_segs_out || + cli_tp.data_segs_in != 2 || + cli_tp.snd_cwnd != 10 || + cli_tp.total_retrans || + cli_tp.bytes_received != 2 * DATA_LEN, + "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u", + egress_linum); +} + +static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd) +{ + struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {}; + int err; + + pkt_out_cnt.cnt = ~0; + pkt_out_cnt10.cnt = ~0; + err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt); + if (!err) + err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd, + &pkt_out_cnt10); + + /* The bpf prog only counts for fullsock and + * passive conneciton did not become fullsock until 3WHS + * had been finished. + * The bpf prog only counted two data packet out but we + * specially init accept_fd's pkt_out_cnt by 2 in + * init_sk_storage(). Hence, 4 here. + */ + CHECK(err || pkt_out_cnt.cnt != 4 || pkt_out_cnt10.cnt != 40, + "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)", + "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", + err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); + + pkt_out_cnt.cnt = ~0; + pkt_out_cnt10.cnt = ~0; + err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt); + if (!err) + err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd, + &pkt_out_cnt10); + /* Active connection is fullsock from the beginning. + * 1 SYN and 1 ACK during 3WHS + * 2 Acks on data packet. + * + * The bpf_prog initialized it to 0xeB9F. + */ + CHECK(err || pkt_out_cnt.cnt != 0xeB9F + 4 || + pkt_out_cnt10.cnt != 0xeB9F + 40, + "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)", + "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", + err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); +} + +static void init_sk_storage(int sk_fd, __u32 pkt_out_cnt) +{ + struct bpf_spinlock_cnt scnt = {}; + int err; + + scnt.cnt = pkt_out_cnt; + err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt, + BPF_NOEXIST); + CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)", + "err:%d errno:%d", err, errno); + + scnt.cnt *= 10; + err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt, + BPF_NOEXIST); + CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)", + "err:%d errno:%d", err, errno); +} + +static void test(void) +{ + int listen_fd, cli_fd, accept_fd, epfd, err; + struct epoll_event ev; + socklen_t addrlen; + int i; + + addrlen = sizeof(struct sockaddr_in6); + ev.events = EPOLLIN; + + epfd = epoll_create(1); + CHECK(epfd == -1, "epoll_create()", "epfd:%d errno:%d", epfd, errno); + + /* Prepare listen_fd */ + listen_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); + CHECK(listen_fd == -1, "socket()", "listen_fd:%d errno:%d", + listen_fd, errno); + + init_loopback6(&srv_sa6); + err = bind(listen_fd, (struct sockaddr *)&srv_sa6, sizeof(srv_sa6)); + CHECK(err, "bind(listen_fd)", "err:%d errno:%d", err, errno); + + err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); + CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d", err, errno); + + err = listen(listen_fd, 1); + CHECK(err, "listen(listen_fd)", "err:%d errno:%d", err, errno); + + /* Prepare cli_fd */ + cli_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); + CHECK(cli_fd == -1, "socket()", "cli_fd:%d errno:%d", cli_fd, errno); + + init_loopback6(&cli_sa6); + err = bind(cli_fd, (struct sockaddr *)&cli_sa6, sizeof(cli_sa6)); + CHECK(err, "bind(cli_fd)", "err:%d errno:%d", err, errno); + + err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen); + CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d", + err, errno); + + /* Update addr_map with srv_sa6 and cli_sa6 */ + err = bpf_map_update_elem(addr_map_fd, &addr_srv_idx, &srv_sa6, 0); + CHECK(err, "map_update", "err:%d errno:%d", err, errno); + + err = bpf_map_update_elem(addr_map_fd, &addr_cli_idx, &cli_sa6, 0); + CHECK(err, "map_update", "err:%d errno:%d", err, errno); + + /* Connect from cli_sa6 to srv_sa6 */ + err = connect(cli_fd, (struct sockaddr *)&srv_sa6, addrlen); + printf("srv_sa6.sin6_port:%u cli_sa6.sin6_port:%u\n\n", + ntohs(srv_sa6.sin6_port), ntohs(cli_sa6.sin6_port)); + CHECK(err && errno != EINPROGRESS, + "connect(cli_fd)", "err:%d errno:%d", err, errno); + + ev.data.fd = listen_fd; + err = epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev); + CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, listen_fd)", "err:%d errno:%d", + err, errno); + + /* Accept the connection */ + /* Have some timeout in accept(listen_fd). Just in case. */ + err = epoll_wait(epfd, &ev, 1, 1000); + CHECK(err != 1 || ev.data.fd != listen_fd, + "epoll_wait(listen_fd)", + "err:%d errno:%d ev.data.fd:%d listen_fd:%d", + err, errno, ev.data.fd, listen_fd); + + accept_fd = accept(listen_fd, NULL, NULL); + CHECK(accept_fd == -1, "accept(listen_fd)", "accept_fd:%d errno:%d", + accept_fd, errno); + close(listen_fd); + + ev.data.fd = cli_fd; + err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev); + CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d", + err, errno); + + init_sk_storage(accept_fd, 2); + + for (i = 0; i < 2; i++) { + /* Send some data from accept_fd to cli_fd */ + err = send(accept_fd, DATA, DATA_LEN, 0); + CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d", + err, errno); + + /* Have some timeout in recv(cli_fd). Just in case. */ + err = epoll_wait(epfd, &ev, 1, 1000); + CHECK(err != 1 || ev.data.fd != cli_fd, + "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d", + err, errno, ev.data.fd, cli_fd); + + err = recv(cli_fd, NULL, 0, MSG_TRUNC); + CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno); + } + + check_sk_pkt_out_cnt(accept_fd, cli_fd); + + close(epfd); + close(accept_fd); + close(cli_fd); + + check_result(); +} + +void test_sock_fields(void) +{ + struct bpf_prog_load_attr attr = { + .file = "test_sock_fields.o", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .prog_flags = BPF_F_TEST_RND_HI32, + }; + int cgroup_fd, egress_fd, ingress_fd, err; + struct bpf_program *ingress_prog; + struct bpf_object *obj; + struct bpf_map *map; + + /* Create a cgroup, get fd, and join it */ + cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); + CHECK(cgroup_fd < 0, "cgroup_setup_and_join()", + "cgroup_fd:%d errno:%d", cgroup_fd, errno); + atexit(cleanup_cgroup_environment); + + err = bpf_prog_load_xattr(&attr, &obj, &egress_fd); + CHECK(err, "bpf_prog_load_xattr()", "err:%d", err); + + ingress_prog = bpf_object__find_program_by_title(obj, + "cgroup_skb/ingress"); + CHECK(!ingress_prog, + "bpf_object__find_program_by_title(cgroup_skb/ingress)", + "not found"); + ingress_fd = bpf_program__fd(ingress_prog); + + err = bpf_prog_attach(egress_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0); + CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)", + "err:%d errno%d", err, errno); + + err = bpf_prog_attach(ingress_fd, cgroup_fd, + BPF_CGROUP_INET_INGRESS, 0); + CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_INGRESS)", + "err:%d errno%d", err, errno); + close(cgroup_fd); + + map = bpf_object__find_map_by_name(obj, "addr_map"); + CHECK(!map, "cannot find addr_map", "(null)"); + addr_map_fd = bpf_map__fd(map); + + map = bpf_object__find_map_by_name(obj, "sock_result_map"); + CHECK(!map, "cannot find sock_result_map", "(null)"); + sk_map_fd = bpf_map__fd(map); + + map = bpf_object__find_map_by_name(obj, "tcp_sock_result_map"); + CHECK(!map, "cannot find tcp_sock_result_map", "(null)"); + tp_map_fd = bpf_map__fd(map); + + map = bpf_object__find_map_by_name(obj, "linum_map"); + CHECK(!map, "cannot find linum_map", "(null)"); + linum_map_fd = bpf_map__fd(map); + + map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt"); + CHECK(!map, "cannot find sk_pkt_out_cnt", "(null)"); + sk_pkt_out_cnt_fd = bpf_map__fd(map); + + map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt10"); + CHECK(!map, "cannot find sk_pkt_out_cnt10", "(null)"); + sk_pkt_out_cnt10_fd = bpf_map__fd(map); + + test(); + + bpf_object__close(obj); + cleanup_cgroup_environment(); + + printf("PASS\n"); +} diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c new file mode 100644 index 000000000000..9bcaa37f476a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c @@ -0,0 +1,255 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include +#include +#include + +#include +#include + +enum bpf_addr_array_idx { + ADDR_SRV_IDX, + ADDR_CLI_IDX, + __NR_BPF_ADDR_ARRAY_IDX, +}; + +enum bpf_result_array_idx { + EGRESS_SRV_IDX, + EGRESS_CLI_IDX, + INGRESS_LISTEN_IDX, + __NR_BPF_RESULT_ARRAY_IDX, +}; + +enum bpf_linum_array_idx { + EGRESS_LINUM_IDX, + INGRESS_LINUM_IDX, + __NR_BPF_LINUM_ARRAY_IDX, +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, __NR_BPF_ADDR_ARRAY_IDX); + __type(key, __u32); + __type(value, struct sockaddr_in6); +} addr_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX); + __type(key, __u32); + __type(value, struct bpf_sock); +} sock_result_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX); + __type(key, __u32); + __type(value, struct bpf_tcp_sock); +} tcp_sock_result_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX); + __type(key, __u32); + __type(value, __u32); +} linum_map SEC(".maps"); + +struct bpf_spinlock_cnt { + struct bpf_spin_lock lock; + __u32 cnt; +}; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct bpf_spinlock_cnt); +} sk_pkt_out_cnt SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct bpf_spinlock_cnt); +} sk_pkt_out_cnt10 SEC(".maps"); + +static bool is_loopback6(__u32 *a6) +{ + return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1); +} + +static void skcpy(struct bpf_sock *dst, + const struct bpf_sock *src) +{ + dst->bound_dev_if = src->bound_dev_if; + dst->family = src->family; + dst->type = src->type; + dst->protocol = src->protocol; + dst->mark = src->mark; + dst->priority = src->priority; + dst->src_ip4 = src->src_ip4; + dst->src_ip6[0] = src->src_ip6[0]; + dst->src_ip6[1] = src->src_ip6[1]; + dst->src_ip6[2] = src->src_ip6[2]; + dst->src_ip6[3] = src->src_ip6[3]; + dst->src_port = src->src_port; + dst->dst_ip4 = src->dst_ip4; + dst->dst_ip6[0] = src->dst_ip6[0]; + dst->dst_ip6[1] = src->dst_ip6[1]; + dst->dst_ip6[2] = src->dst_ip6[2]; + dst->dst_ip6[3] = src->dst_ip6[3]; + dst->dst_port = src->dst_port; + dst->state = src->state; +} + +static void tpcpy(struct bpf_tcp_sock *dst, + const struct bpf_tcp_sock *src) +{ + dst->snd_cwnd = src->snd_cwnd; + dst->srtt_us = src->srtt_us; + dst->rtt_min = src->rtt_min; + dst->snd_ssthresh = src->snd_ssthresh; + dst->rcv_nxt = src->rcv_nxt; + dst->snd_nxt = src->snd_nxt; + dst->snd_una = src->snd_una; + dst->mss_cache = src->mss_cache; + dst->ecn_flags = src->ecn_flags; + dst->rate_delivered = src->rate_delivered; + dst->rate_interval_us = src->rate_interval_us; + dst->packets_out = src->packets_out; + dst->retrans_out = src->retrans_out; + dst->total_retrans = src->total_retrans; + dst->segs_in = src->segs_in; + dst->data_segs_in = src->data_segs_in; + dst->segs_out = src->segs_out; + dst->data_segs_out = src->data_segs_out; + dst->lost_out = src->lost_out; + dst->sacked_out = src->sacked_out; + dst->bytes_received = src->bytes_received; + dst->bytes_acked = src->bytes_acked; +} + +#define RETURN { \ + linum = __LINE__; \ + bpf_map_update_elem(&linum_map, &linum_idx, &linum, 0); \ + return 1; \ +} + +SEC("cgroup_skb/egress") +int egress_read_sock_fields(struct __sk_buff *skb) +{ + struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F }; + __u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx; + struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10; + struct sockaddr_in6 *srv_sa6, *cli_sa6; + struct bpf_tcp_sock *tp, *tp_ret; + struct bpf_sock *sk, *sk_ret; + __u32 linum, linum_idx; + + linum_idx = EGRESS_LINUM_IDX; + + sk = skb->sk; + if (!sk || sk->state == 10) + RETURN; + + sk = bpf_sk_fullsock(sk); + if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP || + !is_loopback6(sk->src_ip6)) + RETURN; + + tp = bpf_tcp_sock(sk); + if (!tp) + RETURN; + + srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx); + cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx); + if (!srv_sa6 || !cli_sa6) + RETURN; + + if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port)) + result_idx = EGRESS_SRV_IDX; + else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port)) + result_idx = EGRESS_CLI_IDX; + else + RETURN; + + sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx); + tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx); + if (!sk_ret || !tp_ret) + RETURN; + + skcpy(sk_ret, sk); + tpcpy(tp_ret, tp); + + if (result_idx == EGRESS_SRV_IDX) { + /* The userspace has created it for srv sk */ + pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, 0, 0); + pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, sk, + 0, 0); + } else { + pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, + &cli_cnt_init, + BPF_SK_STORAGE_GET_F_CREATE); + pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, + sk, &cli_cnt_init, + BPF_SK_STORAGE_GET_F_CREATE); + } + + if (!pkt_out_cnt || !pkt_out_cnt10) + RETURN; + + /* Even both cnt and cnt10 have lock defined in their BTF, + * intentionally one cnt takes lock while one does not + * as a test for the spinlock support in BPF_MAP_TYPE_SK_STORAGE. + */ + pkt_out_cnt->cnt += 1; + bpf_spin_lock(&pkt_out_cnt10->lock); + pkt_out_cnt10->cnt += 10; + bpf_spin_unlock(&pkt_out_cnt10->lock); + + RETURN; +} + +SEC("cgroup_skb/ingress") +int ingress_read_sock_fields(struct __sk_buff *skb) +{ + __u32 srv_idx = ADDR_SRV_IDX, result_idx = INGRESS_LISTEN_IDX; + struct bpf_tcp_sock *tp, *tp_ret; + struct bpf_sock *sk, *sk_ret; + struct sockaddr_in6 *srv_sa6; + __u32 linum, linum_idx; + + linum_idx = INGRESS_LINUM_IDX; + + sk = skb->sk; + if (!sk || sk->family != AF_INET6 || !is_loopback6(sk->src_ip6)) + RETURN; + + srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx); + if (!srv_sa6 || sk->src_port != bpf_ntohs(srv_sa6->sin6_port)) + RETURN; + + if (sk->state != 10 && sk->state != 12) + RETURN; + + sk = bpf_get_listener_sock(sk); + if (!sk) + RETURN; + + tp = bpf_tcp_sock(sk); + if (!tp) + RETURN; + + sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx); + tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx); + if (!sk_ret || !tp_ret) + RETURN; + + skcpy(sk_ret, sk); + tpcpy(tp_ret, tp); + + RETURN; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c deleted file mode 100644 index 9bcaa37f476a..000000000000 --- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c +++ /dev/null @@ -1,255 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2019 Facebook */ - -#include -#include -#include - -#include -#include - -enum bpf_addr_array_idx { - ADDR_SRV_IDX, - ADDR_CLI_IDX, - __NR_BPF_ADDR_ARRAY_IDX, -}; - -enum bpf_result_array_idx { - EGRESS_SRV_IDX, - EGRESS_CLI_IDX, - INGRESS_LISTEN_IDX, - __NR_BPF_RESULT_ARRAY_IDX, -}; - -enum bpf_linum_array_idx { - EGRESS_LINUM_IDX, - INGRESS_LINUM_IDX, - __NR_BPF_LINUM_ARRAY_IDX, -}; - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, __NR_BPF_ADDR_ARRAY_IDX); - __type(key, __u32); - __type(value, struct sockaddr_in6); -} addr_map SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX); - __type(key, __u32); - __type(value, struct bpf_sock); -} sock_result_map SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX); - __type(key, __u32); - __type(value, struct bpf_tcp_sock); -} tcp_sock_result_map SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX); - __type(key, __u32); - __type(value, __u32); -} linum_map SEC(".maps"); - -struct bpf_spinlock_cnt { - struct bpf_spin_lock lock; - __u32 cnt; -}; - -struct { - __uint(type, BPF_MAP_TYPE_SK_STORAGE); - __uint(map_flags, BPF_F_NO_PREALLOC); - __type(key, int); - __type(value, struct bpf_spinlock_cnt); -} sk_pkt_out_cnt SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_SK_STORAGE); - __uint(map_flags, BPF_F_NO_PREALLOC); - __type(key, int); - __type(value, struct bpf_spinlock_cnt); -} sk_pkt_out_cnt10 SEC(".maps"); - -static bool is_loopback6(__u32 *a6) -{ - return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1); -} - -static void skcpy(struct bpf_sock *dst, - const struct bpf_sock *src) -{ - dst->bound_dev_if = src->bound_dev_if; - dst->family = src->family; - dst->type = src->type; - dst->protocol = src->protocol; - dst->mark = src->mark; - dst->priority = src->priority; - dst->src_ip4 = src->src_ip4; - dst->src_ip6[0] = src->src_ip6[0]; - dst->src_ip6[1] = src->src_ip6[1]; - dst->src_ip6[2] = src->src_ip6[2]; - dst->src_ip6[3] = src->src_ip6[3]; - dst->src_port = src->src_port; - dst->dst_ip4 = src->dst_ip4; - dst->dst_ip6[0] = src->dst_ip6[0]; - dst->dst_ip6[1] = src->dst_ip6[1]; - dst->dst_ip6[2] = src->dst_ip6[2]; - dst->dst_ip6[3] = src->dst_ip6[3]; - dst->dst_port = src->dst_port; - dst->state = src->state; -} - -static void tpcpy(struct bpf_tcp_sock *dst, - const struct bpf_tcp_sock *src) -{ - dst->snd_cwnd = src->snd_cwnd; - dst->srtt_us = src->srtt_us; - dst->rtt_min = src->rtt_min; - dst->snd_ssthresh = src->snd_ssthresh; - dst->rcv_nxt = src->rcv_nxt; - dst->snd_nxt = src->snd_nxt; - dst->snd_una = src->snd_una; - dst->mss_cache = src->mss_cache; - dst->ecn_flags = src->ecn_flags; - dst->rate_delivered = src->rate_delivered; - dst->rate_interval_us = src->rate_interval_us; - dst->packets_out = src->packets_out; - dst->retrans_out = src->retrans_out; - dst->total_retrans = src->total_retrans; - dst->segs_in = src->segs_in; - dst->data_segs_in = src->data_segs_in; - dst->segs_out = src->segs_out; - dst->data_segs_out = src->data_segs_out; - dst->lost_out = src->lost_out; - dst->sacked_out = src->sacked_out; - dst->bytes_received = src->bytes_received; - dst->bytes_acked = src->bytes_acked; -} - -#define RETURN { \ - linum = __LINE__; \ - bpf_map_update_elem(&linum_map, &linum_idx, &linum, 0); \ - return 1; \ -} - -SEC("cgroup_skb/egress") -int egress_read_sock_fields(struct __sk_buff *skb) -{ - struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F }; - __u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx; - struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10; - struct sockaddr_in6 *srv_sa6, *cli_sa6; - struct bpf_tcp_sock *tp, *tp_ret; - struct bpf_sock *sk, *sk_ret; - __u32 linum, linum_idx; - - linum_idx = EGRESS_LINUM_IDX; - - sk = skb->sk; - if (!sk || sk->state == 10) - RETURN; - - sk = bpf_sk_fullsock(sk); - if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP || - !is_loopback6(sk->src_ip6)) - RETURN; - - tp = bpf_tcp_sock(sk); - if (!tp) - RETURN; - - srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx); - cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx); - if (!srv_sa6 || !cli_sa6) - RETURN; - - if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port)) - result_idx = EGRESS_SRV_IDX; - else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port)) - result_idx = EGRESS_CLI_IDX; - else - RETURN; - - sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx); - tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx); - if (!sk_ret || !tp_ret) - RETURN; - - skcpy(sk_ret, sk); - tpcpy(tp_ret, tp); - - if (result_idx == EGRESS_SRV_IDX) { - /* The userspace has created it for srv sk */ - pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, 0, 0); - pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, sk, - 0, 0); - } else { - pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, - &cli_cnt_init, - BPF_SK_STORAGE_GET_F_CREATE); - pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, - sk, &cli_cnt_init, - BPF_SK_STORAGE_GET_F_CREATE); - } - - if (!pkt_out_cnt || !pkt_out_cnt10) - RETURN; - - /* Even both cnt and cnt10 have lock defined in their BTF, - * intentionally one cnt takes lock while one does not - * as a test for the spinlock support in BPF_MAP_TYPE_SK_STORAGE. - */ - pkt_out_cnt->cnt += 1; - bpf_spin_lock(&pkt_out_cnt10->lock); - pkt_out_cnt10->cnt += 10; - bpf_spin_unlock(&pkt_out_cnt10->lock); - - RETURN; -} - -SEC("cgroup_skb/ingress") -int ingress_read_sock_fields(struct __sk_buff *skb) -{ - __u32 srv_idx = ADDR_SRV_IDX, result_idx = INGRESS_LISTEN_IDX; - struct bpf_tcp_sock *tp, *tp_ret; - struct bpf_sock *sk, *sk_ret; - struct sockaddr_in6 *srv_sa6; - __u32 linum, linum_idx; - - linum_idx = INGRESS_LINUM_IDX; - - sk = skb->sk; - if (!sk || sk->family != AF_INET6 || !is_loopback6(sk->src_ip6)) - RETURN; - - srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx); - if (!srv_sa6 || sk->src_port != bpf_ntohs(srv_sa6->sin6_port)) - RETURN; - - if (sk->state != 10 && sk->state != 12) - RETURN; - - sk = bpf_get_listener_sock(sk); - if (!sk) - RETURN; - - tp = bpf_tcp_sock(sk); - if (!tp) - RETURN; - - sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx); - tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx); - if (!sk_ret || !tp_ret) - RETURN; - - skcpy(sk_ret, sk); - tpcpy(tp_ret, tp); - - RETURN; -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c deleted file mode 100644 index 6c9f269c396d..000000000000 --- a/tools/testing/selftests/bpf/test_sock_fields.c +++ /dev/null @@ -1,482 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2019 Facebook */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "cgroup_helpers.h" -#include "bpf_rlimit.h" - -enum bpf_addr_array_idx { - ADDR_SRV_IDX, - ADDR_CLI_IDX, - __NR_BPF_ADDR_ARRAY_IDX, -}; - -enum bpf_result_array_idx { - EGRESS_SRV_IDX, - EGRESS_CLI_IDX, - INGRESS_LISTEN_IDX, - __NR_BPF_RESULT_ARRAY_IDX, -}; - -enum bpf_linum_array_idx { - EGRESS_LINUM_IDX, - INGRESS_LINUM_IDX, - __NR_BPF_LINUM_ARRAY_IDX, -}; - -struct bpf_spinlock_cnt { - struct bpf_spin_lock lock; - __u32 cnt; -}; - -#define CHECK(condition, tag, format...) ({ \ - int __ret = !!(condition); \ - if (__ret) { \ - printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \ - printf(format); \ - printf("\n"); \ - exit(-1); \ - } \ -}) - -#define TEST_CGROUP "/test-bpf-sock-fields" -#define DATA "Hello BPF!" -#define DATA_LEN sizeof(DATA) - -static struct sockaddr_in6 srv_sa6, cli_sa6; -static int sk_pkt_out_cnt10_fd; -static int sk_pkt_out_cnt_fd; -static int linum_map_fd; -static int addr_map_fd; -static int tp_map_fd; -static int sk_map_fd; - -static __u32 addr_srv_idx = ADDR_SRV_IDX; -static __u32 addr_cli_idx = ADDR_CLI_IDX; - -static __u32 egress_srv_idx = EGRESS_SRV_IDX; -static __u32 egress_cli_idx = EGRESS_CLI_IDX; -static __u32 ingress_listen_idx = INGRESS_LISTEN_IDX; - -static __u32 egress_linum_idx = EGRESS_LINUM_IDX; -static __u32 ingress_linum_idx = INGRESS_LINUM_IDX; - -static void init_loopback6(struct sockaddr_in6 *sa6) -{ - memset(sa6, 0, sizeof(*sa6)); - sa6->sin6_family = AF_INET6; - sa6->sin6_addr = in6addr_loopback; -} - -static void print_sk(const struct bpf_sock *sk) -{ - char src_ip4[24], dst_ip4[24]; - char src_ip6[64], dst_ip6[64]; - - inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4)); - inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6)); - inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4)); - inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6)); - - printf("state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u " - "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u " - "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n", - sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol, - sk->mark, sk->priority, - sk->src_ip4, src_ip4, - sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3], - src_ip6, sk->src_port, - sk->dst_ip4, dst_ip4, - sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3], - dst_ip6, ntohs(sk->dst_port)); -} - -static void print_tp(const struct bpf_tcp_sock *tp) -{ - printf("snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u " - "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u " - "rate_delivered:%u rate_interval_us:%u packets_out:%u " - "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u " - "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u " - "bytes_received:%llu bytes_acked:%llu\n", - tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh, - tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache, - tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us, - tp->packets_out, tp->retrans_out, tp->total_retrans, - tp->segs_in, tp->data_segs_in, tp->segs_out, - tp->data_segs_out, tp->lost_out, tp->sacked_out, - tp->bytes_received, tp->bytes_acked); -} - -static void check_result(void) -{ - struct bpf_tcp_sock srv_tp, cli_tp, listen_tp; - struct bpf_sock srv_sk, cli_sk, listen_sk; - __u32 ingress_linum, egress_linum; - int err; - - err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx, - &egress_linum); - CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", - "err:%d errno:%d", err, errno); - - err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx, - &ingress_linum); - CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", - "err:%d errno:%d", err, errno); - - err = bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx, &srv_sk); - CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx)", - "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx, &srv_tp); - CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx)", - "err:%d errno:%d", err, errno); - - err = bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx, &cli_sk); - CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx)", - "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx, &cli_tp); - CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx)", - "err:%d errno:%d", err, errno); - - err = bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx, &listen_sk); - CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx)", - "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx, &listen_tp); - CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx)", - "err:%d errno:%d", err, errno); - - printf("listen_sk: "); - print_sk(&listen_sk); - printf("\n"); - - printf("srv_sk: "); - print_sk(&srv_sk); - printf("\n"); - - printf("cli_sk: "); - print_sk(&cli_sk); - printf("\n"); - - printf("listen_tp: "); - print_tp(&listen_tp); - printf("\n"); - - printf("srv_tp: "); - print_tp(&srv_tp); - printf("\n"); - - printf("cli_tp: "); - print_tp(&cli_tp); - printf("\n"); - - CHECK(listen_sk.state != 10 || - listen_sk.family != AF_INET6 || - listen_sk.protocol != IPPROTO_TCP || - memcmp(listen_sk.src_ip6, &in6addr_loopback, - sizeof(listen_sk.src_ip6)) || - listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] || - listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] || - listen_sk.src_port != ntohs(srv_sa6.sin6_port) || - listen_sk.dst_port, - "Unexpected listen_sk", - "Check listen_sk output. ingress_linum:%u", - ingress_linum); - - CHECK(srv_sk.state == 10 || - !srv_sk.state || - srv_sk.family != AF_INET6 || - srv_sk.protocol != IPPROTO_TCP || - memcmp(srv_sk.src_ip6, &in6addr_loopback, - sizeof(srv_sk.src_ip6)) || - memcmp(srv_sk.dst_ip6, &in6addr_loopback, - sizeof(srv_sk.dst_ip6)) || - srv_sk.src_port != ntohs(srv_sa6.sin6_port) || - srv_sk.dst_port != cli_sa6.sin6_port, - "Unexpected srv_sk", "Check srv_sk output. egress_linum:%u", - egress_linum); - - CHECK(cli_sk.state == 10 || - !cli_sk.state || - cli_sk.family != AF_INET6 || - cli_sk.protocol != IPPROTO_TCP || - memcmp(cli_sk.src_ip6, &in6addr_loopback, - sizeof(cli_sk.src_ip6)) || - memcmp(cli_sk.dst_ip6, &in6addr_loopback, - sizeof(cli_sk.dst_ip6)) || - cli_sk.src_port != ntohs(cli_sa6.sin6_port) || - cli_sk.dst_port != srv_sa6.sin6_port, - "Unexpected cli_sk", "Check cli_sk output. egress_linum:%u", - egress_linum); - - CHECK(listen_tp.data_segs_out || - listen_tp.data_segs_in || - listen_tp.total_retrans || - listen_tp.bytes_acked, - "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u", - ingress_linum); - - CHECK(srv_tp.data_segs_out != 2 || - srv_tp.data_segs_in || - srv_tp.snd_cwnd != 10 || - srv_tp.total_retrans || - srv_tp.bytes_acked != 2 * DATA_LEN, - "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u", - egress_linum); - - CHECK(cli_tp.data_segs_out || - cli_tp.data_segs_in != 2 || - cli_tp.snd_cwnd != 10 || - cli_tp.total_retrans || - cli_tp.bytes_received != 2 * DATA_LEN, - "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u", - egress_linum); -} - -static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd) -{ - struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {}; - int err; - - pkt_out_cnt.cnt = ~0; - pkt_out_cnt10.cnt = ~0; - err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt); - if (!err) - err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd, - &pkt_out_cnt10); - - /* The bpf prog only counts for fullsock and - * passive conneciton did not become fullsock until 3WHS - * had been finished. - * The bpf prog only counted two data packet out but we - * specially init accept_fd's pkt_out_cnt by 2 in - * init_sk_storage(). Hence, 4 here. - */ - CHECK(err || pkt_out_cnt.cnt != 4 || pkt_out_cnt10.cnt != 40, - "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)", - "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", - err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); - - pkt_out_cnt.cnt = ~0; - pkt_out_cnt10.cnt = ~0; - err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt); - if (!err) - err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd, - &pkt_out_cnt10); - /* Active connection is fullsock from the beginning. - * 1 SYN and 1 ACK during 3WHS - * 2 Acks on data packet. - * - * The bpf_prog initialized it to 0xeB9F. - */ - CHECK(err || pkt_out_cnt.cnt != 0xeB9F + 4 || - pkt_out_cnt10.cnt != 0xeB9F + 40, - "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)", - "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", - err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); -} - -static void init_sk_storage(int sk_fd, __u32 pkt_out_cnt) -{ - struct bpf_spinlock_cnt scnt = {}; - int err; - - scnt.cnt = pkt_out_cnt; - err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt, - BPF_NOEXIST); - CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)", - "err:%d errno:%d", err, errno); - - scnt.cnt *= 10; - err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt, - BPF_NOEXIST); - CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)", - "err:%d errno:%d", err, errno); -} - -static void test(void) -{ - int listen_fd, cli_fd, accept_fd, epfd, err; - struct epoll_event ev; - socklen_t addrlen; - int i; - - addrlen = sizeof(struct sockaddr_in6); - ev.events = EPOLLIN; - - epfd = epoll_create(1); - CHECK(epfd == -1, "epoll_create()", "epfd:%d errno:%d", epfd, errno); - - /* Prepare listen_fd */ - listen_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); - CHECK(listen_fd == -1, "socket()", "listen_fd:%d errno:%d", - listen_fd, errno); - - init_loopback6(&srv_sa6); - err = bind(listen_fd, (struct sockaddr *)&srv_sa6, sizeof(srv_sa6)); - CHECK(err, "bind(listen_fd)", "err:%d errno:%d", err, errno); - - err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); - CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d", err, errno); - - err = listen(listen_fd, 1); - CHECK(err, "listen(listen_fd)", "err:%d errno:%d", err, errno); - - /* Prepare cli_fd */ - cli_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); - CHECK(cli_fd == -1, "socket()", "cli_fd:%d errno:%d", cli_fd, errno); - - init_loopback6(&cli_sa6); - err = bind(cli_fd, (struct sockaddr *)&cli_sa6, sizeof(cli_sa6)); - CHECK(err, "bind(cli_fd)", "err:%d errno:%d", err, errno); - - err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen); - CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d", - err, errno); - - /* Update addr_map with srv_sa6 and cli_sa6 */ - err = bpf_map_update_elem(addr_map_fd, &addr_srv_idx, &srv_sa6, 0); - CHECK(err, "map_update", "err:%d errno:%d", err, errno); - - err = bpf_map_update_elem(addr_map_fd, &addr_cli_idx, &cli_sa6, 0); - CHECK(err, "map_update", "err:%d errno:%d", err, errno); - - /* Connect from cli_sa6 to srv_sa6 */ - err = connect(cli_fd, (struct sockaddr *)&srv_sa6, addrlen); - printf("srv_sa6.sin6_port:%u cli_sa6.sin6_port:%u\n\n", - ntohs(srv_sa6.sin6_port), ntohs(cli_sa6.sin6_port)); - CHECK(err && errno != EINPROGRESS, - "connect(cli_fd)", "err:%d errno:%d", err, errno); - - ev.data.fd = listen_fd; - err = epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev); - CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, listen_fd)", "err:%d errno:%d", - err, errno); - - /* Accept the connection */ - /* Have some timeout in accept(listen_fd). Just in case. */ - err = epoll_wait(epfd, &ev, 1, 1000); - CHECK(err != 1 || ev.data.fd != listen_fd, - "epoll_wait(listen_fd)", - "err:%d errno:%d ev.data.fd:%d listen_fd:%d", - err, errno, ev.data.fd, listen_fd); - - accept_fd = accept(listen_fd, NULL, NULL); - CHECK(accept_fd == -1, "accept(listen_fd)", "accept_fd:%d errno:%d", - accept_fd, errno); - close(listen_fd); - - ev.data.fd = cli_fd; - err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev); - CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d", - err, errno); - - init_sk_storage(accept_fd, 2); - - for (i = 0; i < 2; i++) { - /* Send some data from accept_fd to cli_fd */ - err = send(accept_fd, DATA, DATA_LEN, 0); - CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d", - err, errno); - - /* Have some timeout in recv(cli_fd). Just in case. */ - err = epoll_wait(epfd, &ev, 1, 1000); - CHECK(err != 1 || ev.data.fd != cli_fd, - "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d", - err, errno, ev.data.fd, cli_fd); - - err = recv(cli_fd, NULL, 0, MSG_TRUNC); - CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno); - } - - check_sk_pkt_out_cnt(accept_fd, cli_fd); - - close(epfd); - close(accept_fd); - close(cli_fd); - - check_result(); -} - -int main(int argc, char **argv) -{ - struct bpf_prog_load_attr attr = { - .file = "test_sock_fields_kern.o", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - .prog_flags = BPF_F_TEST_RND_HI32, - }; - int cgroup_fd, egress_fd, ingress_fd, err; - struct bpf_program *ingress_prog; - struct bpf_object *obj; - struct bpf_map *map; - - /* Create a cgroup, get fd, and join it */ - cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); - CHECK(cgroup_fd < 0, "cgroup_setup_and_join()", - "cgroup_fd:%d errno:%d", cgroup_fd, errno); - atexit(cleanup_cgroup_environment); - - err = bpf_prog_load_xattr(&attr, &obj, &egress_fd); - CHECK(err, "bpf_prog_load_xattr()", "err:%d", err); - - ingress_prog = bpf_object__find_program_by_title(obj, - "cgroup_skb/ingress"); - CHECK(!ingress_prog, - "bpf_object__find_program_by_title(cgroup_skb/ingress)", - "not found"); - ingress_fd = bpf_program__fd(ingress_prog); - - err = bpf_prog_attach(egress_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0); - CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)", - "err:%d errno%d", err, errno); - - err = bpf_prog_attach(ingress_fd, cgroup_fd, - BPF_CGROUP_INET_INGRESS, 0); - CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_INGRESS)", - "err:%d errno%d", err, errno); - close(cgroup_fd); - - map = bpf_object__find_map_by_name(obj, "addr_map"); - CHECK(!map, "cannot find addr_map", "(null)"); - addr_map_fd = bpf_map__fd(map); - - map = bpf_object__find_map_by_name(obj, "sock_result_map"); - CHECK(!map, "cannot find sock_result_map", "(null)"); - sk_map_fd = bpf_map__fd(map); - - map = bpf_object__find_map_by_name(obj, "tcp_sock_result_map"); - CHECK(!map, "cannot find tcp_sock_result_map", "(null)"); - tp_map_fd = bpf_map__fd(map); - - map = bpf_object__find_map_by_name(obj, "linum_map"); - CHECK(!map, "cannot find linum_map", "(null)"); - linum_map_fd = bpf_map__fd(map); - - map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt"); - CHECK(!map, "cannot find sk_pkt_out_cnt", "(null)"); - sk_pkt_out_cnt_fd = bpf_map__fd(map); - - map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt10"); - CHECK(!map, "cannot find sk_pkt_out_cnt10", "(null)"); - sk_pkt_out_cnt10_fd = bpf_map__fd(map); - - test(); - - bpf_object__close(obj); - cleanup_cgroup_environment(); - - printf("PASS\n"); - - return 0; -} -- cgit v1.3.1 From b18c1f0aa4775bf1ecfbe5942e4e7c6bffdeff6a Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 24 Sep 2020 17:04:34 -0700 Subject: bpf: selftest: Adapt sock_fields test to use skel and global variables skel is used. Global variables are used to store the result from bpf prog. addr_map, sock_result_map, and tcp_sock_result_map are gone. Instead, global variables listen_tp, srv_sa6, cli_tp,, srv_tp, listen_sk, srv_sk, and cli_sk are added. Because of that, bpf_addr_array_idx and bpf_result_array_idx are also no longer needed. CHECK() macro from test_progs.h is reused and bail as soon as a CHECK failure. shutdown() is used to ensure the previous data-ack is received. The bytes_acked, bytes_received, and the pkt_out_cnt checks are using "<" to accommodate the final ack may not have been received/sent. It is enough since it is not the focus of this test. The sk local storage is all initialized to 0xeB9F now, so the check_sk_pkt_out_cnt() always checks with the 0xeB9F base. It is to keep things simple. The next patch will reuse helpers from network_helpers.h to simplify things further. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200925000434.3858204-1-kafai@fb.com --- .../testing/selftests/bpf/prog_tests/sock_fields.c | 380 +++++++++------------ .../testing/selftests/bpf/progs/test_sock_fields.c | 154 ++++----- 2 files changed, 229 insertions(+), 305 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c index 1138223780fc..d96b718639fb 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -14,20 +14,9 @@ #include #include "cgroup_helpers.h" +#include "test_progs.h" #include "bpf_rlimit.h" - -enum bpf_addr_array_idx { - ADDR_SRV_IDX, - ADDR_CLI_IDX, - __NR_BPF_ADDR_ARRAY_IDX, -}; - -enum bpf_result_array_idx { - EGRESS_SRV_IDX, - EGRESS_CLI_IDX, - INGRESS_LISTEN_IDX, - __NR_BPF_RESULT_ARRAY_IDX, -}; +#include "test_sock_fields.skel.h" enum bpf_linum_array_idx { EGRESS_LINUM_IDX, @@ -40,34 +29,16 @@ struct bpf_spinlock_cnt { __u32 cnt; }; -#define CHECK(condition, tag, format...) ({ \ - int __ret = !!(condition); \ - if (__ret) { \ - printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \ - printf(format); \ - printf("\n"); \ - exit(-1); \ - } \ -}) - #define TEST_CGROUP "/test-bpf-sock-fields" #define DATA "Hello BPF!" #define DATA_LEN sizeof(DATA) static struct sockaddr_in6 srv_sa6, cli_sa6; static int sk_pkt_out_cnt10_fd; +struct test_sock_fields *skel; static int sk_pkt_out_cnt_fd; static int linum_map_fd; -static int addr_map_fd; -static int tp_map_fd; -static int sk_map_fd; - -static __u32 addr_srv_idx = ADDR_SRV_IDX; -static __u32 addr_cli_idx = ADDR_CLI_IDX; - -static __u32 egress_srv_idx = EGRESS_SRV_IDX; -static __u32 egress_cli_idx = EGRESS_CLI_IDX; -static __u32 ingress_listen_idx = INGRESS_LISTEN_IDX; +static __u32 duration; static __u32 egress_linum_idx = EGRESS_LINUM_IDX; static __u32 ingress_linum_idx = INGRESS_LINUM_IDX; @@ -79,7 +50,7 @@ static void init_loopback6(struct sockaddr_in6 *sa6) sa6->sin6_addr = in6addr_loopback; } -static void print_sk(const struct bpf_sock *sk) +static void print_sk(const struct bpf_sock *sk, const char *prefix) { char src_ip4[24], dst_ip4[24]; char src_ip6[64], dst_ip6[64]; @@ -89,9 +60,10 @@ static void print_sk(const struct bpf_sock *sk) inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4)); inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6)); - printf("state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u " + printf("%s: state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u " "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u " "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n", + prefix, sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol, sk->mark, sk->priority, sk->src_ip4, src_ip4, @@ -102,14 +74,15 @@ static void print_sk(const struct bpf_sock *sk) dst_ip6, ntohs(sk->dst_port)); } -static void print_tp(const struct bpf_tcp_sock *tp) +static void print_tp(const struct bpf_tcp_sock *tp, const char *prefix) { - printf("snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u " + printf("%s: snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u " "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u " "rate_delivered:%u rate_interval_us:%u packets_out:%u " "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u " "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u " "bytes_received:%llu bytes_acked:%llu\n", + prefix, tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh, tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache, tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us, @@ -129,57 +102,26 @@ static void check_result(void) err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx, &egress_linum); CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", - "err:%d errno:%d", err, errno); + "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx, &ingress_linum); CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", - "err:%d errno:%d", err, errno); - - err = bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx, &srv_sk); - CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx)", - "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx, &srv_tp); - CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx)", - "err:%d errno:%d", err, errno); - - err = bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx, &cli_sk); - CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx)", - "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx, &cli_tp); - CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx)", - "err:%d errno:%d", err, errno); - - err = bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx, &listen_sk); - CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx)", - "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx, &listen_tp); - CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx)", - "err:%d errno:%d", err, errno); - - printf("listen_sk: "); - print_sk(&listen_sk); - printf("\n"); - - printf("srv_sk: "); - print_sk(&srv_sk); - printf("\n"); - - printf("cli_sk: "); - print_sk(&cli_sk); - printf("\n"); - - printf("listen_tp: "); - print_tp(&listen_tp); - printf("\n"); - - printf("srv_tp: "); - print_tp(&srv_tp); - printf("\n"); - - printf("cli_tp: "); - print_tp(&cli_tp); - printf("\n"); + "err:%d errno:%d\n", err, errno); + + memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk)); + memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp)); + memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk)); + memcpy(&cli_tp, &skel->bss->cli_tp, sizeof(cli_tp)); + memcpy(&listen_sk, &skel->bss->listen_sk, sizeof(listen_sk)); + memcpy(&listen_tp, &skel->bss->listen_tp, sizeof(listen_tp)); + + print_sk(&listen_sk, "listen_sk"); + print_sk(&srv_sk, "srv_sk"); + print_sk(&cli_sk, "cli_sk"); + print_tp(&listen_tp, "listen_tp"); + print_tp(&srv_tp, "srv_tp"); + print_tp(&cli_tp, "cli_tp"); CHECK(listen_sk.state != 10 || listen_sk.family != AF_INET6 || @@ -190,8 +132,8 @@ static void check_result(void) listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] || listen_sk.src_port != ntohs(srv_sa6.sin6_port) || listen_sk.dst_port, - "Unexpected listen_sk", - "Check listen_sk output. ingress_linum:%u", + "listen_sk", + "Unexpected. Check listen_sk output. ingress_linum:%u\n", ingress_linum); CHECK(srv_sk.state == 10 || @@ -204,7 +146,7 @@ static void check_result(void) sizeof(srv_sk.dst_ip6)) || srv_sk.src_port != ntohs(srv_sa6.sin6_port) || srv_sk.dst_port != cli_sa6.sin6_port, - "Unexpected srv_sk", "Check srv_sk output. egress_linum:%u", + "srv_sk", "Unexpected. Check srv_sk output. egress_linum:%u\n", egress_linum); CHECK(cli_sk.state == 10 || @@ -217,30 +159,31 @@ static void check_result(void) sizeof(cli_sk.dst_ip6)) || cli_sk.src_port != ntohs(cli_sa6.sin6_port) || cli_sk.dst_port != srv_sa6.sin6_port, - "Unexpected cli_sk", "Check cli_sk output. egress_linum:%u", + "cli_sk", "Unexpected. Check cli_sk output. egress_linum:%u\n", egress_linum); CHECK(listen_tp.data_segs_out || listen_tp.data_segs_in || listen_tp.total_retrans || listen_tp.bytes_acked, - "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u", + "listen_tp", + "Unexpected. Check listen_tp output. ingress_linum:%u\n", ingress_linum); CHECK(srv_tp.data_segs_out != 2 || srv_tp.data_segs_in || srv_tp.snd_cwnd != 10 || srv_tp.total_retrans || - srv_tp.bytes_acked != 2 * DATA_LEN, - "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u", + srv_tp.bytes_acked < 2 * DATA_LEN, + "srv_tp", "Unexpected. Check srv_tp output. egress_linum:%u\n", egress_linum); CHECK(cli_tp.data_segs_out || cli_tp.data_segs_in != 2 || cli_tp.snd_cwnd != 10 || cli_tp.total_retrans || - cli_tp.bytes_received != 2 * DATA_LEN, - "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u", + cli_tp.bytes_received < 2 * DATA_LEN, + "cli_tp", "Unexpected. Check cli_tp output. egress_linum:%u\n", egress_linum); } @@ -257,15 +200,14 @@ static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd) &pkt_out_cnt10); /* The bpf prog only counts for fullsock and - * passive conneciton did not become fullsock until 3WHS - * had been finished. - * The bpf prog only counted two data packet out but we - * specially init accept_fd's pkt_out_cnt by 2 in - * init_sk_storage(). Hence, 4 here. + * passive connection did not become fullsock until 3WHS + * had been finished, so the bpf prog only counted two data + * packet out. */ - CHECK(err || pkt_out_cnt.cnt != 4 || pkt_out_cnt10.cnt != 40, + CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 2 || + pkt_out_cnt10.cnt < 0xeB9F + 20, "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)", - "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", + "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n", err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); pkt_out_cnt.cnt = ~0; @@ -280,14 +222,14 @@ static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd) * * The bpf_prog initialized it to 0xeB9F. */ - CHECK(err || pkt_out_cnt.cnt != 0xeB9F + 4 || - pkt_out_cnt10.cnt != 0xeB9F + 40, + CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 4 || + pkt_out_cnt10.cnt < 0xeB9F + 40, "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)", - "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", + "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n", err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); } -static void init_sk_storage(int sk_fd, __u32 pkt_out_cnt) +static int init_sk_storage(int sk_fd, __u32 pkt_out_cnt) { struct bpf_spinlock_cnt scnt = {}; int err; @@ -295,186 +237,190 @@ static void init_sk_storage(int sk_fd, __u32 pkt_out_cnt) scnt.cnt = pkt_out_cnt; err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt, BPF_NOEXIST); - CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)", - "err:%d errno:%d", err, errno); + if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)", + "err:%d errno:%d\n", err, errno)) + return err; - scnt.cnt *= 10; err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt, BPF_NOEXIST); - CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)", - "err:%d errno:%d", err, errno); + if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)", + "err:%d errno:%d\n", err, errno)) + return err; + + return 0; } static void test(void) { - int listen_fd, cli_fd, accept_fd, epfd, err; + int listen_fd = -1, cli_fd = -1, accept_fd = -1, epfd, err, i; struct epoll_event ev; socklen_t addrlen; - int i; + char buf[DATA_LEN]; addrlen = sizeof(struct sockaddr_in6); ev.events = EPOLLIN; epfd = epoll_create(1); - CHECK(epfd == -1, "epoll_create()", "epfd:%d errno:%d", epfd, errno); + if (CHECK(epfd == -1, "epoll_create()", "epfd:%d errno:%d\n", + epfd, errno)) + return; /* Prepare listen_fd */ listen_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); - CHECK(listen_fd == -1, "socket()", "listen_fd:%d errno:%d", - listen_fd, errno); + if (CHECK(listen_fd == -1, "socket()", "listen_fd:%d errno:%d\n", + listen_fd, errno)) + goto done; init_loopback6(&srv_sa6); err = bind(listen_fd, (struct sockaddr *)&srv_sa6, sizeof(srv_sa6)); - CHECK(err, "bind(listen_fd)", "err:%d errno:%d", err, errno); + if (CHECK(err, "bind(listen_fd)", "err:%d errno:%d\n", err, errno)) + goto done; err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); - CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d", err, errno); + if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err, + errno)) + goto done; + memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6)); err = listen(listen_fd, 1); - CHECK(err, "listen(listen_fd)", "err:%d errno:%d", err, errno); + if (CHECK(err, "listen(listen_fd)", "err:%d errno:%d\n", err, errno)) + goto done; /* Prepare cli_fd */ cli_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); - CHECK(cli_fd == -1, "socket()", "cli_fd:%d errno:%d", cli_fd, errno); + if (CHECK(cli_fd == -1, "socket()", "cli_fd:%d errno:%d\n", cli_fd, + errno)) + goto done; init_loopback6(&cli_sa6); err = bind(cli_fd, (struct sockaddr *)&cli_sa6, sizeof(cli_sa6)); - CHECK(err, "bind(cli_fd)", "err:%d errno:%d", err, errno); + if (CHECK(err, "bind(cli_fd)", "err:%d errno:%d\n", err, errno)) + goto done; err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen); - CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d", - err, errno); - - /* Update addr_map with srv_sa6 and cli_sa6 */ - err = bpf_map_update_elem(addr_map_fd, &addr_srv_idx, &srv_sa6, 0); - CHECK(err, "map_update", "err:%d errno:%d", err, errno); - - err = bpf_map_update_elem(addr_map_fd, &addr_cli_idx, &cli_sa6, 0); - CHECK(err, "map_update", "err:%d errno:%d", err, errno); + if (CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d\n", + err, errno)) + goto done; /* Connect from cli_sa6 to srv_sa6 */ err = connect(cli_fd, (struct sockaddr *)&srv_sa6, addrlen); printf("srv_sa6.sin6_port:%u cli_sa6.sin6_port:%u\n\n", ntohs(srv_sa6.sin6_port), ntohs(cli_sa6.sin6_port)); - CHECK(err && errno != EINPROGRESS, - "connect(cli_fd)", "err:%d errno:%d", err, errno); + if (CHECK(err && errno != EINPROGRESS, + "connect(cli_fd)", "err:%d errno:%d\n", err, errno)) + goto done; ev.data.fd = listen_fd; err = epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev); - CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, listen_fd)", "err:%d errno:%d", - err, errno); + if (CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, listen_fd)", + "err:%d errno:%d\n", err, errno)) + goto done; /* Accept the connection */ /* Have some timeout in accept(listen_fd). Just in case. */ err = epoll_wait(epfd, &ev, 1, 1000); - CHECK(err != 1 || ev.data.fd != listen_fd, - "epoll_wait(listen_fd)", - "err:%d errno:%d ev.data.fd:%d listen_fd:%d", - err, errno, ev.data.fd, listen_fd); + if (CHECK(err != 1 || ev.data.fd != listen_fd, + "epoll_wait(listen_fd)", + "err:%d errno:%d ev.data.fd:%d listen_fd:%d\n", + err, errno, ev.data.fd, listen_fd)) + goto done; accept_fd = accept(listen_fd, NULL, NULL); - CHECK(accept_fd == -1, "accept(listen_fd)", "accept_fd:%d errno:%d", - accept_fd, errno); - close(listen_fd); + if (CHECK(accept_fd == -1, "accept(listen_fd)", + "accept_fd:%d errno:%d\n", + accept_fd, errno)) + goto done; ev.data.fd = cli_fd; err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev); - CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d", - err, errno); + if (CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", + "err:%d errno:%d\n", err, errno)) + goto done; - init_sk_storage(accept_fd, 2); + if (init_sk_storage(accept_fd, 0xeB9F)) + goto done; for (i = 0; i < 2; i++) { - /* Send some data from accept_fd to cli_fd */ - err = send(accept_fd, DATA, DATA_LEN, 0); - CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d", - err, errno); + /* Send some data from accept_fd to cli_fd. + * MSG_EOR to stop kernel from coalescing two pkts. + */ + err = send(accept_fd, DATA, DATA_LEN, MSG_EOR); + if (CHECK(err != DATA_LEN, "send(accept_fd)", + "err:%d errno:%d\n", err, errno)) + goto done; /* Have some timeout in recv(cli_fd). Just in case. */ err = epoll_wait(epfd, &ev, 1, 1000); - CHECK(err != 1 || ev.data.fd != cli_fd, - "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d", - err, errno, ev.data.fd, cli_fd); - - err = recv(cli_fd, NULL, 0, MSG_TRUNC); - CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno); + if (CHECK(err != 1 || ev.data.fd != cli_fd, + "epoll_wait(cli_fd)", + "err:%d errno:%d ev.data.fd:%d cli_fd:%d\n", + err, errno, ev.data.fd, cli_fd)) + goto done; + + err = recv(cli_fd, buf, DATA_LEN, 0); + if (CHECK(err != DATA_LEN, "recv(cli_fd)", "err:%d errno:%d\n", + err, errno)) + goto done; } + shutdown(cli_fd, SHUT_WR); + err = recv(accept_fd, buf, 1, 0); + if (CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n", + err, errno)) + goto done; + shutdown(accept_fd, SHUT_WR); + err = recv(cli_fd, buf, 1, 0); + if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n", + err, errno)) + goto done; check_sk_pkt_out_cnt(accept_fd, cli_fd); + check_result(); +done: + if (accept_fd != -1) + close(accept_fd); + if (cli_fd != -1) + close(cli_fd); + if (listen_fd != -1) + close(listen_fd); close(epfd); - close(accept_fd); - close(cli_fd); - - check_result(); } void test_sock_fields(void) { - struct bpf_prog_load_attr attr = { - .file = "test_sock_fields.o", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - .prog_flags = BPF_F_TEST_RND_HI32, - }; - int cgroup_fd, egress_fd, ingress_fd, err; - struct bpf_program *ingress_prog; - struct bpf_object *obj; - struct bpf_map *map; + struct bpf_link *egress_link = NULL, *ingress_link = NULL; + int cgroup_fd; /* Create a cgroup, get fd, and join it */ - cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); - CHECK(cgroup_fd < 0, "cgroup_setup_and_join()", - "cgroup_fd:%d errno:%d", cgroup_fd, errno); - atexit(cleanup_cgroup_environment); - - err = bpf_prog_load_xattr(&attr, &obj, &egress_fd); - CHECK(err, "bpf_prog_load_xattr()", "err:%d", err); - - ingress_prog = bpf_object__find_program_by_title(obj, - "cgroup_skb/ingress"); - CHECK(!ingress_prog, - "bpf_object__find_program_by_title(cgroup_skb/ingress)", - "not found"); - ingress_fd = bpf_program__fd(ingress_prog); - - err = bpf_prog_attach(egress_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0); - CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)", - "err:%d errno%d", err, errno); - - err = bpf_prog_attach(ingress_fd, cgroup_fd, - BPF_CGROUP_INET_INGRESS, 0); - CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_INGRESS)", - "err:%d errno%d", err, errno); - close(cgroup_fd); - - map = bpf_object__find_map_by_name(obj, "addr_map"); - CHECK(!map, "cannot find addr_map", "(null)"); - addr_map_fd = bpf_map__fd(map); - - map = bpf_object__find_map_by_name(obj, "sock_result_map"); - CHECK(!map, "cannot find sock_result_map", "(null)"); - sk_map_fd = bpf_map__fd(map); - - map = bpf_object__find_map_by_name(obj, "tcp_sock_result_map"); - CHECK(!map, "cannot find tcp_sock_result_map", "(null)"); - tp_map_fd = bpf_map__fd(map); - - map = bpf_object__find_map_by_name(obj, "linum_map"); - CHECK(!map, "cannot find linum_map", "(null)"); - linum_map_fd = bpf_map__fd(map); - - map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt"); - CHECK(!map, "cannot find sk_pkt_out_cnt", "(null)"); - sk_pkt_out_cnt_fd = bpf_map__fd(map); - - map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt10"); - CHECK(!map, "cannot find sk_pkt_out_cnt10", "(null)"); - sk_pkt_out_cnt10_fd = bpf_map__fd(map); + cgroup_fd = test__join_cgroup(TEST_CGROUP); + if (CHECK_FAIL(cgroup_fd < 0)) + return; + + skel = test_sock_fields__open_and_load(); + if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n")) + goto done; + + egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, + cgroup_fd); + if (CHECK(IS_ERR(egress_link), "attach_cgroup(egress)", "err:%ld\n", + PTR_ERR(egress_link))) + goto done; + + ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, + cgroup_fd); + if (CHECK(IS_ERR(ingress_link), "attach_cgroup(ingress)", "err:%ld\n", + PTR_ERR(ingress_link))) + goto done; + + linum_map_fd = bpf_map__fd(skel->maps.linum_map); + sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt); + sk_pkt_out_cnt10_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt10); test(); - bpf_object__close(obj); - cleanup_cgroup_environment(); - - printf("PASS\n"); +done: + bpf_link__destroy(egress_link); + bpf_link__destroy(ingress_link); + test_sock_fields__destroy(skel); + close(cgroup_fd); } diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c index 9bcaa37f476a..370e33a858db 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c @@ -8,46 +8,12 @@ #include #include -enum bpf_addr_array_idx { - ADDR_SRV_IDX, - ADDR_CLI_IDX, - __NR_BPF_ADDR_ARRAY_IDX, -}; - -enum bpf_result_array_idx { - EGRESS_SRV_IDX, - EGRESS_CLI_IDX, - INGRESS_LISTEN_IDX, - __NR_BPF_RESULT_ARRAY_IDX, -}; - enum bpf_linum_array_idx { EGRESS_LINUM_IDX, INGRESS_LINUM_IDX, __NR_BPF_LINUM_ARRAY_IDX, }; -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, __NR_BPF_ADDR_ARRAY_IDX); - __type(key, __u32); - __type(value, struct sockaddr_in6); -} addr_map SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX); - __type(key, __u32); - __type(value, struct bpf_sock); -} sock_result_map SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX); - __type(key, __u32); - __type(value, struct bpf_tcp_sock); -} tcp_sock_result_map SEC(".maps"); - struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX); @@ -74,6 +40,14 @@ struct { __type(value, struct bpf_spinlock_cnt); } sk_pkt_out_cnt10 SEC(".maps"); +struct bpf_tcp_sock listen_tp = {}; +struct sockaddr_in6 srv_sa6 = {}; +struct bpf_tcp_sock cli_tp = {}; +struct bpf_tcp_sock srv_tp = {}; +struct bpf_sock listen_sk = {}; +struct bpf_sock srv_sk = {}; +struct bpf_sock cli_sk = {}; + static bool is_loopback6(__u32 *a6) { return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1); @@ -130,19 +104,20 @@ static void tpcpy(struct bpf_tcp_sock *dst, dst->bytes_acked = src->bytes_acked; } -#define RETURN { \ +/* Always return CG_OK so that no pkt will be filtered out */ +#define CG_OK 1 + +#define RET_LOG() ({ \ linum = __LINE__; \ - bpf_map_update_elem(&linum_map, &linum_idx, &linum, 0); \ - return 1; \ -} + bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_NOEXIST); \ + return CG_OK; \ +}) SEC("cgroup_skb/egress") int egress_read_sock_fields(struct __sk_buff *skb) { struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F }; - __u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx; struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10; - struct sockaddr_in6 *srv_sa6, *cli_sa6; struct bpf_tcp_sock *tp, *tp_ret; struct bpf_sock *sk, *sk_ret; __u32 linum, linum_idx; @@ -150,39 +125,46 @@ int egress_read_sock_fields(struct __sk_buff *skb) linum_idx = EGRESS_LINUM_IDX; sk = skb->sk; - if (!sk || sk->state == 10) - RETURN; + if (!sk) + RET_LOG(); + /* Not the testing egress traffic or + * TCP_LISTEN (10) socket will be copied at the ingress side. + */ + if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) || + sk->state == 10) + return CG_OK; + + if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) { + /* Server socket */ + sk_ret = &srv_sk; + tp_ret = &srv_tp; + } else if (sk->dst_port == srv_sa6.sin6_port) { + /* Client socket */ + sk_ret = &cli_sk; + tp_ret = &cli_tp; + } else { + /* Not the testing egress traffic */ + return CG_OK; + } + + /* It must be a fullsock for cgroup_skb/egress prog */ sk = bpf_sk_fullsock(sk); - if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP || - !is_loopback6(sk->src_ip6)) - RETURN; + if (!sk) + RET_LOG(); + + /* Not the testing egress traffic */ + if (sk->protocol != IPPROTO_TCP) + return CG_OK; tp = bpf_tcp_sock(sk); if (!tp) - RETURN; - - srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx); - cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx); - if (!srv_sa6 || !cli_sa6) - RETURN; - - if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port)) - result_idx = EGRESS_SRV_IDX; - else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port)) - result_idx = EGRESS_CLI_IDX; - else - RETURN; - - sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx); - tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx); - if (!sk_ret || !tp_ret) - RETURN; + RET_LOG(); skcpy(sk_ret, sk); tpcpy(tp_ret, tp); - if (result_idx == EGRESS_SRV_IDX) { + if (sk_ret == &srv_sk) { /* The userspace has created it for srv sk */ pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, 0, 0); pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, sk, @@ -197,7 +179,7 @@ int egress_read_sock_fields(struct __sk_buff *skb) } if (!pkt_out_cnt || !pkt_out_cnt10) - RETURN; + RET_LOG(); /* Even both cnt and cnt10 have lock defined in their BTF, * intentionally one cnt takes lock while one does not @@ -208,48 +190,44 @@ int egress_read_sock_fields(struct __sk_buff *skb) pkt_out_cnt10->cnt += 10; bpf_spin_unlock(&pkt_out_cnt10->lock); - RETURN; + return CG_OK; } SEC("cgroup_skb/ingress") int ingress_read_sock_fields(struct __sk_buff *skb) { - __u32 srv_idx = ADDR_SRV_IDX, result_idx = INGRESS_LISTEN_IDX; - struct bpf_tcp_sock *tp, *tp_ret; - struct bpf_sock *sk, *sk_ret; - struct sockaddr_in6 *srv_sa6; + struct bpf_tcp_sock *tp; __u32 linum, linum_idx; + struct bpf_sock *sk; linum_idx = INGRESS_LINUM_IDX; sk = skb->sk; - if (!sk || sk->family != AF_INET6 || !is_loopback6(sk->src_ip6)) - RETURN; + if (!sk) + RET_LOG(); - srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx); - if (!srv_sa6 || sk->src_port != bpf_ntohs(srv_sa6->sin6_port)) - RETURN; + /* Not the testing ingress traffic to the server */ + if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) || + sk->src_port != bpf_ntohs(srv_sa6.sin6_port)) + return CG_OK; - if (sk->state != 10 && sk->state != 12) - RETURN; + /* Only interested in TCP_LISTEN */ + if (sk->state != 10) + return CG_OK; - sk = bpf_get_listener_sock(sk); + /* It must be a fullsock for cgroup_skb/ingress prog */ + sk = bpf_sk_fullsock(sk); if (!sk) - RETURN; + RET_LOG(); tp = bpf_tcp_sock(sk); if (!tp) - RETURN; - - sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx); - tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx); - if (!sk_ret || !tp_ret) - RETURN; + RET_LOG(); - skcpy(sk_ret, sk); - tpcpy(tp_ret, tp); + skcpy(&listen_sk, sk); + tpcpy(&listen_tp, tp); - RETURN; + return CG_OK; } char _license[] SEC("license") = "GPL"; -- cgit v1.3.1 From c40a565a04afd0429ecc8a1a2c9aff897796118f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 24 Sep 2020 17:04:40 -0700 Subject: bpf: selftest: Use network_helpers in the sock_fields test This patch uses start_server() and connect_to_fd() from network_helpers.h to remove the network testing boiler plate codes. epoll is no longer needed also since the timeout has already been taken care of also. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200925000440.3858639-1-kafai@fb.com --- .../testing/selftests/bpf/prog_tests/sock_fields.c | 88 +++------------------- 1 file changed, 9 insertions(+), 79 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c index d96b718639fb..eea8b96bb1be 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -1,8 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ -#include -#include #include #include #include @@ -12,7 +10,9 @@ #include #include +#include +#include "network_helpers.h" #include "cgroup_helpers.h" #include "test_progs.h" #include "bpf_rlimit.h" @@ -43,13 +43,6 @@ static __u32 duration; static __u32 egress_linum_idx = EGRESS_LINUM_IDX; static __u32 ingress_linum_idx = INGRESS_LINUM_IDX; -static void init_loopback6(struct sockaddr_in6 *sa6) -{ - memset(sa6, 0, sizeof(*sa6)); - sa6->sin6_family = AF_INET6; - sa6->sin6_addr = in6addr_loopback; -} - static void print_sk(const struct bpf_sock *sk, const char *prefix) { char src_ip4[24], dst_ip4[24]; @@ -252,28 +245,14 @@ static int init_sk_storage(int sk_fd, __u32 pkt_out_cnt) static void test(void) { - int listen_fd = -1, cli_fd = -1, accept_fd = -1, epfd, err, i; - struct epoll_event ev; - socklen_t addrlen; + int listen_fd = -1, cli_fd = -1, accept_fd = -1, err, i; + socklen_t addrlen = sizeof(struct sockaddr_in6); char buf[DATA_LEN]; - addrlen = sizeof(struct sockaddr_in6); - ev.events = EPOLLIN; - - epfd = epoll_create(1); - if (CHECK(epfd == -1, "epoll_create()", "epfd:%d errno:%d\n", - epfd, errno)) - return; - /* Prepare listen_fd */ - listen_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); - if (CHECK(listen_fd == -1, "socket()", "listen_fd:%d errno:%d\n", - listen_fd, errno)) - goto done; - - init_loopback6(&srv_sa6); - err = bind(listen_fd, (struct sockaddr *)&srv_sa6, sizeof(srv_sa6)); - if (CHECK(err, "bind(listen_fd)", "err:%d errno:%d\n", err, errno)) + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + /* start_server() has logged the error details */ + if (CHECK_FAIL(listen_fd == -1)) goto done; err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); @@ -282,19 +261,8 @@ static void test(void) goto done; memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6)); - err = listen(listen_fd, 1); - if (CHECK(err, "listen(listen_fd)", "err:%d errno:%d\n", err, errno)) - goto done; - - /* Prepare cli_fd */ - cli_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); - if (CHECK(cli_fd == -1, "socket()", "cli_fd:%d errno:%d\n", cli_fd, - errno)) - goto done; - - init_loopback6(&cli_sa6); - err = bind(cli_fd, (struct sockaddr *)&cli_sa6, sizeof(cli_sa6)); - if (CHECK(err, "bind(cli_fd)", "err:%d errno:%d\n", err, errno)) + cli_fd = connect_to_fd(listen_fd, 0); + if (CHECK_FAIL(cli_fd == -1)) goto done; err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen); @@ -302,41 +270,12 @@ static void test(void) err, errno)) goto done; - /* Connect from cli_sa6 to srv_sa6 */ - err = connect(cli_fd, (struct sockaddr *)&srv_sa6, addrlen); - printf("srv_sa6.sin6_port:%u cli_sa6.sin6_port:%u\n\n", - ntohs(srv_sa6.sin6_port), ntohs(cli_sa6.sin6_port)); - if (CHECK(err && errno != EINPROGRESS, - "connect(cli_fd)", "err:%d errno:%d\n", err, errno)) - goto done; - - ev.data.fd = listen_fd; - err = epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev); - if (CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, listen_fd)", - "err:%d errno:%d\n", err, errno)) - goto done; - - /* Accept the connection */ - /* Have some timeout in accept(listen_fd). Just in case. */ - err = epoll_wait(epfd, &ev, 1, 1000); - if (CHECK(err != 1 || ev.data.fd != listen_fd, - "epoll_wait(listen_fd)", - "err:%d errno:%d ev.data.fd:%d listen_fd:%d\n", - err, errno, ev.data.fd, listen_fd)) - goto done; - accept_fd = accept(listen_fd, NULL, NULL); if (CHECK(accept_fd == -1, "accept(listen_fd)", "accept_fd:%d errno:%d\n", accept_fd, errno)) goto done; - ev.data.fd = cli_fd; - err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev); - if (CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", - "err:%d errno:%d\n", err, errno)) - goto done; - if (init_sk_storage(accept_fd, 0xeB9F)) goto done; @@ -349,14 +288,6 @@ static void test(void) "err:%d errno:%d\n", err, errno)) goto done; - /* Have some timeout in recv(cli_fd). Just in case. */ - err = epoll_wait(epfd, &ev, 1, 1000); - if (CHECK(err != 1 || ev.data.fd != cli_fd, - "epoll_wait(cli_fd)", - "err:%d errno:%d ev.data.fd:%d cli_fd:%d\n", - err, errno, ev.data.fd, cli_fd)) - goto done; - err = recv(cli_fd, buf, DATA_LEN, 0); if (CHECK(err != DATA_LEN, "recv(cli_fd)", "err:%d errno:%d\n", err, errno)) @@ -383,7 +314,6 @@ done: close(cli_fd); if (listen_fd != -1) close(listen_fd); - close(epfd); } void test_sock_fields(void) -- cgit v1.3.1 From edc2d66ad1851db6bd89a611b4e46470f1a40ba9 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 24 Sep 2020 17:04:46 -0700 Subject: bpf: selftest: Use bpf_skc_to_tcp_sock() in the sock_fields test This test uses bpf_skc_to_tcp_sock() to get a kernel tcp_sock ptr "ktp". Access the ktp->lsndtime and also pass ktp to bpf_sk_storage_get(). It also exercises the bpf_sk_cgroup_id() and bpf_sk_ancestor_cgroup_id() with the "ktp". To do that, a parent cgroup and a child cgroup are created. The bpf prog is attached to the child cgroup. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200925000446.3858975-1-kafai@fb.com --- .../testing/selftests/bpf/prog_tests/sock_fields.c | 40 ++++++++++++++++++---- .../testing/selftests/bpf/progs/test_sock_fields.c | 24 +++++++++++-- 2 files changed, 55 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c index eea8b96bb1be..66e83b8fc69d 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -29,7 +29,8 @@ struct bpf_spinlock_cnt { __u32 cnt; }; -#define TEST_CGROUP "/test-bpf-sock-fields" +#define PARENT_CGROUP "/test-bpf-sock-fields" +#define CHILD_CGROUP "/test-bpf-sock-fields/child" #define DATA "Hello BPF!" #define DATA_LEN sizeof(DATA) @@ -37,6 +38,8 @@ static struct sockaddr_in6 srv_sa6, cli_sa6; static int sk_pkt_out_cnt10_fd; struct test_sock_fields *skel; static int sk_pkt_out_cnt_fd; +static __u64 parent_cg_id; +static __u64 child_cg_id; static int linum_map_fd; static __u32 duration; @@ -142,6 +145,8 @@ static void check_result(void) "srv_sk", "Unexpected. Check srv_sk output. egress_linum:%u\n", egress_linum); + CHECK(!skel->bss->lsndtime, "srv_tp", "Unexpected lsndtime:0\n"); + CHECK(cli_sk.state == 10 || !cli_sk.state || cli_sk.family != AF_INET6 || @@ -178,6 +183,14 @@ static void check_result(void) cli_tp.bytes_received < 2 * DATA_LEN, "cli_tp", "Unexpected. Check cli_tp output. egress_linum:%u\n", egress_linum); + + CHECK(skel->bss->parent_cg_id != parent_cg_id, + "parent_cg_id", "%zu != %zu\n", + (size_t)skel->bss->parent_cg_id, (size_t)parent_cg_id); + + CHECK(skel->bss->child_cg_id != child_cg_id, + "child_cg_id", "%zu != %zu\n", + (size_t)skel->bss->child_cg_id, (size_t)child_cg_id); } static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd) @@ -319,25 +332,35 @@ done: void test_sock_fields(void) { struct bpf_link *egress_link = NULL, *ingress_link = NULL; - int cgroup_fd; + int parent_cg_fd = -1, child_cg_fd = -1; /* Create a cgroup, get fd, and join it */ - cgroup_fd = test__join_cgroup(TEST_CGROUP); - if (CHECK_FAIL(cgroup_fd < 0)) + parent_cg_fd = test__join_cgroup(PARENT_CGROUP); + if (CHECK_FAIL(parent_cg_fd < 0)) return; + parent_cg_id = get_cgroup_id(PARENT_CGROUP); + if (CHECK_FAIL(!parent_cg_id)) + goto done; + + child_cg_fd = test__join_cgroup(CHILD_CGROUP); + if (CHECK_FAIL(child_cg_fd < 0)) + goto done; + child_cg_id = get_cgroup_id(CHILD_CGROUP); + if (CHECK_FAIL(!child_cg_id)) + goto done; skel = test_sock_fields__open_and_load(); if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n")) goto done; egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, - cgroup_fd); + child_cg_fd); if (CHECK(IS_ERR(egress_link), "attach_cgroup(egress)", "err:%ld\n", PTR_ERR(egress_link))) goto done; ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, - cgroup_fd); + child_cg_fd); if (CHECK(IS_ERR(ingress_link), "attach_cgroup(ingress)", "err:%ld\n", PTR_ERR(ingress_link))) goto done; @@ -352,5 +375,8 @@ done: bpf_link__destroy(egress_link); bpf_link__destroy(ingress_link); test_sock_fields__destroy(skel); - close(cgroup_fd); + if (child_cg_fd != -1) + close(child_cg_fd); + if (parent_cg_fd != -1) + close(parent_cg_fd); } diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c index 370e33a858db..81b57b9aaaea 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c @@ -7,6 +7,7 @@ #include #include +#include "bpf_tcp_helpers.h" enum bpf_linum_array_idx { EGRESS_LINUM_IDX, @@ -47,6 +48,9 @@ struct bpf_tcp_sock srv_tp = {}; struct bpf_sock listen_sk = {}; struct bpf_sock srv_sk = {}; struct bpf_sock cli_sk = {}; +__u64 parent_cg_id = 0; +__u64 child_cg_id = 0; +__u64 lsndtime = 0; static bool is_loopback6(__u32 *a6) { @@ -121,6 +125,7 @@ int egress_read_sock_fields(struct __sk_buff *skb) struct bpf_tcp_sock *tp, *tp_ret; struct bpf_sock *sk, *sk_ret; __u32 linum, linum_idx; + struct tcp_sock *ktp; linum_idx = EGRESS_LINUM_IDX; @@ -165,9 +170,24 @@ int egress_read_sock_fields(struct __sk_buff *skb) tpcpy(tp_ret, tp); if (sk_ret == &srv_sk) { + ktp = bpf_skc_to_tcp_sock(sk); + + if (!ktp) + RET_LOG(); + + lsndtime = ktp->lsndtime; + + child_cg_id = bpf_sk_cgroup_id(ktp); + if (!child_cg_id) + RET_LOG(); + + parent_cg_id = bpf_sk_ancestor_cgroup_id(ktp, 2); + if (!parent_cg_id) + RET_LOG(); + /* The userspace has created it for srv sk */ - pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, 0, 0); - pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, sk, + pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, ktp, 0, 0); + pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, ktp, 0, 0); } else { pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, -- cgit v1.3.1 From 0c402c6c3031fd6ba23c6a2433b9f804da093b20 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 24 Sep 2020 17:04:52 -0700 Subject: bpf: selftest: Remove enum tcp_ca_state from bpf_tcp_helpers.h The enum tcp_ca_state is available in . Remove it from the bpf_tcp_helpers.h to avoid conflict when the bpf prog needs to include both both and bpf_tcp_helpers.h. Modify the bpf_cubic.c and bpf_dctcp.c to use instead. The is needed by . Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200925000452.3859313-1-kafai@fb.com --- tools/testing/selftests/bpf/bpf_tcp_helpers.h | 8 -------- tools/testing/selftests/bpf/progs/bpf_cubic.c | 2 ++ tools/testing/selftests/bpf/progs/bpf_dctcp.c | 2 ++ 3 files changed, 4 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h index 5bf2fe9b1efa..a0e8b3758bd7 100644 --- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h +++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h @@ -115,14 +115,6 @@ enum tcp_ca_event { CA_EVENT_ECN_IS_CE = 5, }; -enum tcp_ca_state { - TCP_CA_Open = 0, - TCP_CA_Disorder = 1, - TCP_CA_CWR = 2, - TCP_CA_Recovery = 3, - TCP_CA_Loss = 4 -}; - struct ack_sample { __u32 pkts_acked; __s32 rtt_us; diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c index ef574087f1e1..6939bfd8690f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c @@ -15,6 +15,8 @@ */ #include +#include +#include #include "bpf_tcp_helpers.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c index 3fb4260570b1..4dc1a967776a 100644 --- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c +++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include #include #include "bpf_tcp_helpers.h" -- cgit v1.3.1 From 9a856cae2217ca1bc0726417d32f3f1daf035389 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 24 Sep 2020 17:04:58 -0700 Subject: bpf: selftest: Add test_btf_skc_cls_ingress This patch attaches a classifier prog to the ingress filter. It exercises the following helpers with different socket pointer types in different logical branches: 1. bpf_sk_release() 2. bpf_sk_assign() 3. bpf_skc_to_tcp_request_sock(), bpf_skc_to_tcp_sock() 4. bpf_tcp_gen_syncookie, bpf_tcp_check_syncookie Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200925000458.3859627-1-kafai@fb.com --- tools/testing/selftests/bpf/bpf_tcp_helpers.h | 5 + .../selftests/bpf/prog_tests/btf_skc_cls_ingress.c | 234 +++++++++++++++++++++ .../selftests/bpf/progs/test_btf_skc_cls_ingress.c | 174 +++++++++++++++ 3 files changed, 413 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c create mode 100644 tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h index a0e8b3758bd7..2915664c335d 100644 --- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h +++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h @@ -16,6 +16,7 @@ BPF_PROG(name, args) struct sock_common { unsigned char skc_state; + __u16 skc_num; } __attribute__((preserve_access_index)); enum sk_pacing { @@ -45,6 +46,10 @@ struct inet_connection_sock { __u64 icsk_ca_priv[104 / sizeof(__u64)]; } __attribute__((preserve_access_index)); +struct request_sock { + struct sock_common __req_common; +} __attribute__((preserve_access_index)); + struct tcp_sock { struct inet_connection_sock inet_conn; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c new file mode 100644 index 000000000000..4ce0e8a25bc5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "network_helpers.h" +#include "test_progs.h" +#include "test_btf_skc_cls_ingress.skel.h" + +struct test_btf_skc_cls_ingress *skel; +struct sockaddr_in6 srv_sa6; +static __u32 duration; + +#define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress" + +static int write_sysctl(const char *sysctl, const char *value) +{ + int fd, err, len; + + fd = open(sysctl, O_WRONLY); + if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n", + sysctl, strerror(errno), errno)) + return -1; + + len = strlen(value); + err = write(fd, value, len); + close(fd); + if (CHECK(err != len, "write sysctl", + "write(%s, %s, %d): err:%d %s (%d)\n", + sysctl, value, len, err, strerror(errno), errno)) + return -1; + + return 0; +} + +static int prepare_netns(void) +{ + if (CHECK(unshare(CLONE_NEWNET), "create netns", + "unshare(CLONE_NEWNET): %s (%d)", + strerror(errno), errno)) + return -1; + + if (CHECK(system("ip link set dev lo up"), + "ip link set dev lo up", "failed\n")) + return -1; + + if (CHECK(system("tc qdisc add dev lo clsact"), + "tc qdisc add dev lo clsact", "failed\n")) + return -1; + + if (CHECK(system("tc filter add dev lo ingress bpf direct-action object-pinned " PROG_PIN_FILE), + "install tc cls-prog at ingress", "failed\n")) + return -1; + + /* Ensure 20 bytes options (i.e. in total 40 bytes tcp header) for the + * bpf_tcp_gen_syncookie() helper. + */ + if (write_sysctl("/proc/sys/net/ipv4/tcp_window_scaling", "1") || + write_sysctl("/proc/sys/net/ipv4/tcp_timestamps", "1") || + write_sysctl("/proc/sys/net/ipv4/tcp_sack", "1")) + return -1; + + return 0; +} + +static void reset_test(void) +{ + memset(&skel->bss->srv_sa6, 0, sizeof(skel->bss->srv_sa6)); + skel->bss->listen_tp_sport = 0; + skel->bss->req_sk_sport = 0; + skel->bss->recv_cookie = 0; + skel->bss->gen_cookie = 0; + skel->bss->linum = 0; +} + +static void print_err_line(void) +{ + if (skel->bss->linum) + printf("bpf prog error at line %u\n", skel->bss->linum); +} + +static void test_conn(void) +{ + int listen_fd = -1, cli_fd = -1, err; + socklen_t addrlen = sizeof(srv_sa6); + int srv_port; + + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1")) + return; + + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + if (CHECK_FAIL(listen_fd == -1)) + return; + + err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); + if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err, + errno)) + goto done; + memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6)); + srv_port = ntohs(srv_sa6.sin6_port); + + cli_fd = connect_to_fd(listen_fd, 0); + if (CHECK_FAIL(cli_fd == -1)) + goto done; + + if (CHECK(skel->bss->listen_tp_sport != srv_port || + skel->bss->req_sk_sport != srv_port, + "Unexpected sk src port", + "listen_tp_sport:%u req_sk_sport:%u expected:%u\n", + skel->bss->listen_tp_sport, skel->bss->req_sk_sport, + srv_port)) + goto done; + + if (CHECK(skel->bss->gen_cookie || skel->bss->recv_cookie, + "Unexpected syncookie states", + "gen_cookie:%u recv_cookie:%u\n", + skel->bss->gen_cookie, skel->bss->recv_cookie)) + goto done; + + CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n", + skel->bss->linum); + +done: + if (listen_fd != -1) + close(listen_fd); + if (cli_fd != -1) + close(cli_fd); +} + +static void test_syncookie(void) +{ + int listen_fd = -1, cli_fd = -1, err; + socklen_t addrlen = sizeof(srv_sa6); + int srv_port; + + /* Enforce syncookie mode */ + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2")) + return; + + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + if (CHECK_FAIL(listen_fd == -1)) + return; + + err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); + if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err, + errno)) + goto done; + memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6)); + srv_port = ntohs(srv_sa6.sin6_port); + + cli_fd = connect_to_fd(listen_fd, 0); + if (CHECK_FAIL(cli_fd == -1)) + goto done; + + if (CHECK(skel->bss->listen_tp_sport != srv_port, + "Unexpected tp src port", + "listen_tp_sport:%u expected:%u\n", + skel->bss->listen_tp_sport, srv_port)) + goto done; + + if (CHECK(skel->bss->req_sk_sport, + "Unexpected req_sk src port", + "req_sk_sport:%u expected:0\n", + skel->bss->req_sk_sport)) + goto done; + + if (CHECK(!skel->bss->gen_cookie || + skel->bss->gen_cookie != skel->bss->recv_cookie, + "Unexpected syncookie states", + "gen_cookie:%u recv_cookie:%u\n", + skel->bss->gen_cookie, skel->bss->recv_cookie)) + goto done; + + CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n", + skel->bss->linum); + +done: + if (listen_fd != -1) + close(listen_fd); + if (cli_fd != -1) + close(cli_fd); +} + +struct test { + const char *desc; + void (*run)(void); +}; + +#define DEF_TEST(name) { #name, test_##name } +static struct test tests[] = { + DEF_TEST(conn), + DEF_TEST(syncookie), +}; + +void test_btf_skc_cls_ingress(void) +{ + int i, err; + + skel = test_btf_skc_cls_ingress__open_and_load(); + if (CHECK(!skel, "test_btf_skc_cls_ingress__open_and_load", "failed\n")) + return; + + err = bpf_program__pin(skel->progs.cls_ingress, PROG_PIN_FILE); + if (CHECK(err, "bpf_program__pin", + "cannot pin bpf prog to %s. err:%d\n", PROG_PIN_FILE, err)) { + test_btf_skc_cls_ingress__destroy(skel); + return; + } + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (!test__start_subtest(tests[i].desc)) + continue; + + if (prepare_netns()) + break; + + tests[i].run(); + + print_err_line(); + reset_test(); + } + + bpf_program__unpin(skel->progs.cls_ingress, PROG_PIN_FILE); + test_btf_skc_cls_ingress__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c new file mode 100644 index 000000000000..9a6b85dd52d2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "bpf_tcp_helpers.h" + +struct sockaddr_in6 srv_sa6 = {}; +__u16 listen_tp_sport = 0; +__u16 req_sk_sport = 0; +__u32 recv_cookie = 0; +__u32 gen_cookie = 0; +__u32 linum = 0; + +#define LOG() ({ if (!linum) linum = __LINE__; }) + +static void test_syncookie_helper(struct ipv6hdr *ip6h, struct tcphdr *th, + struct tcp_sock *tp, + struct __sk_buff *skb) +{ + if (th->syn) { + __s64 mss_cookie; + void *data_end; + + data_end = (void *)(long)(skb->data_end); + + if (th->doff * 4 != 40) { + LOG(); + return; + } + + if ((void *)th + 40 > data_end) { + LOG(); + return; + } + + mss_cookie = bpf_tcp_gen_syncookie(tp, ip6h, sizeof(*ip6h), + th, 40); + if (mss_cookie < 0) { + if (mss_cookie != -ENOENT) + LOG(); + } else { + gen_cookie = (__u32)mss_cookie; + } + } else if (gen_cookie) { + /* It was in cookie mode */ + int ret = bpf_tcp_check_syncookie(tp, ip6h, sizeof(*ip6h), + th, sizeof(*th)); + + if (ret < 0) { + if (ret != -ENOENT) + LOG(); + } else { + recv_cookie = bpf_ntohl(th->ack_seq) - 1; + } + } +} + +static int handle_ip6_tcp(struct ipv6hdr *ip6h, struct __sk_buff *skb) +{ + struct bpf_sock_tuple *tuple; + struct bpf_sock *bpf_skc; + unsigned int tuple_len; + struct tcphdr *th; + void *data_end; + + data_end = (void *)(long)(skb->data_end); + + th = (struct tcphdr *)(ip6h + 1); + if (th + 1 > data_end) + return TC_ACT_OK; + + /* Is it the testing traffic? */ + if (th->dest != srv_sa6.sin6_port) + return TC_ACT_OK; + + tuple_len = sizeof(tuple->ipv6); + tuple = (struct bpf_sock_tuple *)&ip6h->saddr; + if ((void *)tuple + tuple_len > data_end) { + LOG(); + return TC_ACT_OK; + } + + bpf_skc = bpf_skc_lookup_tcp(skb, tuple, tuple_len, + BPF_F_CURRENT_NETNS, 0); + if (!bpf_skc) { + LOG(); + return TC_ACT_OK; + } + + if (bpf_skc->state == BPF_TCP_NEW_SYN_RECV) { + struct request_sock *req_sk; + + req_sk = (struct request_sock *)bpf_skc_to_tcp_request_sock(bpf_skc); + if (!req_sk) { + LOG(); + goto release; + } + + if (bpf_sk_assign(skb, req_sk, 0)) { + LOG(); + goto release; + } + + req_sk_sport = req_sk->__req_common.skc_num; + + bpf_sk_release(req_sk); + return TC_ACT_OK; + } else if (bpf_skc->state == BPF_TCP_LISTEN) { + struct tcp_sock *tp; + + tp = bpf_skc_to_tcp_sock(bpf_skc); + if (!tp) { + LOG(); + goto release; + } + + if (bpf_sk_assign(skb, tp, 0)) { + LOG(); + goto release; + } + + listen_tp_sport = tp->inet_conn.icsk_inet.sk.__sk_common.skc_num; + + test_syncookie_helper(ip6h, th, tp, skb); + bpf_sk_release(tp); + return TC_ACT_OK; + } + + if (bpf_sk_assign(skb, bpf_skc, 0)) + LOG(); + +release: + bpf_sk_release(bpf_skc); + return TC_ACT_OK; +} + +SEC("classifier/ingress") +int cls_ingress(struct __sk_buff *skb) +{ + struct ipv6hdr *ip6h; + struct ethhdr *eth; + void *data_end; + + data_end = (void *)(long)(skb->data_end); + + eth = (struct ethhdr *)(long)(skb->data); + if (eth + 1 > data_end) + return TC_ACT_OK; + + if (eth->h_proto != bpf_htons(ETH_P_IPV6)) + return TC_ACT_OK; + + ip6h = (struct ipv6hdr *)(eth + 1); + if (ip6h + 1 > data_end) + return TC_ACT_OK; + + if (ip6h->nexthdr == IPPROTO_TCP) + return handle_ip6_tcp(ip6h, skb); + + return TC_ACT_OK; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.3.1 From e56dc9e2949edff7932474f2552dd134734cc857 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 30 Jul 2020 20:33:57 -0400 Subject: nfsd: remove fault injection code It was an interesting idea but nobody seems to be using it, it's buggy at this point, and nfs4state.c is already complicated enough without it. The new nfsd/clients/ code provides some of the same functionality, and could probably do more if desired. This feature has been deprecated since 9d60d93198c6 ("Deprecate nfsd fault injection"). Signed-off-by: J. Bruce Fields --- Documentation/admin-guide/nfs/fault_injection.rst | 70 --- Documentation/admin-guide/nfs/index.rst | 1 - fs/nfsd/Kconfig | 10 - fs/nfsd/Makefile | 1 - fs/nfsd/nfs4state.c | 593 ---------------------- fs/nfsd/nfsctl.c | 3 - fs/nfsd/state.h | 27 - tools/nfsd/inject_fault.sh | 50 -- 8 files changed, 755 deletions(-) delete mode 100644 Documentation/admin-guide/nfs/fault_injection.rst delete mode 100755 tools/nfsd/inject_fault.sh (limited to 'tools') diff --git a/Documentation/admin-guide/nfs/fault_injection.rst b/Documentation/admin-guide/nfs/fault_injection.rst deleted file mode 100644 index eb029c0c15ce..000000000000 --- a/Documentation/admin-guide/nfs/fault_injection.rst +++ /dev/null @@ -1,70 +0,0 @@ -=================== -NFS Fault Injection -=================== - -Fault injection is a method for forcing errors that may not normally occur, or -may be difficult to reproduce. Forcing these errors in a controlled environment -can help the developer find and fix bugs before their code is shipped in a -production system. Injecting an error on the Linux NFS server will allow us to -observe how the client reacts and if it manages to recover its state correctly. - -NFSD_FAULT_INJECTION must be selected when configuring the kernel to use this -feature. - - -Using Fault Injection -===================== -On the client, mount the fault injection server through NFS v4.0+ and do some -work over NFS (open files, take locks, ...). - -On the server, mount the debugfs filesystem to and ls -/nfsd. This will show a list of files that will be used for -injecting faults on the NFS server. As root, write a number n to the file -corresponding to the action you want the server to take. The server will then -process the first n items it finds. So if you want to forget 5 locks, echo '5' -to /nfsd/forget_locks. A value of 0 will tell the server to forget -all corresponding items. A log message will be created containing the number -of items forgotten (check dmesg). - -Go back to work on the client and check if the client recovered from the error -correctly. - - -Available Faults -================ -forget_clients: - The NFS server keeps a list of clients that have placed a mount call. If - this list is cleared, the server will have no knowledge of who the client - is, forcing the client to reauthenticate with the server. - -forget_openowners: - The NFS server keeps a list of what files are currently opened and who - they were opened by. Clearing this list will force the client to reopen - its files. - -forget_locks: - The NFS server keeps a list of what files are currently locked in the VFS. - Clearing this list will force the client to reclaim its locks (files are - unlocked through the VFS as they are cleared from this list). - -forget_delegations: - A delegation is used to assure the client that a file, or part of a file, - has not changed since the delegation was awarded. Clearing this list will - force the client to reacquire its delegation before accessing the file - again. - -recall_delegations: - Delegations can be recalled by the server when another client attempts to - access a file. This test will notify the client that its delegation has - been revoked, forcing the client to reacquire the delegation before using - the file again. - - -tools/nfs/inject_faults.sh script -================================= -This script has been created to ease the fault injection process. This script -will detect the mounted debugfs directory and write to the files located there -based on the arguments passed by the user. For example, running -`inject_faults.sh forget_locks 1` as root will instruct the server to forget -one lock. Running `inject_faults forget_locks` will instruct the server to -forgetall locks. diff --git a/Documentation/admin-guide/nfs/index.rst b/Documentation/admin-guide/nfs/index.rst index 6b5a3c90fac5..3601a708f333 100644 --- a/Documentation/admin-guide/nfs/index.rst +++ b/Documentation/admin-guide/nfs/index.rst @@ -12,4 +12,3 @@ NFS nfs-idmapper pnfs-block-server pnfs-scsi-server - fault_injection diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 99d2cae91bd6..9223e13c3051 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -156,13 +156,3 @@ config NFSD_V4_SECURITY_LABEL If you do not wish to enable fine-grained security labels SELinux or Smack policies on NFSv4 files, say N. - -config NFSD_FAULT_INJECTION - bool "NFS server manual fault injection" - depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS && BROKEN - help - This option enables support for manually injecting faults - into the NFS server. This is intended to be used for - testing error recovery on the NFS client. - - If unsure, say N. diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 6a40b1afe703..3f0983e93a99 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -13,7 +13,6 @@ nfsd-y += trace.o nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ export.o auth.o lockd.o nfscache.o nfsxdr.o \ stats.o filecache.o -nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c09a2a4281ec..a31ba0e4ef2f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7253,599 +7253,6 @@ nfs4_check_open_reclaim(clientid_t *clid, return nfs_ok; } -#ifdef CONFIG_NFSD_FAULT_INJECTION -static inline void -put_client(struct nfs4_client *clp) -{ - atomic_dec(&clp->cl_rpc_users); -} - -static struct nfs4_client * -nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size) -{ - struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - - if (!nfsd_netns_ready(nn)) - return NULL; - - list_for_each_entry(clp, &nn->client_lru, cl_lru) { - if (memcmp(&clp->cl_addr, addr, addr_size) == 0) - return clp; - } - return NULL; -} - -u64 -nfsd_inject_print_clients(void) -{ - struct nfs4_client *clp; - u64 count = 0; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - char buf[INET6_ADDRSTRLEN]; - - if (!nfsd_netns_ready(nn)) - return 0; - - spin_lock(&nn->client_lock); - list_for_each_entry(clp, &nn->client_lru, cl_lru) { - rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); - pr_info("NFS Client: %s\n", buf); - ++count; - } - spin_unlock(&nn->client_lock); - - return count; -} - -u64 -nfsd_inject_forget_client(struct sockaddr_storage *addr, size_t addr_size) -{ - u64 count = 0; - struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - - if (!nfsd_netns_ready(nn)) - return count; - - spin_lock(&nn->client_lock); - clp = nfsd_find_client(addr, addr_size); - if (clp) { - if (mark_client_expired_locked(clp) == nfs_ok) - ++count; - else - clp = NULL; - } - spin_unlock(&nn->client_lock); - - if (clp) - expire_client(clp); - - return count; -} - -u64 -nfsd_inject_forget_clients(u64 max) -{ - u64 count = 0; - struct nfs4_client *clp, *next; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - LIST_HEAD(reaplist); - - if (!nfsd_netns_ready(nn)) - return count; - - spin_lock(&nn->client_lock); - list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) { - if (mark_client_expired_locked(clp) == nfs_ok) { - list_add(&clp->cl_lru, &reaplist); - if (max != 0 && ++count >= max) - break; - } - } - spin_unlock(&nn->client_lock); - - list_for_each_entry_safe(clp, next, &reaplist, cl_lru) - expire_client(clp); - - return count; -} - -static void nfsd_print_count(struct nfs4_client *clp, unsigned int count, - const char *type) -{ - char buf[INET6_ADDRSTRLEN]; - rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); - printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type); -} - -static void -nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst, - struct list_head *collect) -{ - struct nfs4_client *clp = lst->st_stid.sc_client; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - - if (!collect) - return; - - lockdep_assert_held(&nn->client_lock); - atomic_inc(&clp->cl_rpc_users); - list_add(&lst->st_locks, collect); -} - -static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, - struct list_head *collect, - bool (*func)(struct nfs4_ol_stateid *)) -{ - struct nfs4_openowner *oop; - struct nfs4_ol_stateid *stp, *st_next; - struct nfs4_ol_stateid *lst, *lst_next; - u64 count = 0; - - spin_lock(&clp->cl_lock); - list_for_each_entry(oop, &clp->cl_openowners, oo_perclient) { - list_for_each_entry_safe(stp, st_next, - &oop->oo_owner.so_stateids, st_perstateowner) { - list_for_each_entry_safe(lst, lst_next, - &stp->st_locks, st_locks) { - if (func) { - if (func(lst)) - nfsd_inject_add_lock_to_list(lst, - collect); - } - ++count; - /* - * Despite the fact that these functions deal - * with 64-bit integers for "count", we must - * ensure that it doesn't blow up the - * clp->cl_rpc_users. Throw a warning if we - * start to approach INT_MAX here. - */ - WARN_ON_ONCE(count == (INT_MAX / 2)); - if (count == max) - goto out; - } - } - } -out: - spin_unlock(&clp->cl_lock); - - return count; -} - -static u64 -nfsd_collect_client_locks(struct nfs4_client *clp, struct list_head *collect, - u64 max) -{ - return nfsd_foreach_client_lock(clp, max, collect, unhash_lock_stateid); -} - -static u64 -nfsd_print_client_locks(struct nfs4_client *clp) -{ - u64 count = nfsd_foreach_client_lock(clp, 0, NULL, NULL); - nfsd_print_count(clp, count, "locked files"); - return count; -} - -u64 -nfsd_inject_print_locks(void) -{ - struct nfs4_client *clp; - u64 count = 0; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - - if (!nfsd_netns_ready(nn)) - return 0; - - spin_lock(&nn->client_lock); - list_for_each_entry(clp, &nn->client_lru, cl_lru) - count += nfsd_print_client_locks(clp); - spin_unlock(&nn->client_lock); - - return count; -} - -static void -nfsd_reap_locks(struct list_head *reaplist) -{ - struct nfs4_client *clp; - struct nfs4_ol_stateid *stp, *next; - - list_for_each_entry_safe(stp, next, reaplist, st_locks) { - list_del_init(&stp->st_locks); - clp = stp->st_stid.sc_client; - nfs4_put_stid(&stp->st_stid); - put_client(clp); - } -} - -u64 -nfsd_inject_forget_client_locks(struct sockaddr_storage *addr, size_t addr_size) -{ - unsigned int count = 0; - struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - LIST_HEAD(reaplist); - - if (!nfsd_netns_ready(nn)) - return count; - - spin_lock(&nn->client_lock); - clp = nfsd_find_client(addr, addr_size); - if (clp) - count = nfsd_collect_client_locks(clp, &reaplist, 0); - spin_unlock(&nn->client_lock); - nfsd_reap_locks(&reaplist); - return count; -} - -u64 -nfsd_inject_forget_locks(u64 max) -{ - u64 count = 0; - struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - LIST_HEAD(reaplist); - - if (!nfsd_netns_ready(nn)) - return count; - - spin_lock(&nn->client_lock); - list_for_each_entry(clp, &nn->client_lru, cl_lru) { - count += nfsd_collect_client_locks(clp, &reaplist, max - count); - if (max != 0 && count >= max) - break; - } - spin_unlock(&nn->client_lock); - nfsd_reap_locks(&reaplist); - return count; -} - -static u64 -nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max, - struct list_head *collect, - void (*func)(struct nfs4_openowner *)) -{ - struct nfs4_openowner *oop, *next; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - u64 count = 0; - - lockdep_assert_held(&nn->client_lock); - - spin_lock(&clp->cl_lock); - list_for_each_entry_safe(oop, next, &clp->cl_openowners, oo_perclient) { - if (func) { - func(oop); - if (collect) { - atomic_inc(&clp->cl_rpc_users); - list_add(&oop->oo_perclient, collect); - } - } - ++count; - /* - * Despite the fact that these functions deal with - * 64-bit integers for "count", we must ensure that - * it doesn't blow up the clp->cl_rpc_users. Throw a - * warning if we start to approach INT_MAX here. - */ - WARN_ON_ONCE(count == (INT_MAX / 2)); - if (count == max) - break; - } - spin_unlock(&clp->cl_lock); - - return count; -} - -static u64 -nfsd_print_client_openowners(struct nfs4_client *clp) -{ - u64 count = nfsd_foreach_client_openowner(clp, 0, NULL, NULL); - - nfsd_print_count(clp, count, "openowners"); - return count; -} - -static u64 -nfsd_collect_client_openowners(struct nfs4_client *clp, - struct list_head *collect, u64 max) -{ - return nfsd_foreach_client_openowner(clp, max, collect, - unhash_openowner_locked); -} - -u64 -nfsd_inject_print_openowners(void) -{ - struct nfs4_client *clp; - u64 count = 0; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - - if (!nfsd_netns_ready(nn)) - return 0; - - spin_lock(&nn->client_lock); - list_for_each_entry(clp, &nn->client_lru, cl_lru) - count += nfsd_print_client_openowners(clp); - spin_unlock(&nn->client_lock); - - return count; -} - -static void -nfsd_reap_openowners(struct list_head *reaplist) -{ - struct nfs4_client *clp; - struct nfs4_openowner *oop, *next; - - list_for_each_entry_safe(oop, next, reaplist, oo_perclient) { - list_del_init(&oop->oo_perclient); - clp = oop->oo_owner.so_client; - release_openowner(oop); - put_client(clp); - } -} - -u64 -nfsd_inject_forget_client_openowners(struct sockaddr_storage *addr, - size_t addr_size) -{ - unsigned int count = 0; - struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - LIST_HEAD(reaplist); - - if (!nfsd_netns_ready(nn)) - return count; - - spin_lock(&nn->client_lock); - clp = nfsd_find_client(addr, addr_size); - if (clp) - count = nfsd_collect_client_openowners(clp, &reaplist, 0); - spin_unlock(&nn->client_lock); - nfsd_reap_openowners(&reaplist); - return count; -} - -u64 -nfsd_inject_forget_openowners(u64 max) -{ - u64 count = 0; - struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - LIST_HEAD(reaplist); - - if (!nfsd_netns_ready(nn)) - return count; - - spin_lock(&nn->client_lock); - list_for_each_entry(clp, &nn->client_lru, cl_lru) { - count += nfsd_collect_client_openowners(clp, &reaplist, - max - count); - if (max != 0 && count >= max) - break; - } - spin_unlock(&nn->client_lock); - nfsd_reap_openowners(&reaplist); - return count; -} - -static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, - struct list_head *victims) -{ - struct nfs4_delegation *dp, *next; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - u64 count = 0; - - lockdep_assert_held(&nn->client_lock); - - spin_lock(&state_lock); - list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) { - if (victims) { - /* - * It's not safe to mess with delegations that have a - * non-zero dl_time. They might have already been broken - * and could be processed by the laundromat outside of - * the state_lock. Just leave them be. - */ - if (dp->dl_time != 0) - continue; - - atomic_inc(&clp->cl_rpc_users); - WARN_ON(!unhash_delegation_locked(dp)); - list_add(&dp->dl_recall_lru, victims); - } - ++count; - /* - * Despite the fact that these functions deal with - * 64-bit integers for "count", we must ensure that - * it doesn't blow up the clp->cl_rpc_users. Throw a - * warning if we start to approach INT_MAX here. - */ - WARN_ON_ONCE(count == (INT_MAX / 2)); - if (count == max) - break; - } - spin_unlock(&state_lock); - return count; -} - -static u64 -nfsd_print_client_delegations(struct nfs4_client *clp) -{ - u64 count = nfsd_find_all_delegations(clp, 0, NULL); - - nfsd_print_count(clp, count, "delegations"); - return count; -} - -u64 -nfsd_inject_print_delegations(void) -{ - struct nfs4_client *clp; - u64 count = 0; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - - if (!nfsd_netns_ready(nn)) - return 0; - - spin_lock(&nn->client_lock); - list_for_each_entry(clp, &nn->client_lru, cl_lru) - count += nfsd_print_client_delegations(clp); - spin_unlock(&nn->client_lock); - - return count; -} - -static void -nfsd_forget_delegations(struct list_head *reaplist) -{ - struct nfs4_client *clp; - struct nfs4_delegation *dp, *next; - - list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) { - list_del_init(&dp->dl_recall_lru); - clp = dp->dl_stid.sc_client; - revoke_delegation(dp); - put_client(clp); - } -} - -u64 -nfsd_inject_forget_client_delegations(struct sockaddr_storage *addr, - size_t addr_size) -{ - u64 count = 0; - struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - LIST_HEAD(reaplist); - - if (!nfsd_netns_ready(nn)) - return count; - - spin_lock(&nn->client_lock); - clp = nfsd_find_client(addr, addr_size); - if (clp) - count = nfsd_find_all_delegations(clp, 0, &reaplist); - spin_unlock(&nn->client_lock); - - nfsd_forget_delegations(&reaplist); - return count; -} - -u64 -nfsd_inject_forget_delegations(u64 max) -{ - u64 count = 0; - struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - LIST_HEAD(reaplist); - - if (!nfsd_netns_ready(nn)) - return count; - - spin_lock(&nn->client_lock); - list_for_each_entry(clp, &nn->client_lru, cl_lru) { - count += nfsd_find_all_delegations(clp, max - count, &reaplist); - if (max != 0 && count >= max) - break; - } - spin_unlock(&nn->client_lock); - nfsd_forget_delegations(&reaplist); - return count; -} - -static void -nfsd_recall_delegations(struct list_head *reaplist) -{ - struct nfs4_client *clp; - struct nfs4_delegation *dp, *next; - - list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) { - list_del_init(&dp->dl_recall_lru); - clp = dp->dl_stid.sc_client; - - trace_nfsd_deleg_recall(&dp->dl_stid.sc_stateid); - - /* - * We skipped all entries that had a zero dl_time before, - * so we can now reset the dl_time back to 0. If a delegation - * break comes in now, then it won't make any difference since - * we're recalling it either way. - */ - spin_lock(&state_lock); - dp->dl_time = 0; - spin_unlock(&state_lock); - nfsd_break_one_deleg(dp); - put_client(clp); - } -} - -u64 -nfsd_inject_recall_client_delegations(struct sockaddr_storage *addr, - size_t addr_size) -{ - u64 count = 0; - struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - LIST_HEAD(reaplist); - - if (!nfsd_netns_ready(nn)) - return count; - - spin_lock(&nn->client_lock); - clp = nfsd_find_client(addr, addr_size); - if (clp) - count = nfsd_find_all_delegations(clp, 0, &reaplist); - spin_unlock(&nn->client_lock); - - nfsd_recall_delegations(&reaplist); - return count; -} - -u64 -nfsd_inject_recall_delegations(u64 max) -{ - u64 count = 0; - struct nfs4_client *clp, *next; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - LIST_HEAD(reaplist); - - if (!nfsd_netns_ready(nn)) - return count; - - spin_lock(&nn->client_lock); - list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) { - count += nfsd_find_all_delegations(clp, max - count, &reaplist); - if (max != 0 && ++count >= max) - break; - } - spin_unlock(&nn->client_lock); - nfsd_recall_delegations(&reaplist); - return count; -} -#endif /* CONFIG_NFSD_FAULT_INJECTION */ - /* * Since the lifetime of a delegation isn't limited to that of an open, a * client may quite reasonably hang on to a delegation as long as it has diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 7ae236113040..f6d5d783f4a4 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1534,7 +1534,6 @@ static int __init init_nfsd(void) retval = nfsd4_init_pnfs(); if (retval) goto out_free_slabs; - nfsd_fault_inject_init(); /* nfsd fault injection controls */ nfsd_stat_init(); /* Statistics */ retval = nfsd_drc_slab_create(); if (retval) @@ -1555,7 +1554,6 @@ out_free_lockd: nfsd_drc_slab_free(); out_free_stat: nfsd_stat_shutdown(); - nfsd_fault_inject_cleanup(); nfsd4_exit_pnfs(); out_free_slabs: nfsd4_free_slabs(); @@ -1575,7 +1573,6 @@ static void __exit exit_nfsd(void) nfsd_lockd_shutdown(); nfsd4_free_slabs(); nfsd4_exit_pnfs(); - nfsd_fault_inject_cleanup(); unregister_filesystem(&nfsd_fs_type); unregister_cld_notifier(); unregister_pernet_subsys(&nfsd_net_ops); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 3b408532a5dc..9eae11a9d21c 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -693,31 +693,4 @@ extern void nfsd4_client_record_remove(struct nfs4_client *clp); extern int nfsd4_client_record_check(struct nfs4_client *clp); extern void nfsd4_record_grace_done(struct nfsd_net *nn); -/* nfs fault injection functions */ -#ifdef CONFIG_NFSD_FAULT_INJECTION -void nfsd_fault_inject_init(void); -void nfsd_fault_inject_cleanup(void); - -u64 nfsd_inject_print_clients(void); -u64 nfsd_inject_forget_client(struct sockaddr_storage *, size_t); -u64 nfsd_inject_forget_clients(u64); - -u64 nfsd_inject_print_locks(void); -u64 nfsd_inject_forget_client_locks(struct sockaddr_storage *, size_t); -u64 nfsd_inject_forget_locks(u64); - -u64 nfsd_inject_print_openowners(void); -u64 nfsd_inject_forget_client_openowners(struct sockaddr_storage *, size_t); -u64 nfsd_inject_forget_openowners(u64); - -u64 nfsd_inject_print_delegations(void); -u64 nfsd_inject_forget_client_delegations(struct sockaddr_storage *, size_t); -u64 nfsd_inject_forget_delegations(u64); -u64 nfsd_inject_recall_client_delegations(struct sockaddr_storage *, size_t); -u64 nfsd_inject_recall_delegations(u64); -#else /* CONFIG_NFSD_FAULT_INJECTION */ -static inline void nfsd_fault_inject_init(void) {} -static inline void nfsd_fault_inject_cleanup(void) {} -#endif /* CONFIG_NFSD_FAULT_INJECTION */ - #endif /* NFSD4_STATE_H */ diff --git a/tools/nfsd/inject_fault.sh b/tools/nfsd/inject_fault.sh deleted file mode 100755 index 10ceee64a09a..000000000000 --- a/tools/nfsd/inject_fault.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# Copyright (c) 2011 Bryan Schumaker -# -# Script for easier NFSD fault injection - -# Check that debugfs has been mounted -DEBUGFS=`cat /proc/mounts | grep debugfs` -if [ "$DEBUGFS" == "" ]; then - echo "debugfs does not appear to be mounted!" - echo "Please mount debugfs and try again" - exit 1 -fi - -# Check that the fault injection directory exists -DEBUGDIR=`echo $DEBUGFS | awk '{print $2}'`/nfsd -if [ ! -d "$DEBUGDIR" ]; then - echo "$DEBUGDIR does not exist" - echo "Check that your .config selects CONFIG_NFSD_FAULT_INJECTION" - exit 1 -fi - -function help() -{ - echo "Usage $0 injection_type [count]" - echo "" - echo "Injection types are:" - ls $DEBUGDIR - exit 1 -} - -if [ $# == 0 ]; then - help -elif [ ! -f $DEBUGDIR/$1 ]; then - help -elif [ $# != 2 ]; then - COUNT=0 -else - COUNT=$2 -fi - -BEFORE=`mktemp` -AFTER=`mktemp` -dmesg > $BEFORE -echo $COUNT > $DEBUGDIR/$1 -dmesg > $AFTER -# Capture lines that only exist in the $AFTER file -diff $BEFORE $AFTER | grep ">" -rm -f $BEFORE $AFTER -- cgit v1.3.1 From 99d4def4d08507474b250dad6345d14715a4726b Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 24 Sep 2020 11:45:22 -0700 Subject: bpf: Add AND verifier test case where 32bit and 64bit bounds differ If we AND two values together that are known in the 32bit subregs, but not known in the 64bit registers we rely on the tnum value to report the 32bit subreg is known. And do not use mark_reg_known() directly from scalar32_min_max_and() Add an AND test to cover the case with known 32bit subreg, but unknown 64bit reg. Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/and.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c index d781bc86e100..ca8fdb1b3f01 100644 --- a/tools/testing/selftests/bpf/verifier/and.c +++ b/tools/testing/selftests/bpf/verifier/and.c @@ -48,3 +48,19 @@ .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "check known subreg with unknown reg", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 32), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFFFF1234), + /* Upper bits are unknown but AND above masks out 1 zero'ing lower bits */ + BPF_JMP32_IMM(BPF_JLT, BPF_REG_0, 1, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 512), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0 +}, -- cgit v1.3.1 From bc75c054f04048517e0b153ab38d973bbcdcef59 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 25 Sep 2020 13:46:06 -0700 Subject: devlink: convert flash_update to use params structure The devlink core recently gained support for checking whether the driver supports a flash_update parameter, via `supported_flash_update_params`. However, parameters are specified as function arguments. Adding a new parameter still requires modifying the signature of the .flash_update callback in all drivers. Convert the .flash_update function to take a new `struct devlink_flash_update_params` instead. By using this structure, and the `supported_flash_update_params` bit field, a new parameter to flash_update can be added without requiring modification to existing drivers. As before, all parameters except file_name will require driver opt-in. Because file_name is a necessary field to for the flash_update to make sense, no "SUPPORTED" bitflag is provided and it is always considered valid. All future additional parameters will require a new bit in the supported_flash_update_params bitfield. Signed-off-by: Jacob Keller Reviewed-by: Jakub Kicinski Cc: Jiri Pirko Cc: Jakub Kicinski Cc: Jonathan Corbet Cc: Michael Chan Cc: Bin Luo Cc: Saeed Mahameed Cc: Leon Romanovsky Cc: Ido Schimmel Cc: Danielle Ratson Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 7 ++++--- drivers/net/ethernet/huawei/hinic/hinic_devlink.c | 5 ++--- drivers/net/ethernet/intel/ice/ice_devlink.c | 10 +++++----- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 5 ++--- drivers/net/ethernet/mellanox/mlxsw/core.c | 9 ++++----- drivers/net/ethernet/netronome/nfp/nfp_devlink.c | 7 ++++--- drivers/net/ethernet/pensando/ionic/ionic_devlink.c | 5 ++--- drivers/net/netdevsim/dev.c | 14 +++++++------- include/net/devlink.h | 18 ++++++++++++++++-- net/core/devlink.c | 14 ++++++++------ .../testing/selftests/drivers/net/netdevsim/devlink.sh | 3 +++ 11 files changed, 57 insertions(+), 40 deletions(-) (limited to 'tools') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index a17764db1419..d436134bdc40 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -17,8 +17,9 @@ #include "bnxt_ethtool.h" static int -bnxt_dl_flash_update(struct devlink *dl, const char *filename, - const char *region, struct netlink_ext_ack *extack) +bnxt_dl_flash_update(struct devlink *dl, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) { struct bnxt *bp = bnxt_get_bp_from_dl(dl); int rc; @@ -31,7 +32,7 @@ bnxt_dl_flash_update(struct devlink *dl, const char *filename, devlink_flash_update_begin_notify(dl); devlink_flash_update_status_notify(dl, "Preparing to flash", NULL, 0, 0); - rc = bnxt_flash_package_from_file(bp->dev, filename, 0); + rc = bnxt_flash_package_from_file(bp->dev, params->file_name, 0); if (!rc) devlink_flash_update_status_notify(dl, "Flashing done", NULL, 0, 0); else diff --git a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c index 662a27a514ae..2630d667f393 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c @@ -281,15 +281,14 @@ static int hinic_firmware_update(struct hinic_devlink_priv *priv, } static int hinic_devlink_flash_update(struct devlink *devlink, - const char *file_name, - const char *component, + struct devlink_flash_update_params *params, struct netlink_ext_ack *extack) { struct hinic_devlink_priv *priv = devlink_priv(devlink); const struct firmware *fw; int err; - err = request_firmware_direct(&fw, file_name, + err = request_firmware_direct(&fw, params->file_name, &priv->hwdev->hwif->pdev->dev); if (err) return err; diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 4666f1d5a695..c3d61c246f0b 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -233,8 +233,7 @@ static int ice_devlink_info_get(struct devlink *devlink, /** * ice_devlink_flash_update - Update firmware stored in flash on the device * @devlink: pointer to devlink associated with device to update - * @path: the path of the firmware file to use via request_firmware - * @component: name of the component to update, or NULL + * @params: flash update parameters * @extack: netlink extended ACK structure * * Perform a device flash update. The bulk of the update logic is contained @@ -243,8 +242,9 @@ static int ice_devlink_info_get(struct devlink *devlink, * Returns: zero on success, or an error code on failure. */ static int -ice_devlink_flash_update(struct devlink *devlink, const char *path, - const char *component, struct netlink_ext_ack *extack) +ice_devlink_flash_update(struct devlink *devlink, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) { struct ice_pf *pf = devlink_priv(devlink); struct device *dev = &pf->pdev->dev; @@ -261,7 +261,7 @@ ice_devlink_flash_update(struct devlink *devlink, const char *path, if (err) return err; - err = request_firmware(&fw, path, dev); + err = request_firmware(&fw, params->file_name, dev); if (err) { NL_SET_ERR_MSG_MOD(extack, "Unable to read file from disk"); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index fccae4b802b6..9b14e3f805a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -8,15 +8,14 @@ #include "eswitch.h" static int mlx5_devlink_flash_update(struct devlink *devlink, - const char *file_name, - const char *component, + struct devlink_flash_update_params *params, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); const struct firmware *fw; int err; - err = request_firmware_direct(&fw, file_name, &dev->pdev->dev); + err = request_firmware_direct(&fw, params->file_name, &dev->pdev->dev); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 3ffd03ef9c0e..f8dddcf461f5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1102,13 +1102,13 @@ static int mlxsw_core_fw_rev_validate(struct mlxsw_core *mlxsw_core, } static int mlxsw_core_fw_flash_update(struct mlxsw_core *mlxsw_core, - const char *file_name, const char *component, + struct devlink_flash_update_params *params, struct netlink_ext_ack *extack) { const struct firmware *firmware; int err; - err = request_firmware_direct(&firmware, file_name, mlxsw_core->bus_info->dev); + err = request_firmware_direct(&firmware, params->file_name, mlxsw_core->bus_info->dev); if (err) return err; err = mlxsw_core_fw_flash(mlxsw_core, firmware, extack); @@ -1431,13 +1431,12 @@ mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink, } static int mlxsw_devlink_flash_update(struct devlink *devlink, - const char *file_name, - const char *component, + struct devlink_flash_update_params *params, struct netlink_ext_ack *extack) { struct mlxsw_core *mlxsw_core = devlink_priv(devlink); - return mlxsw_core_fw_flash_update(mlxsw_core, file_name, component, extack); + return mlxsw_core_fw_flash_update(mlxsw_core, params, extack); } static int mlxsw_devlink_trap_init(struct devlink *devlink, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index c93cb9a27e25..97d2b03208de 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -329,10 +329,11 @@ err_close_nsp: } static int -nfp_devlink_flash_update(struct devlink *devlink, const char *path, - const char *component, struct netlink_ext_ack *extack) +nfp_devlink_flash_update(struct devlink *devlink, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) { - return nfp_flash_update_common(devlink_priv(devlink), path, extack); + return nfp_flash_update_common(devlink_priv(devlink), params->file_name, extack); } const struct devlink_ops nfp_devlink_ops = { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c index c125988b0954..51d64718ed9f 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c @@ -10,13 +10,12 @@ #include "ionic_devlink.h" static int ionic_dl_flash_update(struct devlink *dl, - const char *fwname, - const char *component, + struct devlink_flash_update_params *params, struct netlink_ext_ack *extack) { struct ionic *ionic = devlink_priv(dl); - return ionic_firmware_update(ionic->lif, fwname, extack); + return ionic_firmware_update(ionic->lif, params->file_name, extack); } static int ionic_dl_info_get(struct devlink *dl, struct devlink_info_req *req, diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 424bdab8e888..2d218afad375 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -742,8 +742,8 @@ static int nsim_dev_info_get(struct devlink *devlink, #define NSIM_DEV_FLASH_CHUNK_SIZE 1000 #define NSIM_DEV_FLASH_CHUNK_TIME_MS 10 -static int nsim_dev_flash_update(struct devlink *devlink, const char *file_name, - const char *component, +static int nsim_dev_flash_update(struct devlink *devlink, + struct devlink_flash_update_params *params, struct netlink_ext_ack *extack) { struct nsim_dev *nsim_dev = devlink_priv(devlink); @@ -753,13 +753,13 @@ static int nsim_dev_flash_update(struct devlink *devlink, const char *file_name, devlink_flash_update_begin_notify(devlink); devlink_flash_update_status_notify(devlink, "Preparing to flash", - component, 0, 0); + params->component, 0, 0); } for (i = 0; i < NSIM_DEV_FLASH_SIZE / NSIM_DEV_FLASH_CHUNK_SIZE; i++) { if (nsim_dev->fw_update_status) devlink_flash_update_status_notify(devlink, "Flashing", - component, + params->component, i * NSIM_DEV_FLASH_CHUNK_SIZE, NSIM_DEV_FLASH_SIZE); msleep(NSIM_DEV_FLASH_CHUNK_TIME_MS); @@ -767,13 +767,13 @@ static int nsim_dev_flash_update(struct devlink *devlink, const char *file_name, if (nsim_dev->fw_update_status) { devlink_flash_update_status_notify(devlink, "Flashing", - component, + params->component, NSIM_DEV_FLASH_SIZE, NSIM_DEV_FLASH_SIZE); devlink_flash_update_timeout_notify(devlink, "Flash select", - component, 81); + params->component, 81); devlink_flash_update_status_notify(devlink, "Flashing done", - component, 0, 0); + params->component, 0, 0); devlink_flash_update_end_notify(devlink); } diff --git a/include/net/devlink.h b/include/net/devlink.h index cec6b4f109fa..7794e1601772 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -550,6 +550,20 @@ enum devlink_param_generic_id { /* Firmware bundle identifier */ #define DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID "fw.bundle_id" +/** + * struct devlink_flash_update_params - Flash Update parameters + * @file_name: the name of the flash firmware file to update from + * @component: the flash component to update + * + * With the exception of file_name, drivers must opt-in to parameters by + * setting the appropriate bit in the supported_flash_update_params field in + * their devlink_ops structure. + */ +struct devlink_flash_update_params { + const char *file_name; + const char *component; +}; + #define DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT BIT(0) struct devlink_region; @@ -1112,8 +1126,8 @@ struct devlink_ops { * parameters supported by the driver should be set in * supported_flash_update_params. */ - int (*flash_update)(struct devlink *devlink, const char *file_name, - const char *component, + int (*flash_update)(struct devlink *devlink, + struct devlink_flash_update_params *params, struct netlink_ext_ack *extack); /** * @trap_init: Trap initialization function. diff --git a/net/core/devlink.c b/net/core/devlink.c index 7c69a41016a2..6766f9ef3152 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -3147,8 +3147,8 @@ EXPORT_SYMBOL_GPL(devlink_flash_update_timeout_notify); static int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info) { + struct devlink_flash_update_params params = {}; struct devlink *devlink = info->user_ptr[0]; - const char *file_name, *component = NULL; struct nlattr *nla_component; u32 supported_params; @@ -3160,7 +3160,7 @@ static int devlink_nl_cmd_flash_update(struct sk_buff *skb, supported_params = devlink->ops->supported_flash_update_params; - file_name = nla_data(info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME]); + params.file_name = nla_data(info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME]); nla_component = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT]; if (nla_component) { @@ -3169,11 +3169,10 @@ static int devlink_nl_cmd_flash_update(struct sk_buff *skb, "component update is not supported by this device"); return -EOPNOTSUPP; } - component = nla_data(nla_component); + params.component = nla_data(nla_component); } - return devlink->ops->flash_update(devlink, file_name, component, - info->extack); + return devlink->ops->flash_update(devlink, ¶ms, info->extack); } static const struct devlink_param devlink_param_generic[] = { @@ -9651,6 +9650,7 @@ out: int devlink_compat_flash_update(struct net_device *dev, const char *file_name) { + struct devlink_flash_update_params params = {}; struct devlink *devlink; int ret; @@ -9663,8 +9663,10 @@ int devlink_compat_flash_update(struct net_device *dev, const char *file_name) goto out; } + params.file_name = file_name; + mutex_lock(&devlink->lock); - ret = devlink->ops->flash_update(devlink, file_name, NULL, NULL); + ret = devlink->ops->flash_update(devlink, ¶ms, NULL); mutex_unlock(&devlink->lock); out: diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index de4b32fc4223..1e7541688978 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -23,6 +23,9 @@ fw_flash_test() devlink dev flash $DL_HANDLE file dummy check_err $? "Failed to flash with status updates on" + devlink dev flash $DL_HANDLE file dummy component fw.mgmt + check_err $? "Failed to flash with component attribute" + echo "n"> $DEBUGFS_DIR/fw_update_status check_err $? "Failed to disable status updates" -- cgit v1.3.1 From cbb58368fbccc823a85df73bd6bf7db6dfda86aa Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 25 Sep 2020 13:46:08 -0700 Subject: netdevsim: add support for flash_update overwrite mask The devlink interface recently gained support for a new "overwrite mask" parameter that allows specifying how various sub-sections of a flash component are modified when updating. Add support for this to netdevsim, to enable easily testing the interface. Make the allowed overwrite mask values controllable via a debugfs parameter. This enables testing a flow where the driver rejects an unsupportable overwrite mask. Signed-off-by: Jacob Keller Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/netdevsim/dev.c | 10 +++++++++- drivers/net/netdevsim/netdevsim.h | 1 + .../testing/selftests/drivers/net/netdevsim/devlink.sh | 18 ++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 2d218afad375..56213ba151f6 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -203,6 +203,8 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) return PTR_ERR(nsim_dev->ports_ddir); debugfs_create_bool("fw_update_status", 0600, nsim_dev->ddir, &nsim_dev->fw_update_status); + debugfs_create_u32("fw_update_overwrite_mask", 0600, nsim_dev->ddir, + &nsim_dev->fw_update_overwrite_mask); debugfs_create_u32("max_macs", 0600, nsim_dev->ddir, &nsim_dev->max_macs); debugfs_create_bool("test1", 0600, nsim_dev->ddir, @@ -749,6 +751,9 @@ static int nsim_dev_flash_update(struct devlink *devlink, struct nsim_dev *nsim_dev = devlink_priv(devlink); int i; + if ((params->overwrite_mask & ~nsim_dev->fw_update_overwrite_mask) != 0) + return -EOPNOTSUPP; + if (nsim_dev->fw_update_status) { devlink_flash_update_begin_notify(devlink); devlink_flash_update_status_notify(devlink, @@ -879,7 +884,8 @@ nsim_dev_devlink_trap_policer_counter_get(struct devlink *devlink, } static const struct devlink_ops nsim_dev_devlink_ops = { - .supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT, + .supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT | + DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK, .reload_down = nsim_dev_reload_down, .reload_up = nsim_dev_reload_up, .info_get = nsim_dev_info_get, @@ -994,6 +1000,7 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev, INIT_LIST_HEAD(&nsim_dev->port_list); mutex_init(&nsim_dev->port_list_lock); nsim_dev->fw_update_status = true; + nsim_dev->fw_update_overwrite_mask = 0; nsim_dev->fib_data = nsim_fib_create(devlink, extack); if (IS_ERR(nsim_dev->fib_data)) @@ -1052,6 +1059,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) INIT_LIST_HEAD(&nsim_dev->port_list); mutex_init(&nsim_dev->port_list_lock); nsim_dev->fw_update_status = true; + nsim_dev->fw_update_overwrite_mask = 0; nsim_dev->max_macs = NSIM_DEV_MAX_MACS_DEFAULT; nsim_dev->test1 = NSIM_DEV_TEST1_DEFAULT; spin_lock_init(&nsim_dev->fa_cookie_lock); diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 0c86561e6d8d..0a1b28aea894 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -196,6 +196,7 @@ struct nsim_dev { struct list_head port_list; struct mutex port_list_lock; /* protects port list */ bool fw_update_status; + u32 fw_update_overwrite_mask; u32 max_macs; bool test1; bool dont_allow_reload; diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index 1e7541688978..40909c254365 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -26,6 +26,24 @@ fw_flash_test() devlink dev flash $DL_HANDLE file dummy component fw.mgmt check_err $? "Failed to flash with component attribute" + devlink dev flash $DL_HANDLE file dummy overwrite settings + check_fail $? "Flash with overwrite settings should be rejected" + + echo "1"> $DEBUGFS_DIR/fw_update_overwrite_mask + check_err $? "Failed to change allowed overwrite mask" + + devlink dev flash $DL_HANDLE file dummy overwrite settings + check_err $? "Failed to flash with settings overwrite enabled" + + devlink dev flash $DL_HANDLE file dummy overwrite identifiers + check_fail $? "Flash with overwrite settings should be identifiers" + + echo "3"> $DEBUGFS_DIR/fw_update_overwrite_mask + check_err $? "Failed to change allowed overwrite mask" + + devlink dev flash $DL_HANDLE file dummy overwrite identifiers overwrite settings + check_err $? "Failed to flash with settings and identifiers overwrite enabled" + echo "n"> $DEBUGFS_DIR/fw_update_status check_err $? "Failed to disable status updates" -- cgit v1.3.1 From 090bc03bc9387b135307dae69a3242173d86de8d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 25 Sep 2020 18:19:13 -0700 Subject: netdevsim: fix duplicated debugfs directory The "ethtool" debugfs directory holds per-netdev knobs, so move it from the device instance directory to the port directory. This fixes the following warning when creating multiple ports: debugfs: Directory 'ethtool' with parent 'netdevsim1' already present! Fixes: ff1f7c17fb20 ("netdevsim: add pause frame stats") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/netdevsim/ethtool.c | 2 +- tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 7a4c779b4c89..f1884d90a876 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -54,7 +54,7 @@ void nsim_ethtool_init(struct netdevsim *ns) ns->netdev->ethtool_ops = &nsim_ethtool_ops; - ethtool = debugfs_create_dir("ethtool", ns->nsim_dev->ddir); + ethtool = debugfs_create_dir("ethtool", ns->nsim_dev_port->ddir); dir = debugfs_create_dir("pause", ethtool); debugfs_create_bool("report_stats_rx", 0600, dir, diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh index dd6ad6501c9c..25c896b9e2eb 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh @@ -3,7 +3,7 @@ NSIM_ID=$((RANDOM % 1024)) NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID -NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID +NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID/ports/0 NSIM_NETDEV= num_passes=0 num_errors=0 -- cgit v1.3.1 From 038d3b53c284bc0e9ba891e64c44abf6aee8065c Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 22 Sep 2020 10:51:19 +0800 Subject: perf vendor events intel: Update CascadelakeX events to v1.08 - Update CascadelakeX events to v1.08. - Update CascadelakeX JSON metrics from TMAM 4.0. Other fixes: - Add NO_NMI_WATCHDOG metric constraint to Backend_Bound - Change 'MB/sec' to 'MB' in UNC_M_PMM_BANDWIDTH. Signed-off-by: Jin Yao Reviewed-by: Andi Kleen Acked-by: Ian Rogers Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Alexander Shishkin Cc: Kan Liang Link: https://lore.kernel.org/lkml/20200922031918.3723-1-yao.jin@linux.intel.com/ Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/cascadelakex/cache.json | 28 +- .../arch/x86/cascadelakex/clx-metrics.json | 153 +-- .../pmu-events/arch/x86/cascadelakex/frontend.json | 34 + .../pmu-events/arch/x86/cascadelakex/memory.json | 704 ++++++------- .../pmu-events/arch/x86/cascadelakex/other.json | 1100 ++++++++++---------- .../pmu-events/arch/x86/cascadelakex/pipeline.json | 10 - .../arch/x86/cascadelakex/uncore-memory.json | 12 +- .../arch/x86/cascadelakex/uncore-other.json | 21 + 8 files changed, 1067 insertions(+), 995 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/cache.json b/tools/perf/pmu-events/arch/x86/cascadelakex/cache.json index 3fba310a5012..3c0f5837480f 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/cache.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/cache.json @@ -8063,6 +8063,20 @@ "SampleAfterValue": "100003", "UMask": "0x1" }, + { + "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Deprecated": "1", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000020004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, { "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", @@ -9255,20 +9269,6 @@ "SampleAfterValue": "100003", "UMask": "0x1" }, - { - "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE", - "Counter": "0,1,2,3", - "CounterHTOff": "0,1,2,3", - "Deprecated": "1", - "EventCode": "0xB7, 0xBB", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x1000020004", - "Offcore": "1", - "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "UMask": "0x1" - }, { "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index d25eebce34c9..de3193552277 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json @@ -4,14 +4,14 @@ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", "MetricGroup": "TopdownL1", "MetricName": "Frontend_Bound", - "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound." + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound." }, { "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", "MetricGroup": "TopdownL1_SMT", "MetricName": "Frontend_Bound_SMT", - "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU." + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU." }, { "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", @@ -22,13 +22,14 @@ }, { "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", "MetricGroup": "TopdownL1_SMT", "MetricName": "Bad_Speculation_SMT", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU." }, { "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricConstraint": "NO_NMI_WATCHDOG", "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", "MetricGroup": "TopdownL1", "MetricName": "Backend_Bound", @@ -36,7 +37,7 @@ }, { "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )", "MetricGroup": "TopdownL1_SMT", "MetricName": "Backend_Bound_SMT", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU." @@ -50,7 +51,7 @@ }, { "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", "MetricGroup": "TopdownL1_SMT", "MetricName": "Retiring_SMT", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU." @@ -58,7 +59,7 @@ { "BriefDescription": "Instructions Per Cycle (per Logical Processor)", "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", - "MetricGroup": "TopDownL1", + "MetricGroup": "Summary", "MetricName": "IPC" }, { @@ -73,24 +74,6 @@ "MetricGroup": "Branches;Fetch_BW;PGO", "MetricName": "IpTB" }, - { - "BriefDescription": "Branch instructions per taken branch. ", - "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", - "MetricGroup": "Branches;PGO", - "MetricName": "BpTB" - }, - { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1 ) )", - "MetricGroup": "PGO;IcMiss", - "MetricName": "IFetch_Line_Utilization" - }, - { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)", - "MetricGroup": "DSB;Fetch_BW", - "MetricName": "DSB_Coverage" - }, { "BriefDescription": "Cycles Per Instruction (per Logical Processor)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", @@ -104,86 +87,110 @@ "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots (per-Physical Core)", + "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)", "MetricExpr": "4 * cycles", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total issue-pipeline slots (per-Physical Core)", - "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)", + "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "MetricGroup": "TopDownL1_SMT", "MetricName": "SLOTS_SMT" }, { - "BriefDescription": "Instructions per Load (lower number means higher occurance rate)", + "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS", "MetricGroup": "Instruction_Type", - "MetricName": "IpL" + "MetricName": "IpLoad" }, { - "BriefDescription": "Instructions per Store (lower number means higher occurance rate)", + "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES", "MetricGroup": "Instruction_Type", - "MetricName": "IpS" + "MetricName": "IpStore" }, { - "BriefDescription": "Instructions per Branch (lower number means higher occurance rate)", + "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", "MetricGroup": "Branches;Instruction_Type", - "MetricName": "IpB" + "MetricName": "IpBranch" }, { - "BriefDescription": "Instruction per (near) call (lower number means higher occurance rate)", + "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", "MetricGroup": "Branches", "MetricName": "IpCall" }, + { + "BriefDescription": "Branch instructions per taken branch. ", + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTkBranch" + }, + { + "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )", + "MetricGroup": "FLOPS;FP_Arith;Instruction_Type", + "MetricName": "IpFLOP" + }, { "BriefDescription": "Total number of retired Instructions", "MetricExpr": "INST_RETIRED.ANY", - "MetricGroup": "Summary", + "MetricGroup": "Summary;TopDownL1", "MetricName": "Instructions" }, + { + "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)", + "MetricExpr": "LSD.UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)", + "MetricGroup": "LSD", + "MetricName": "LSD_Coverage" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)", + "MetricGroup": "DSB;Fetch_BW", + "MetricName": "DSB_Coverage" + }, { "BriefDescription": "Instructions Per Cycle (per physical core)", "MetricExpr": "INST_RETIRED.ANY / cycles", - "MetricGroup": "SMT", + "MetricGroup": "SMT;TopDownL1", "MetricName": "CoreIPC" }, { "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", - "MetricGroup": "SMT", + "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", + "MetricGroup": "SMT;TopDownL1", "MetricName": "CoreIPC_SMT" }, { "BriefDescription": "Floating Point Operations Per Cycle", - "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / cycles", + "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / cycles", "MetricGroup": "FLOPS", "MetricName": "FLOPc" }, { "BriefDescription": "Floating Point Operations Per Cycle", - "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "MetricGroup": "FLOPS_SMT", "MetricName": "FLOPc_SMT" }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", - "MetricGroup": "Pipeline", + "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 )", + "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)", - "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES", "MetricGroup": "BrMispredicts", "MetricName": "Branch_Misprediction_Cost" }, { "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)", - "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES", "MetricGroup": "BrMispredicts_SMT", "MetricName": "Branch_Misprediction_Cost_SMT" }, @@ -213,14 +220,14 @@ }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricConstraint": "NO_NMI_WATCHDOG", "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", "MetricGroup": "TLB", - "MetricName": "Page_Walks_Utilization", - "MetricConstraint": "NO_NMI_WATCHDOG" + "MetricName": "Page_Walks_Utilization" }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) )", + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )", "MetricGroup": "TLB_SMT", "MetricName": "Page_Walks_Utilization_SMT" }, @@ -245,7 +252,7 @@ { "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time", - "MetricGroup": "Memory_BW", + "MetricGroup": "Memory_BW;Offcore", "MetricName": "L3_Cache_Access_BW" }, { @@ -263,7 +270,7 @@ { "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY", - "MetricGroup": "Cache_Misses", + "MetricGroup": "Cache_Misses;Offcore", "MetricName": "L2MPKI_All" }, { @@ -298,7 +305,7 @@ }, { "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / 1000000000 ) / duration_time", + "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, @@ -310,62 +317,74 @@ }, { "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active", - "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 )", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", + "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", - "MetricGroup": "Summary", + "MetricGroup": "OS", "MetricName": "Kernel_Utilization" }, { "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", - "MetricGroup": "Memory_BW", + "MetricGroup": "Memory_BW;SoC", "MetricName": "DRAM_BW_Use" }, { "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", - "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x35\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", - "MetricGroup": "Memory_Lat", - "MetricName": "DRAM_Read_Latency" + "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x35\\,umask\\=0x21\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )", + "MetricGroup": "Memory_Lat;SoC", + "MetricName": "MEM_Read_Latency" }, { "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", - "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x36\\,umask\\=0x21\\,thresh\\=1@", - "MetricGroup": "Memory_BW", - "MetricName": "DRAM_Parallel_Reads" + "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433\\,thresh\\=1@", + "MetricGroup": "Memory_BW;SoC", + "MetricName": "MEM_Parallel_Reads" }, { "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches", "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ )", - "MetricGroup": "Memory_Lat", + "MetricGroup": "Memory_Lat;SoC;Server", "MetricName": "MEM_PMM_Read_Latency" }, { "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]", "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time )", - "MetricGroup": "Memory_BW", + "MetricGroup": "Memory_BW;SoC;Server", "MetricName": "PMM_Read_BW" }, { "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]", "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time )", - "MetricGroup": "Memory_BW", + "MetricGroup": "Memory_BW;SoC;Server", "MetricName": "PMM_Write_BW" }, + { + "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]", + "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3 ) * 4 / 1000000000 / duration_time", + "MetricGroup": "IO_BW;SoC;Server", + "MetricName": "IO_Write_BW" + }, + { + "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]", + "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 ) * 4 / 1000000000 / duration_time", + "MetricGroup": "IO_BW;SoC;Server", + "MetricName": "IO_Read_BW" + }, { "BriefDescription": "Socket actual clocks when any core is active on that socket", "MetricExpr": "cha_0@event\\=0x0@", - "MetricGroup": "", + "MetricGroup": "SoC", "MetricName": "Socket_CLKS" }, { - "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions. )", + "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]", "MetricExpr": "INST_RETIRED.ANY / ( BR_INST_RETIRED.FAR_BRANCH / 2 )", - "MetricGroup": "", + "MetricGroup": "Branches;OS", "MetricName": "IpFarBranch" }, { diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json b/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json index 3553472ad266..0716b2e3ff75 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json @@ -246,6 +246,30 @@ "SampleAfterValue": "2000003", "UMask": "0x10" }, + { + "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xAB", + "EventName": "DSB2MITE_SWITCHES.COUNT", + "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses.\nNote: Invoking MITE requires two or three cycles delay.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle", + "Counter": "0,1,2,3,4,5,6,7", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_1", + "MSRIndex": "0x3F7", + "MSRValue": "0x400106", + "PEBS": "2", + "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" + }, { "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", "Counter": "0,1,2,3", @@ -359,6 +383,16 @@ "SampleAfterValue": "2000003", "UMask": "0x24" }, + { + "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xE6", + "EventName": "BACLEARS.ANY", + "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, { "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json b/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json index cc66a51c6a7b..0c07cb4fbf58 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -13,7 +13,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -26,7 +26,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -81,7 +81,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -177,7 +177,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -218,7 +218,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -245,7 +245,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -286,7 +286,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -313,7 +313,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -350,7 +350,7 @@ "UMask": "0x8" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -405,7 +405,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & ANY_SNOOP", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP OCR.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -432,7 +432,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -473,7 +473,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -513,7 +513,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -540,7 +540,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -678,7 +678,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -705,7 +705,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -779,7 +779,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -792,7 +792,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -847,7 +847,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -874,7 +874,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -950,7 +950,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -977,7 +977,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1045,7 +1045,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1058,7 +1058,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1071,7 +1071,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1084,7 +1084,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1097,7 +1097,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & SNOOP_MISS", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1258,7 +1258,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1285,7 +1285,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1298,7 +1298,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1339,7 +1339,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1380,7 +1380,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1393,7 +1393,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & SNOOP_MISS", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1406,7 +1406,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1419,7 +1419,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & REMOTE_HITM", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1432,7 +1432,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1445,7 +1445,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & SNOOP_MISS", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1513,7 +1513,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L1D_AND_SW.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1540,7 +1540,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1619,7 +1619,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1632,7 +1632,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1645,7 +1645,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1672,7 +1672,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.PF_L2_DATA_RD.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1699,7 +1699,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1712,7 +1712,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1725,7 +1725,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.DEMAND_RFO.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1738,7 +1738,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1765,7 +1765,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS & SNOOP_MISS", + "BriefDescription": "OCR.ALL_READS.L3_MISS.SNOOP_MISS OCR.ALL_READS.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1806,7 +1806,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1833,7 +1833,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1874,7 +1874,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1887,7 +1887,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1942,7 +1942,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1969,7 +1969,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1982,7 +1982,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1995,7 +1995,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2008,7 +2008,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS & SNOOP_NONE", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2021,7 +2021,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2048,7 +2048,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & REMOTE_HITM", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2089,7 +2089,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2144,7 +2144,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2185,7 +2185,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2212,7 +2212,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2225,7 +2225,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2241,7 +2241,8 @@ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", - "EventCode": "0xCD", + "Data_LA": "1", + "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", "MSRIndex": "0x3F6", "MSRValue": "0x80", @@ -2290,7 +2291,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & SNOOP_MISS", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2345,7 +2346,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2386,7 +2387,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2399,7 +2400,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2426,7 +2427,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2439,7 +2440,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2452,7 +2453,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS & ANY_SNOOP", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2493,7 +2494,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2520,7 +2521,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2547,7 +2548,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2560,7 +2561,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2586,7 +2587,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2599,7 +2600,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2612,7 +2613,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2653,7 +2654,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2694,7 +2695,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2707,7 +2708,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & ANY_SNOOP", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP OCR.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2730,7 +2731,7 @@ "UMask": "0x40" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2743,7 +2744,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2756,7 +2757,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2769,7 +2770,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2782,7 +2783,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2809,7 +2810,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2822,7 +2823,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2835,7 +2836,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2848,7 +2849,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2875,7 +2876,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "OCR.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2902,7 +2903,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_READS.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2915,7 +2916,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2928,7 +2929,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L2_DATA_RD.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2941,7 +2942,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2954,7 +2955,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2981,7 +2982,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3050,7 +3051,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3104,7 +3105,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.HITM_OTHER_CORE OCR.PF_L3_RFO.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3117,7 +3118,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3195,7 +3196,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3222,7 +3223,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3235,7 +3236,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3290,7 +3291,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3373,7 +3374,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & SNOOP_NONE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3413,7 +3414,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3426,7 +3427,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3481,7 +3482,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3522,7 +3523,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3535,7 +3536,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3548,7 +3549,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3561,7 +3562,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3588,7 +3589,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3601,7 +3602,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3614,7 +3615,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3778,7 +3779,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3791,7 +3792,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3804,7 +3805,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3859,7 +3860,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3886,7 +3887,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3899,7 +3900,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3925,7 +3926,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L2_RFO.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3938,7 +3939,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3979,7 +3980,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L3_RFO.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4044,7 +4045,7 @@ "UMask": "0x20" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.SNOOP_MISS OCR.ALL_PF_RFO.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4071,7 +4072,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.PF_L3_DATA_RD.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4111,7 +4112,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4138,7 +4139,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4151,7 +4152,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP OCR.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4164,7 +4165,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS & ANY_SNOOP", + "BriefDescription": "OCR.ALL_READS.L3_MISS.ANY_SNOOP OCR.ALL_READS.L3_MISS.ANY_SNOOP OCR.ALL_READS.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4205,7 +4206,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4243,7 +4244,7 @@ "UMask": "0x4" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4284,7 +4285,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4297,7 +4298,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4310,7 +4311,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4323,7 +4324,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4336,7 +4337,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4363,7 +4364,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP OCR.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4390,7 +4391,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4503,7 +4504,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4529,7 +4530,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4556,7 +4557,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4569,7 +4570,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4582,7 +4583,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4623,7 +4624,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.HITM_OTHER_CORE OCR.PF_L2_RFO.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4636,7 +4637,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4649,7 +4650,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_READS.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_READS.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_READS.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4662,7 +4663,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4685,7 +4686,7 @@ "UMask": "0x80" }, { - "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4698,7 +4699,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.SNOOP_NONE OCR.ALL_PF_RFO.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4711,7 +4712,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4724,7 +4725,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS & SNOOP_MISS", + "BriefDescription": "OCR.ALL_RFO.L3_MISS.SNOOP_MISS OCR.ALL_RFO.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4765,7 +4766,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4802,7 +4803,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & SNOOP_NONE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4828,7 +4829,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.DEMAND_DATA_RD.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4841,7 +4842,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & REMOTE_HITM", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4909,7 +4910,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4950,7 +4951,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & SNOOP_NONE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4977,7 +4978,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4990,21 +4991,7 @@ "UMask": "0x1" }, { - "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", - "Counter": "0,1,2,3", - "CounterHTOff": "0,1,2,3", - "Deprecated": "1", - "EventCode": "0xB7, 0xBB", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0110000001", - "Offcore": "1", - "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "UMask": "0x1" - }, - { - "BriefDescription": "ALL_RFO & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5017,7 +5004,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5030,7 +5017,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5043,7 +5030,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5070,7 +5057,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS & REMOTE_HITM", + "BriefDescription": "OCR.ALL_READS.L3_MISS.REMOTE_HITM OCR.ALL_READS.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5097,7 +5084,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.ANY_SNOOP OCR.PF_L3_RFO.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5152,7 +5139,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5188,7 +5175,7 @@ "UMask": "0x10" }, { - "BriefDescription": "ALL_RFO & L3_MISS & REMOTE_HITM", + "BriefDescription": "OCR.ALL_RFO.L3_MISS.REMOTE_HITM OCR.ALL_RFO.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5201,7 +5188,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5214,7 +5201,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & REMOTE_HITM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5227,7 +5214,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5240,7 +5227,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5253,7 +5240,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5266,7 +5253,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5335,7 +5322,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS & SNOOP_MISS", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5390,7 +5377,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5403,7 +5390,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5419,7 +5406,8 @@ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", - "EventCode": "0xCD", + "Data_LA": "1", + "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", "MSRIndex": "0x3F6", "MSRValue": "0x20", @@ -5444,7 +5432,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5457,7 +5445,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5470,7 +5458,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5510,7 +5498,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_MISS", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5523,7 +5511,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5536,7 +5524,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5549,7 +5537,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & SNOOP_MISS", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5585,7 +5573,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS & SNOOP_NONE", + "BriefDescription": "OCR.ALL_RFO.L3_MISS.SNOOP_NONE OCR.ALL_RFO.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5598,7 +5586,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5667,7 +5655,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5708,7 +5696,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.OTHER.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5747,7 +5735,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5760,7 +5748,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS & SNOOP_NONE", + "BriefDescription": "OCR.ALL_READS.L3_MISS.SNOOP_NONE OCR.ALL_READS.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5829,7 +5817,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5870,7 +5858,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5925,7 +5913,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5938,7 +5926,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5951,7 +5939,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5964,7 +5952,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5977,7 +5965,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS & SNOOP_MISS", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.SNOOP_MISS OCR.ALL_DATA_RD.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6004,7 +5992,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6017,7 +6005,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6030,7 +6018,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6099,7 +6087,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6181,7 +6169,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6222,7 +6210,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6235,7 +6223,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.ANY_SNOOP OCR.PF_L2_RFO.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6262,7 +6250,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6359,7 +6347,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6400,7 +6388,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6413,7 +6401,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & SNOOP_NONE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6426,7 +6414,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6439,7 +6427,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6452,7 +6440,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.HITM_OTHER_CORE OCR.OTHER.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6496,7 +6484,8 @@ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", - "EventCode": "0xCD", + "Data_LA": "1", + "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", "MSRIndex": "0x3F6", "MSRValue": "0x100", @@ -6592,7 +6581,8 @@ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", - "EventCode": "0xCD", + "Data_LA": "1", + "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", "MSRIndex": "0x3F6", "MSRValue": "0x10", @@ -6640,7 +6630,7 @@ "UMask": "0x20" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6694,7 +6684,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & SNOOP_NONE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6707,7 +6697,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6763,7 +6753,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6776,7 +6766,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6789,7 +6779,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6816,7 +6806,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6843,7 +6833,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6873,7 +6863,8 @@ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", - "EventCode": "0xCD", + "Data_LA": "1", + "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", "MSRIndex": "0x3F6", "MSRValue": "0x200", @@ -6893,7 +6884,7 @@ "UMask": "0x20" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6962,7 +6953,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6975,7 +6966,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_MISS", + "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7016,7 +7007,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.NO_SNOOP_NEEDED OCR.DEMAND_CODE_RD.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7029,7 +7020,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.HITM_OTHER_CORE OCR.PF_L1D_AND_SW.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7070,7 +7061,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & REMOTE_HITM", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7083,7 +7074,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.DEMAND_DATA_RD.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7096,7 +7087,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7123,7 +7114,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7136,7 +7127,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS & ANY_SNOOP", + "BriefDescription": "OCR.ALL_RFO.L3_MISS.ANY_SNOOP OCR.ALL_RFO.L3_MISS.ANY_SNOOP OCR.ALL_RFO.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7149,7 +7140,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7204,7 +7195,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7217,7 +7208,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & REMOTE_HITM", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7243,7 +7234,8 @@ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", - "EventCode": "0xCD", + "Data_LA": "1", + "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", "MSRIndex": "0x3F6", "MSRValue": "0x40", @@ -7254,7 +7246,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_RFO.L3_MISS.HITM_OTHER_CORE OCR.ALL_RFO.L3_MISS.HITM_OTHER_CORE OCR.ALL_RFO.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7295,7 +7287,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7336,7 +7328,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.NO_SNOOP_NEEDED OCR.OTHER.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7405,7 +7397,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7418,7 +7410,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS & ANY_SNOOP", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.ANY_SNOOP OCR.OTHER.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7431,7 +7423,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7444,7 +7436,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7457,7 +7449,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7470,7 +7462,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7483,7 +7475,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7524,7 +7516,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7551,7 +7543,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7564,7 +7556,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7577,7 +7569,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7604,7 +7596,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7617,7 +7609,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.HITM_OTHER_CORE OCR.DEMAND_RFO.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7658,7 +7650,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_MISS", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7681,7 +7673,7 @@ "UMask": "0x2" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS & REMOTE_HITM", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7708,7 +7700,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7735,7 +7727,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7762,7 +7754,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7789,7 +7781,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7802,7 +7794,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS & SNOOP_NONE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.SNOOP_NONE OCR.ALL_DATA_RD.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7815,7 +7807,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7828,7 +7820,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7841,7 +7833,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7854,7 +7846,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7895,7 +7887,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7922,7 +7914,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7949,7 +7941,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7975,19 +7967,6 @@ "SampleAfterValue": "100003", "UMask": "0x1" }, - { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", - "Counter": "0,1,2,3", - "CounterHTOff": "0,1,2,3", - "EventCode": "0xB7, 0xBB", - "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0810000001", - "Offcore": "1", - "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "UMask": "0x1" - }, { "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", @@ -8003,7 +7982,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8026,7 +8005,7 @@ "UMask": "0x2" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8039,7 +8018,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8070,7 +8049,7 @@ "UMask": "0x10" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.ANY_SNOOP OCR.DEMAND_RFO.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8083,7 +8062,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8236,7 +8215,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED OCR.ALL_READS.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8249,7 +8228,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8262,7 +8241,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8289,7 +8268,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8344,7 +8323,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS & REMOTE_HITM", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.REMOTE_HITM OCR.ALL_DATA_RD.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8357,7 +8336,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8426,7 +8405,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & REMOTE_HITM", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8439,7 +8418,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8452,7 +8431,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8520,7 +8499,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & REMOTE_HITM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8630,7 +8609,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L3_DATA_RD.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8713,7 +8692,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8726,7 +8705,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8766,7 +8745,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8803,7 +8782,7 @@ "UMask": "0x10" }, { - "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8816,7 +8795,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8857,7 +8836,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8968,7 +8947,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8981,7 +8960,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.HITM_OTHER_CORE OCR.DEMAND_CODE_RD.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8994,7 +8973,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9021,7 +9000,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9047,7 +9026,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9060,7 +9039,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9073,7 +9052,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9168,6 +9147,19 @@ "SampleAfterValue": "100003", "UMask": "0x1" }, + { + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0810000001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, { "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).", "Counter": "0,1,2,3", @@ -9261,7 +9253,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9316,7 +9308,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9329,7 +9321,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9356,7 +9348,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP OCR.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9383,7 +9375,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9424,7 +9416,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9465,7 +9457,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9506,7 +9498,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9533,7 +9525,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9546,7 +9538,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9601,7 +9593,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9614,7 +9606,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9641,7 +9633,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & REMOTE_HITM", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9678,7 +9670,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9705,7 +9697,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9718,7 +9710,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9745,7 +9737,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9772,7 +9764,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9785,7 +9777,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.HIT_OTHER_CORE_FWD OCR.OTHER.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9812,7 +9804,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS & REMOTE_HITM", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.REMOTE_HITM OCR.ALL_PF_RFO.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9839,7 +9831,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9855,7 +9847,8 @@ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", - "EventCode": "0xCD", + "Data_LA": "1", + "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", "MSRIndex": "0x3F6", "MSRValue": "0x4", @@ -9883,7 +9876,8 @@ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", - "EventCode": "0xCD", + "Data_LA": "1", + "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", "MSRIndex": "0x3F6", "MSRValue": "0x8", @@ -9894,7 +9888,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9905,5 +9899,19 @@ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "UMask": "0x1" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Deprecated": "1", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0110000001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/other.json b/tools/perf/pmu-events/arch/x86/cascadelakex/other.json index 05d13d53c374..f77d78e90954 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/other.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/other.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -13,7 +13,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -26,7 +26,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_READS.L3_HIT_F.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -39,7 +39,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -52,7 +52,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -65,7 +65,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -78,7 +78,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -104,7 +104,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -117,7 +117,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -130,7 +130,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "Counts any other requests OCR.OTHER.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -143,7 +143,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -156,7 +156,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -169,7 +169,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -182,7 +182,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -195,7 +195,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -208,7 +208,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -221,7 +221,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -234,7 +234,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -247,7 +247,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & SNOOP_MISS", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -260,7 +260,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -273,7 +273,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -299,7 +299,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -312,7 +312,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -325,7 +325,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_READS.L3_HIT_M.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -338,7 +338,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -351,7 +351,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -364,7 +364,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -377,7 +377,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -390,7 +390,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED OCR.ALL_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -403,7 +403,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -416,7 +416,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "Counts any other requests OCR.OTHER.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -429,7 +429,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -442,7 +442,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_READS.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -478,7 +478,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE OCR.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -491,7 +491,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -517,7 +517,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -530,7 +530,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -543,7 +543,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -556,7 +556,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -582,7 +582,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -595,7 +595,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_F & SNOOP_MISS", + "BriefDescription": "OCR.ALL_READS.L3_HIT_F.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -621,7 +621,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -634,7 +634,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -660,7 +660,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -673,7 +673,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -699,7 +699,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -712,7 +712,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT & SNOOP_MISS", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -738,7 +738,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -751,7 +751,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_MISS", + "BriefDescription": "OCR.ALL_RFO.L3_HIT.SNOOP_MISS OCR.ALL_RFO.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -764,7 +764,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -777,7 +777,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -790,7 +790,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -803,7 +803,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -816,7 +816,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -829,7 +829,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -842,7 +842,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & SNOOP_NONE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -855,7 +855,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -868,7 +868,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_M & SNOOP_MISS", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -881,7 +881,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -894,7 +894,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -946,7 +946,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -985,7 +985,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -998,7 +998,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1011,7 +1011,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1024,7 +1024,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1050,7 +1050,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1063,7 +1063,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1076,7 +1076,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1089,7 +1089,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE OCR.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1102,7 +1102,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1115,7 +1115,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1128,7 +1128,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1141,7 +1141,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1167,7 +1167,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1180,7 +1180,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.ANY_SNOOP OCR.DEMAND_RFO.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1193,7 +1193,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1206,7 +1206,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1219,7 +1219,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1232,7 +1232,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1245,7 +1245,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1258,7 +1258,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & SNOOP_NONE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1297,7 +1297,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1310,7 +1310,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & ANY_RESPONSE have any response type.", + "BriefDescription": "OCR.ALL_PF_DATA_RD.ANY_RESPONSE have any response type.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1323,7 +1323,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1336,7 +1336,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.SNOOP_MISS OCR.ALL_PF_RFO.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1349,7 +1349,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1362,7 +1362,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1388,7 +1388,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_S & SNOOP_MISS", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1401,7 +1401,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.ANY_SNOOP OCR.ALL_RFO.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1414,7 +1414,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1427,7 +1427,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1440,7 +1440,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "OCR.ALL_READS.L3_HIT_E.ANY_SNOOP OCR.ALL_READS.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1453,7 +1453,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1466,7 +1466,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1479,7 +1479,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1492,7 +1492,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_FWD OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1505,7 +1505,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1518,7 +1518,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1531,7 +1531,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1544,7 +1544,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1557,7 +1557,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1570,7 +1570,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1583,7 +1583,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1596,7 +1596,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1609,7 +1609,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1622,7 +1622,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1635,7 +1635,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1661,7 +1661,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & ANY_RESPONSE have any response type.", + "BriefDescription": "OCR.ALL_RFO.ANY_RESPONSE have any response type.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1726,7 +1726,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1739,7 +1739,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1752,7 +1752,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.ANY_SNOOP OCR.PF_L2_RFO.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1765,7 +1765,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1778,7 +1778,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1791,7 +1791,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1804,7 +1804,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1817,7 +1817,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT & SNOOP_MISS", + "BriefDescription": "OCR.ALL_READS.L3_HIT.SNOOP_MISS OCR.ALL_READS.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1830,7 +1830,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1843,7 +1843,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1856,7 +1856,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1908,7 +1908,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1921,7 +1921,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1934,7 +1934,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1947,7 +1947,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1960,7 +1960,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.ANY_SNOOP OCR.ALL_READS.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1973,7 +1973,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1986,7 +1986,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & SNOOP_NONE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1999,7 +1999,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2012,7 +2012,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2025,7 +2025,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2038,7 +2038,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2051,7 +2051,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2077,7 +2077,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2090,7 +2090,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2103,7 +2103,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_E & SNOOP_MISS", + "BriefDescription": "OCR.ALL_READS.L3_HIT_E.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2116,7 +2116,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2129,7 +2129,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_M & SNOOP_NONE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2142,7 +2142,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2155,7 +2155,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2168,7 +2168,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2194,7 +2194,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2207,7 +2207,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2220,7 +2220,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2233,7 +2233,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2246,7 +2246,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2259,7 +2259,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2272,7 +2272,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2285,7 +2285,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2337,7 +2337,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2350,7 +2350,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2363,7 +2363,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2376,7 +2376,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2389,7 +2389,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2402,7 +2402,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2428,7 +2428,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2441,7 +2441,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2454,7 +2454,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2490,7 +2490,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2503,7 +2503,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2516,7 +2516,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2529,7 +2529,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2542,7 +2542,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2555,7 +2555,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2568,7 +2568,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2581,7 +2581,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2594,7 +2594,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_F & SNOOP_MISS", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2607,7 +2607,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2620,7 +2620,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & ANY_RESPONSE have any response type.", + "BriefDescription": "OCR.ALL_PF_RFO.ANY_RESPONSE have any response type.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2633,7 +2633,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2646,7 +2646,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2659,7 +2659,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2672,7 +2672,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2685,7 +2685,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_M & SNOOP_MISS", + "BriefDescription": "OCR.ALL_READS.L3_HIT_M.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2698,7 +2698,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2711,7 +2711,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2724,7 +2724,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & SNOOP_MISS", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2747,7 +2747,7 @@ "UMask": "0x40" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2773,7 +2773,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2799,7 +2799,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2812,7 +2812,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_M & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2825,7 +2825,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2838,7 +2838,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2851,7 +2851,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2877,7 +2877,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2890,7 +2890,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2916,7 +2916,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2929,7 +2929,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2942,7 +2942,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2955,7 +2955,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2968,7 +2968,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2994,7 +2994,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3020,7 +3020,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & SNOOP_MISS", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3046,7 +3046,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3059,7 +3059,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3072,7 +3072,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3085,7 +3085,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3098,7 +3098,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3111,7 +3111,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_E & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3124,7 +3124,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3137,7 +3137,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & ANY_RESPONSE have any response type.", + "BriefDescription": "OCR.ALL_DATA_RD.ANY_RESPONSE have any response type.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3150,7 +3150,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3163,7 +3163,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_S & SNOOP_NONE", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3189,7 +3189,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3202,7 +3202,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3215,7 +3215,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3241,7 +3241,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_F & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3254,7 +3254,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3267,7 +3267,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3280,7 +3280,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.HIT_OTHER_CORE_FWD OCR.OTHER.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3306,7 +3306,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3319,7 +3319,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3332,7 +3332,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3345,7 +3345,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3358,7 +3358,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3371,7 +3371,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3384,7 +3384,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3397,7 +3397,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3423,7 +3423,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3436,7 +3436,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3449,7 +3449,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & SNOOP_NONE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3462,7 +3462,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3475,7 +3475,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3488,7 +3488,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3501,7 +3501,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3514,7 +3514,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3527,7 +3527,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3540,7 +3540,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3553,7 +3553,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3566,7 +3566,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3579,7 +3579,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3592,7 +3592,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3605,7 +3605,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3618,7 +3618,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3631,7 +3631,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3644,7 +3644,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_READS.L3_HIT_F.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3657,7 +3657,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3670,7 +3670,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3696,7 +3696,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3709,7 +3709,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3735,7 +3735,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3748,7 +3748,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3771,7 +3771,7 @@ "UMask": "0x2" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3797,7 +3797,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3810,7 +3810,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3823,7 +3823,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3836,7 +3836,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3849,7 +3849,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_F & SNOOP_NONE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3875,7 +3875,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3888,7 +3888,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3914,7 +3914,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3927,7 +3927,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & SNOOP_MISS", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3940,7 +3940,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3953,7 +3953,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3966,7 +3966,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3979,7 +3979,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3992,7 +3992,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4005,7 +4005,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4018,7 +4018,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4031,7 +4031,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "Counts any other requests OCR.OTHER.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4044,7 +4044,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4057,7 +4057,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "Counts any other requests OCR.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4070,7 +4070,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4092,7 +4092,7 @@ "UMask": "0x4" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4118,7 +4118,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4131,7 +4131,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4144,7 +4144,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4157,7 +4157,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4170,7 +4170,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_F & SNOOP_MISS", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4196,7 +4196,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4209,7 +4209,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4235,7 +4235,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4248,7 +4248,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4261,7 +4261,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.HITM_OTHER_CORE OCR.ALL_RFO.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4274,7 +4274,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4287,7 +4287,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.ANY_SNOOP OCR.ALL_RFO.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4300,7 +4300,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4313,7 +4313,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.ANY_SNOOP OCR.ALL_RFO.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4326,7 +4326,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts any other requests OCR.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4339,7 +4339,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4352,7 +4352,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4365,7 +4365,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4378,7 +4378,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4391,7 +4391,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4404,7 +4404,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4417,7 +4417,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4430,7 +4430,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_READS.L3_HIT_E.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4443,7 +4443,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4469,7 +4469,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4495,7 +4495,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4521,7 +4521,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4534,7 +4534,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4560,7 +4560,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4573,7 +4573,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4586,7 +4586,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT & ANY_SNOOP", + "BriefDescription": "OCR.ALL_READS.L3_HIT.ANY_SNOOP OCR.ALL_READS.L3_HIT.ANY_SNOOP OCR.ALL_READS.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4599,7 +4599,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4612,7 +4612,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4625,7 +4625,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4638,7 +4638,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4651,7 +4651,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4677,7 +4677,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT & SNOOP_HIT_WITH_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4690,7 +4690,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4716,7 +4716,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT & SNOOP_NONE", + "BriefDescription": "OCR.ALL_READS.L3_HIT.SNOOP_NONE OCR.ALL_READS.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4729,7 +4729,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4742,7 +4742,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4755,7 +4755,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts any other requests OCR.OTHER.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4768,7 +4768,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4781,7 +4781,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4794,7 +4794,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_READS.L3_HIT_S.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4807,7 +4807,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4820,7 +4820,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_HIT_WITH_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4833,7 +4833,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "OCR.ALL_READS.L3_HIT_M.ANY_SNOOP OCR.ALL_READS.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4846,7 +4846,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4859,7 +4859,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & ANY_RESPONSE have any response type.", + "BriefDescription": "OCR.ALL_READS.ANY_RESPONSE have any response type.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4872,7 +4872,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4885,7 +4885,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4898,7 +4898,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4911,7 +4911,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4924,7 +4924,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT & ANY_SNOOP", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4937,7 +4937,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4950,7 +4950,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4963,7 +4963,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4976,7 +4976,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5015,7 +5015,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5041,7 +5041,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5054,7 +5054,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT & SNOOP_NONE", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5067,7 +5067,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5080,7 +5080,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_E & SNOOP_NONE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5093,7 +5093,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5106,7 +5106,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_READS.L3_HIT_S.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5119,7 +5119,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "OCR.ALL_READS.L3_HIT_F.ANY_SNOOP OCR.ALL_READS.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5132,7 +5132,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5145,7 +5145,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_S & SNOOP_NONE", + "BriefDescription": "OCR.ALL_READS.L3_HIT_S.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5158,7 +5158,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5171,7 +5171,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5184,7 +5184,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5197,7 +5197,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5210,7 +5210,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5236,7 +5236,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_S & SNOOP_NONE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5249,7 +5249,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5262,7 +5262,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP OCR.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5275,7 +5275,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_NONE", + "BriefDescription": "OCR.ALL_RFO.L3_HIT.SNOOP_NONE OCR.ALL_RFO.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5288,7 +5288,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5301,7 +5301,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5314,7 +5314,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5327,7 +5327,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5340,7 +5340,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_E & SNOOP_NONE", + "BriefDescription": "OCR.ALL_READS.L3_HIT_E.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5353,7 +5353,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5366,7 +5366,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5379,7 +5379,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5401,7 +5401,7 @@ "UMask": "0x2" }, { - "BriefDescription": "ALL_RFO & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5414,7 +5414,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5427,7 +5427,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5440,7 +5440,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_M & SNOOP_NONE", + "BriefDescription": "OCR.ALL_READS.L3_HIT_M.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5476,7 +5476,7 @@ "UMask": "0x20" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE OCR.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5489,7 +5489,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5502,7 +5502,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5515,7 +5515,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5528,7 +5528,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5541,7 +5541,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5554,7 +5554,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5567,7 +5567,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5580,7 +5580,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_F & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5593,7 +5593,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5606,7 +5606,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5619,7 +5619,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5645,7 +5645,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.NO_SNOOP_NEEDED OCR.ALL_READS.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5658,7 +5658,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5671,7 +5671,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5684,7 +5684,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_F & SNOOP_NONE", + "BriefDescription": "OCR.ALL_READS.L3_HIT_F.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5697,7 +5697,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & ANY_SNOOP", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP OCR.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5710,7 +5710,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5723,7 +5723,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5736,7 +5736,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5749,7 +5749,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5762,7 +5762,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5775,7 +5775,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5788,7 +5788,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5801,7 +5801,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5814,7 +5814,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5827,7 +5827,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5840,7 +5840,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_NONE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_NONE OCR.ALL_DATA_RD.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5853,7 +5853,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5866,7 +5866,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_S & SNOOP_MISS", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5879,7 +5879,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5905,7 +5905,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5918,7 +5918,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.ANY_SNOOP OCR.PF_L3_RFO.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5931,7 +5931,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_M & SNOOP_MISS", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5957,7 +5957,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5970,7 +5970,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5983,7 +5983,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5996,7 +5996,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_E & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6009,7 +6009,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6022,7 +6022,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6035,7 +6035,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6048,7 +6048,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6074,7 +6074,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6100,7 +6100,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6113,7 +6113,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6139,7 +6139,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6152,7 +6152,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6165,7 +6165,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6191,7 +6191,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6204,7 +6204,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6217,7 +6217,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6230,7 +6230,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6243,7 +6243,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6256,7 +6256,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6269,7 +6269,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT & ANY_SNOOP", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.ANY_SNOOP OCR.OTHER.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6282,7 +6282,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6295,7 +6295,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_S & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6308,7 +6308,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6321,7 +6321,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_READS.L3_HIT_E.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6347,7 +6347,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6360,7 +6360,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6373,7 +6373,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6386,7 +6386,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6412,7 +6412,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & ANY_SNOOP", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP OCR.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6438,7 +6438,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & SNOOP_MISS", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6451,7 +6451,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_HIT_WITH_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6464,7 +6464,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE OCR.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6477,7 +6477,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6490,7 +6490,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6529,7 +6529,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6542,7 +6542,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6555,7 +6555,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6568,7 +6568,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED OCR.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6620,7 +6620,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6633,7 +6633,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_READS.L3_HIT_M.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6659,7 +6659,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.HITM_OTHER_CORE OCR.ALL_READS.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6672,7 +6672,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6685,7 +6685,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6698,7 +6698,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6711,7 +6711,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6763,7 +6763,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.NO_SNOOP_NEEDED OCR.OTHER.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6776,7 +6776,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6789,7 +6789,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6815,7 +6815,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT & ANY_SNOOP", + "BriefDescription": "OCR.ALL_RFO.L3_HIT.ANY_SNOOP OCR.ALL_RFO.L3_HIT.ANY_SNOOP OCR.ALL_RFO.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6828,7 +6828,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6841,7 +6841,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6867,7 +6867,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6880,7 +6880,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP OCR.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6893,7 +6893,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_M & SNOOP_NONE", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6906,7 +6906,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6919,7 +6919,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6958,7 +6958,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP OCR.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6971,7 +6971,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6984,7 +6984,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6997,7 +6997,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7019,7 +7019,7 @@ "UMask": "0x8" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7032,7 +7032,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7045,7 +7045,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7058,7 +7058,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7071,7 +7071,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_M & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7084,7 +7084,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7097,7 +7097,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7110,7 +7110,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP OCR.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7123,7 +7123,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7136,7 +7136,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_E & SNOOP_MISS", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7149,7 +7149,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7162,7 +7162,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7175,7 +7175,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7188,7 +7188,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7201,7 +7201,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7214,7 +7214,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7227,7 +7227,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7240,7 +7240,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7253,7 +7253,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7276,7 +7276,7 @@ "UMask": "0x18" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7289,7 +7289,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7315,7 +7315,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7328,7 +7328,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7341,7 +7341,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7354,7 +7354,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7367,7 +7367,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7380,7 +7380,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7393,7 +7393,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.ANY_SNOOP OCR.ALL_RFO.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7406,7 +7406,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7419,7 +7419,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7432,7 +7432,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7445,7 +7445,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7458,7 +7458,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.SNOOP_NONE OCR.ALL_PF_RFO.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7520,7 +7520,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7533,7 +7533,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7546,7 +7546,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7572,7 +7572,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & SNOOP_NONE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7585,7 +7585,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7598,7 +7598,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7611,7 +7611,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7637,7 +7637,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7650,7 +7650,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7676,7 +7676,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7689,7 +7689,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7702,7 +7702,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7728,7 +7728,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_E & SNOOP_MISS", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7741,7 +7741,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7754,7 +7754,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7767,7 +7767,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7780,7 +7780,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_MISS", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_MISS OCR.ALL_DATA_RD.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7793,7 +7793,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7806,7 +7806,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP OCR.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7832,7 +7832,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7845,7 +7845,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP OCR.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7858,7 +7858,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7884,7 +7884,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7897,7 +7897,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7910,7 +7910,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7949,7 +7949,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7962,7 +7962,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7975,7 +7975,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8001,7 +8001,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8014,7 +8014,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8027,7 +8027,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8053,7 +8053,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8092,7 +8092,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8105,7 +8105,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8118,7 +8118,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8131,7 +8131,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8144,7 +8144,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8157,7 +8157,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8170,7 +8170,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8183,7 +8183,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.HITM_OTHER_CORE OCR.OTHER.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8196,7 +8196,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8222,7 +8222,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8235,7 +8235,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8257,7 +8257,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8270,7 +8270,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8283,7 +8283,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_S & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8309,7 +8309,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8322,7 +8322,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8348,7 +8348,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_E & SNOOP_NONE", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8361,7 +8361,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8374,7 +8374,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8387,7 +8387,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_F & SNOOP_NONE", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8400,7 +8400,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8413,7 +8413,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8426,7 +8426,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8439,7 +8439,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE OCR.ALL_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8452,7 +8452,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8465,7 +8465,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8478,7 +8478,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "OCR.ALL_READS.L3_HIT_S.ANY_SNOOP OCR.ALL_READS.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8504,7 +8504,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8517,7 +8517,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8543,7 +8543,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8556,7 +8556,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8569,7 +8569,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8582,7 +8582,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8595,7 +8595,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8608,7 +8608,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8621,7 +8621,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8634,7 +8634,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8647,7 +8647,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_S & SNOOP_MISS", + "BriefDescription": "OCR.ALL_READS.L3_HIT_S.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json b/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json index 5ec668f46ac1..023f31c72a42 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json @@ -826,16 +826,6 @@ "SampleAfterValue": "2000003", "UMask": "0x2" }, - { - "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", - "Counter": "0,1,2,3", - "CounterHTOff": "0,1,2,3,4,5,6,7", - "EventCode": "0xE6", - "EventName": "BACLEARS.ANY", - "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.", - "SampleAfterValue": "100003", - "UMask": "0x1" - }, { "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json index 3fb5cdce842f..4ba9e6d9f25e 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json @@ -90,32 +90,32 @@ "Unit": "iMC" }, { - "BriefDescription": "Intel Optane DC persistent memory bandwidth read (MB/sec). Derived from unc_m_pmm_rpq_inserts", + "BriefDescription": "Intel Optane DC persistent memory bandwidth read (MB). Derived from unc_m_pmm_rpq_inserts", "Counter": "0,1,2,3", "EventCode": "0xE3", "EventName": "UNC_M_PMM_BANDWIDTH.READ", "PerPkg": "1", - "ScaleUnit": "6.103515625E-5MB/sec", + "ScaleUnit": "6.103515625E-5MB", "Unit": "iMC" }, { - "BriefDescription": "Intel Optane DC persistent memory bandwidth write (MB/sec). Derived from unc_m_pmm_wpq_inserts", + "BriefDescription": "Intel Optane DC persistent memory bandwidth write (MB). Derived from unc_m_pmm_wpq_inserts", "Counter": "0,1,2,3", "EventCode": "0xE7", "EventName": "UNC_M_PMM_BANDWIDTH.WRITE", "PerPkg": "1", - "ScaleUnit": "6.103515625E-5MB/sec", + "ScaleUnit": "6.103515625E-5MB", "Unit": "iMC" }, { - "BriefDescription": "Intel Optane DC persistent memory bandwidth total (MB/sec). Derived from unc_m_pmm_rpq_inserts", + "BriefDescription": "Intel Optane DC persistent memory bandwidth total (MB). Derived from unc_m_pmm_rpq_inserts", "Counter": "0,1,2,3", "EventCode": "0xE3", "EventName": "UNC_M_PMM_BANDWIDTH.TOTAL", "MetricExpr": "UNC_M_PMM_RPQ_INSERTS + UNC_M_PMM_WPQ_INSERTS", "MetricName": "UNC_M_PMM_BANDWIDTH.TOTAL", "PerPkg": "1", - "ScaleUnit": "6.103515625E-5MB/sec", + "ScaleUnit": "6.103515625E-5MB", "Unit": "iMC" }, { diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json index df355ba7acc8..0cd083839e75 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json @@ -537,6 +537,27 @@ "UMask": "0x10", "Unit": "CHA" }, + { + "BriefDescription": "TOR Inserts : DRds issued by iA Cores that Missed the LLC", + "Counter": "0,1,2,3", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD", + "Filter": "config1=0x40433", + "PerPkg": "1", + "PublicDescription": "TOR Inserts : DRds issued by iA Cores that Missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.", + "UMask": "0x21", + "Unit": "CHA" + }, + { + "BriefDescription": "TOR Occupancy : DRds issued by iA Cores that Missed the LLC", + "EventCode": "0x36", + "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD", + "Filter": "config1=0x40433", + "PerPkg": "1", + "PublicDescription": "TOR Occupancy : DRds issued by iA Cores that Missed the LLC : For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.", + "UMask": "0x21", + "Unit": "CHA" + }, { "BriefDescription": "Clockticks of the IIO Traffic Controller", "Counter": "0,1,2,3", -- cgit v1.3.1 From b5ff7f2799a4191eb9d91479db59c855ddeaf81e Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 13 May 2020 16:13:33 +0800 Subject: perf vendor events: Update SkylakeX events to v1.21 - Update SkylakeX events to v1.21. - Update SkylakeX JSON metrics from TMAM 4.0. Other fixes: - Add NO_NMI_WATCHDOG metric constraint to Backend_Bound - Fix misspelled error Signed-off-by: Jin Yao Reviewed-by: Andi Kleen Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/lkml/20200922031918.3723-1-yao.jin@linux.intel.com/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/skylakex/cache.json | 2348 ++++++++++---------- .../arch/x86/skylakex/floating-point.json | 96 +- .../pmu-events/arch/x86/skylakex/frontend.json | 656 +++--- .../perf/pmu-events/arch/x86/skylakex/memory.json | 1977 ++++++++-------- tools/perf/pmu-events/arch/x86/skylakex/other.json | 172 +- .../pmu-events/arch/x86/skylakex/pipeline.json | 1206 +++++----- .../pmu-events/arch/x86/skylakex/skx-metrics.json | 141 +- .../arch/x86/skylakex/uncore-memory.json | 26 +- .../pmu-events/arch/x86/skylakex/uncore-other.json | 730 +++++- .../arch/x86/skylakex/virtual-memory.json | 358 +-- 10 files changed, 4137 insertions(+), 3573 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/skylakex/cache.json b/tools/perf/pmu-events/arch/x86/skylakex/cache.json index 24df183693fa..e750a21976f1 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/cache.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/cache.json @@ -1,1663 +1,1675 @@ [ { - "EventCode": "0x24", - "UMask": "0x21", - "BriefDescription": "Demand Data Read miss L2, no rejects", + "BriefDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS", - "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x24", - "UMask": "0x22", - "BriefDescription": "RFO requests that miss L2 cache", + "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.RFO_MISS", - "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.DEMAND_RFO", + "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.", + "SampleAfterValue": "100003", + "UMask": "0x4" }, { - "EventCode": "0x24", - "UMask": "0x24", - "BriefDescription": "L2 cache misses when fetching instructions", + "BriefDescription": "Counts all demand code reads that have any response type.", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.CODE_RD_MISS", - "PublicDescription": "Counts L2 cache misses when fetching instructions.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0x24", - "UMask": "0x27", + "EventName": "L2_RQSTS.PF_MISS", + "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x38" + }, + { "BriefDescription": "Demand requests that miss L2 cache", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", "EventName": "L2_RQSTS.ALL_DEMAND_MISS", "PublicDescription": "Demand requests that miss L2 cache.", "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x27" }, { - "EventCode": "0x24", - "UMask": "0x38", - "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache", + "BriefDescription": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.PF_MISS", - "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x24", - "UMask": "0x3f", - "BriefDescription": "All requests that miss L2 cache", + "BriefDescription": "Retired load instructions with L3 cache hits as data sources", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.MISS", - "PublicDescription": "All requests that miss L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L3_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.", + "SampleAfterValue": "50021", + "UMask": "0x4" }, { - "EventCode": "0x24", - "UMask": "0xc1", - "BriefDescription": "Demand Data Read requests that hit L2 cache", + "BriefDescription": "L2 writebacks that access L2 cache", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", - "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF0", + "EventName": "L2_TRANS.L2_WB", + "PublicDescription": "Counts L2 writebacks that access L2 cache.", "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x40" }, { - "EventCode": "0x24", - "UMask": "0xc2", - "BriefDescription": "RFO requests that hit L2 cache", + "BriefDescription": "L2 cache lines filling L2", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.RFO_HIT", - "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF1", + "EventName": "L2_LINES_IN.ALL", + "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.", + "SampleAfterValue": "100003", + "UMask": "0x1f" }, { - "EventCode": "0x24", - "UMask": "0xc4", - "BriefDescription": "L2 cache hits when fetching instructions, code reads.", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.CODE_RD_HIT", - "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x24", - "UMask": "0xd8", - "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache", + "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.PF_HIT", - "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x24", - "UMask": "0xe1", - "BriefDescription": "Demand Data Read requests", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3.", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", - "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x24", - "UMask": "0xe2", - "BriefDescription": "RFO requests to L2 cache", + "BriefDescription": "Demand Data Read requests sent to uncore", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_RFO", - "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD", + "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x24", - "UMask": "0xe4", - "BriefDescription": "L2 code requests", + "BriefDescription": "Retired load instructions missed L3 cache as data sources", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_CODE_RD", - "PublicDescription": "Counts the total number of L2 code requests.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L3_MISS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.", + "SampleAfterValue": "100007", + "UMask": "0x20" }, { - "EventCode": "0x24", - "UMask": "0xe7", - "BriefDescription": "Demand requests to L2 cache", + "BriefDescription": "All retired store instructions.", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", - "PublicDescription": "Demand requests to L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.ALL_STORES", + "L1_Hit_Indication": "1", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x82" }, { - "EventCode": "0x24", - "UMask": "0xf8", - "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches", + "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared state. A non-threaded event.", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_PF", - "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF2", + "EventName": "L2_LINES_OUT.SILENT", "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0x24", - "UMask": "0xff", - "BriefDescription": "All L2 requests", + "BriefDescription": "Counts all prefetch data reads that hit in the L3.", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.REFERENCES", - "PublicDescription": "All L2 requests.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x2E", - "UMask": "0x41", - "BriefDescription": "Core-originated cacheable demand requests missed L3", + "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "LONGEST_LAT_CACHE.MISS", - "Errata": "SKL057", - "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0x2E", - "UMask": "0x4f", - "BriefDescription": "Core-originated cacheable demand requests that refer to L3", + "BriefDescription": "Core-originated cacheable demand requests missed L3", "Counter": "0,1,2,3", - "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "CounterHTOff": "0,1,2,3,4,5,6,7", "Errata": "SKL057", - "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all accesses to the L3.", + "EventCode": "0x2E", + "EventName": "LONGEST_LAT_CACHE.MISS", + "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x41" }, { - "EventCode": "0x48", - "UMask": "0x1", - "BriefDescription": "Cycles with L1D load Misses outstanding.", + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches", "Counter": "0,1,2,3", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES", - "CounterMask": "1", - "PublicDescription": "Counts duration of L1D miss outstanding in cycles.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_PF", + "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.", + "SampleAfterValue": "200003", + "UMask": "0xf8" }, { - "EventCode": "0x48", - "UMask": "0x1", - "BriefDescription": "L1D miss outstandings duration in cycles", + "BriefDescription": "Retired load instructions whose data sources was remote HITM", "Counter": "0,1,2,3", - "EventName": "L1D_PEND_MISS.PENDING", - "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD3", + "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM", + "PEBS": "1", + "PublicDescription": "Retired load instructions whose data sources was remote HITM.", + "SampleAfterValue": "100007", + "UMask": "0x4" }, { - "EventCode": "0x48", - "UMask": "0x1", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", + "BriefDescription": "Counts all prefetch data reads that have any response type.", "Counter": "0,1,2,3", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "AnyThread": "1", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x48", - "UMask": "0x2", - "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.", + "BriefDescription": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", - "EventName": "L1D_PEND_MISS.FB_FULL", - "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x51", - "UMask": "0x1", - "BriefDescription": "L1D data line replacements", + "BriefDescription": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", - "EventName": "L1D.REPLACEMENT", - "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x60", - "UMask": "0x1", - "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", - "CounterMask": "1", - "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", + "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "EventCode": "0x60", - "UMask": "0x1", - "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.", + "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD", - "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x60", - "UMask": "0x1", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", + "BriefDescription": "RFO requests that miss L2 cache", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", - "CounterMask": "6", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.RFO_MISS", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x22" }, { - "EventCode": "0x60", - "UMask": "0x2", - "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.", + "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", - "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF2", + "EventName": "L2_LINES_OUT.NON_SILENT", + "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped.", + "SampleAfterValue": "200003", + "UMask": "0x2" }, { - "EventCode": "0x60", - "UMask": "0x2", - "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.", + "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", - "CounterMask": "1", - "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF2", + "EventName": "L2_LINES_OUT.USELESS_HWPF", + "SampleAfterValue": "200003", + "UMask": "0x4" }, { - "EventCode": "0x60", - "UMask": "0x4", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", + "BriefDescription": "Counts all demand data writes (RFOs) that have any response type.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", - "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x60", - "UMask": "0x4", - "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.", + "BriefDescription": "All requests that miss L2 cache", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", - "CounterMask": "1", - "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.MISS", + "PublicDescription": "All requests that miss L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x3f" }, { - "EventCode": "0x60", - "UMask": "0x8", - "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "BriefDescription": "L2 code requests", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", - "CounterMask": "1", - "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_CODE_RD", + "PublicDescription": "Counts the total number of L2 code requests.", + "SampleAfterValue": "200003", + "UMask": "0xe4" }, { - "EventCode": "0x60", - "UMask": "0x8", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "BriefDescription": "Retired load instructions whose data sources was forwarded from a remote cache", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", - "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD3", + "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD", + "PEBS": "1", + "PublicDescription": "Retired load instructions whose data sources was forwarded from a remote cache.", + "SampleAfterValue": "100007", + "UMask": "0x8" }, { - "EventCode": "0xB0", - "UMask": "0x1", - "BriefDescription": "Demand Data Read requests sent to uncore", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD", - "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xB0", - "UMask": "0x2", - "BriefDescription": "Cacheable and noncachaeble code read requests", + "BriefDescription": "Counts all demand & prefetch data reads that have any response type.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD", - "PublicDescription": "Counts both cacheable and non-cacheable code read requests.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xB0", - "UMask": "0x4", - "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM", + "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS.DEMAND_RFO", - "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "SampleAfterValue": "20011", + "UMask": "0x2" }, { - "EventCode": "0xB0", - "UMask": "0x8", - "BriefDescription": "Demand and prefetch data reads", + "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from remote dram", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD", - "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD3", + "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM", + "PEBS": "1", + "SampleAfterValue": "100007", + "UMask": "0x2" }, { - "EventCode": "0xB0", - "UMask": "0x80", - "BriefDescription": "Any memory transaction that reached the SQ.", + "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS", - "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xB2", - "UMask": "0x1", - "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.", + "BriefDescription": "Retired load instructions missed L1 cache as data sources", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL", - "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L1_MISS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.", + "SampleAfterValue": "100003", + "UMask": "0x8" }, { - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0020", + "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "EventCode": "0xD0", - "UMask": "0x11", - "BriefDescription": "Retired load instructions that miss the STLB. (Precise Event)", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", - "PublicDescription": "Retired load instructions that miss the STLB.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "EventCode": "0xD0", - "UMask": "0x12", - "BriefDescription": "Retired store instructions that miss the STLB. (Precise Event)", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "L2 cache misses when fetching instructions", "Counter": "0,1,2,3", - "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", - "PublicDescription": "Retired store instructions that miss the STLB.", - "SampleAfterValue": "100003", - "L1_Hit_Indication": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "PublicDescription": "Counts L2 cache misses when fetching instructions.", + "SampleAfterValue": "200003", + "UMask": "0x24" }, { - "EventCode": "0xD0", - "UMask": "0x21", - "BriefDescription": "Retired load instructions with locked access. (Precise Event)", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "MEM_INST_RETIRED.LOCK_LOADS", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xD0", - "UMask": "0x41", - "BriefDescription": "Retired load instructions that split across a cacheline boundary. (Precise Event)", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.", "Counter": "0,1,2,3", - "EventName": "MEM_INST_RETIRED.SPLIT_LOADS", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD", + "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "EventCode": "0xD0", - "UMask": "0x42", - "BriefDescription": "Retired store instructions that split across a cacheline boundary. (Precise Event)", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Counts demand data reads that hit in the L3.", "Counter": "0,1,2,3", - "EventName": "MEM_INST_RETIRED.SPLIT_STORES", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "L1_Hit_Indication": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "EventCode": "0xD0", - "UMask": "0x81", - "BriefDescription": "All retired load instructions. (Precise Event)", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", "Counter": "0,1,2,3", - "EventName": "MEM_INST_RETIRED.ALL_LOADS", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xD0", - "UMask": "0x82", - "BriefDescription": "All retired store instructions. (Precise Event)", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "MEM_INST_RETIRED.ALL_STORES", - "PublicDescription": "All retired store instructions.", - "SampleAfterValue": "2000003", - "L1_Hit_Indication": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xD1", - "UMask": "0x1", - "BriefDescription": "Retired load instructions with L1 cache hits as data sources", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.", "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_RETIRED.L1_HIT", - "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", + "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2" }, { - "EventCode": "0xD1", - "UMask": "0x2", - "BriefDescription": "Retired load instructions with L2 cache hits as data sources", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Demand requests to L2 cache", "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_RETIRED.L2_HIT", - "PublicDescription": "Retired load instructions with L2 cache hits as data sources.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", + "PublicDescription": "Demand requests to L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0xe7" }, { - "EventCode": "0xD1", - "UMask": "0x4", - "BriefDescription": "Retired load instructions with L3 cache hits as data sources", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3.", "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_RETIRED.L3_HIT", - "PublicDescription": "Retired load instructions with L3 cache hits as data sources.", - "SampleAfterValue": "50021", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xD1", - "UMask": "0x8", - "BriefDescription": "Retired load instructions missed L1 cache as data sources", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_RETIRED.L1_MISS", - "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "EventCode": "0xD1", - "UMask": "0x10", - "BriefDescription": "Retired load instructions missed L2 cache as data sources", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type.", "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_RETIRED.L2_MISS", - "PublicDescription": "Retired load instructions missed L2 cache as data sources.", - "SampleAfterValue": "50021", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xD1", - "UMask": "0x20", - "BriefDescription": "Retired load instructions missed L3 cache as data sources", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3.", "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_RETIRED.L3_MISS", - "PublicDescription": "Retired load instructions missed L3 cache as data sources.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xD1", - "UMask": "0x40", - "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_RETIRED.FB_HIT", - "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xD2", - "UMask": "0x1", - "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS", - "SampleAfterValue": "20011", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xD2", - "UMask": "0x2", - "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache", - "Data_LA": "1", - "PEBS": "1", + "AnyThread": "1", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT", - "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.", - "SampleAfterValue": "20011", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "EventCode": "0xD2", - "UMask": "0x4", - "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM", - "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.", - "SampleAfterValue": "20011", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xD2", - "UMask": "0x8", - "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Core-originated cacheable demand requests that refer to L3", "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE", - "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL057", + "EventCode": "0x2E", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all accesses to the L3.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4f" }, { - "EventCode": "0xD3", - "UMask": "0x1", - "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type.", "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xD3", - "UMask": "0x2", - "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from remote dram", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xD3", - "UMask": "0x4", - "BriefDescription": "Retired load instructions whose data sources was remote HITM", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3.", "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xD3", - "UMask": "0x8", - "BriefDescription": "Retired load instructions whose data sources was forwarded from a remote cache", + "BriefDescription": "Retired load instructions that miss the STLB.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", "PEBS": "1", + "SampleAfterValue": "100003", + "UMask": "0x11" + }, + { + "BriefDescription": "Counts demand data reads that have any response type.", "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xD4", - "UMask": "0x4", - "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3.", "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_MISC_RETIRED.UC", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xF0", - "UMask": "0x40", - "BriefDescription": "L2 writebacks that access L2 cache", + "BriefDescription": "L1D data line replacements", "Counter": "0,1,2,3", - "EventName": "L2_TRANS.L2_WB", - "PublicDescription": "Counts L2 writebacks that access L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x51", + "EventName": "L1D.REPLACEMENT", + "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "EventCode": "0xF1", - "UMask": "0x1f", - "BriefDescription": "L2 cache lines filling L2", + "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "L2_LINES_IN.ALL", - "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xF2", - "UMask": "0x1", - "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared state. A non-threaded event.", + "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.", "Counter": "0,1,2,3", - "EventName": "L2_LINES_OUT.SILENT", - "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD4", + "EventName": "MEM_LOAD_MISC_RETIRED.UC", + "PEBS": "1", + "SampleAfterValue": "100007", + "UMask": "0x4" }, { - "EventCode": "0xF2", - "UMask": "0x2", - "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped", + "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready", "Counter": "0,1,2,3", - "EventName": "L2_LINES_OUT.NON_SILENT", - "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.FB_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.", + "SampleAfterValue": "100007", + "UMask": "0x40" }, { - "EventCode": "0xF2", - "UMask": "0x4", "BriefDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF", - "Deprecated": "1", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Deprecated": "1", + "EventCode": "0xF2", "EventName": "L2_LINES_OUT.USELESS_PREF", - "PublicDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF", "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "EventCode": "0xF2", - "UMask": "0x4", - "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache", "Counter": "0,1,2,3", - "EventName": "L2_LINES_OUT.USELESS_HWPF", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.PF_HIT", + "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.", "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0xd8" }, { - "EventCode": "0xF4", - "UMask": "0x10", - "BriefDescription": "Number of cache line split locks sent to uncore.", + "BriefDescription": "Counts all demand & prefetch RFOs that have any response type.", "Counter": "0,1,2,3", - "EventName": "SQ_MISC.SPLIT_LOCK", - "PublicDescription": "Counts the number of cache line split locks sent to the uncore.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "Offcore": "1", + "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads have any response type.", - "MSRValue": "0x0000010001", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads have any response type.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { + "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010122", "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads TBD TBD", - "MSRValue": "0x01003C0001", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads TBD TBD", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads TBD TBD", - "MSRValue": "0x04003C0001", + "BriefDescription": "Demand Data Read miss L2, no rejects", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS", + "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.", + "SampleAfterValue": "200003", + "UMask": "0x21" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads TBD TBD", - "MSRValue": "0x10003C0001", + "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads TBD TBD", - "MSRValue": "0x3F803C0001", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) have any response type.", - "MSRValue": "0x0000010002", + "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) have any response type.", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x8" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD", - "MSRValue": "0x01003C0002", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD", - "MSRValue": "0x04003C0002", + "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD", - "MSRValue": "0x10003C0002", + "BriefDescription": "All retired load instructions.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.ALL_LOADS", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x81" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD", - "MSRValue": "0x3F803C0002", + "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS", + "PEBS": "1", + "SampleAfterValue": "20011", + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.", - "MSRValue": "0x0000010004", + "BriefDescription": "Demand Data Read requests", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", + "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.", + "SampleAfterValue": "200003", + "UMask": "0xe1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", - "MSRValue": "0x01003C0004", + "BriefDescription": "All L2 requests", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.REFERENCES", + "PublicDescription": "All L2 requests.", + "SampleAfterValue": "200003", + "UMask": "0xff" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", - "MSRValue": "0x04003C0004", + "BriefDescription": "Cycles with L1D load Misses outstanding.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES", + "PublicDescription": "Counts duration of L1D miss outstanding in cycles.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", - "MSRValue": "0x10003C0004", + "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "UMask": "0x4" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", - "MSRValue": "0x3F803C0004", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads have any response type.", - "MSRValue": "0x0000010010", + "BriefDescription": "Number of cache line split locks sent to uncore.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads have any response type.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF4", + "EventName": "SQ_MISC.SPLIT_LOCK", + "PublicDescription": "Counts the number of cache line split locks sent to the uncore.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x10" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", - "MSRValue": "0x01003C0010", + "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", - "MSRValue": "0x04003C0010", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", - "MSRValue": "0x10003C0010", + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.", + "SampleAfterValue": "200003", + "UMask": "0xc4" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", - "MSRValue": "0x3F803C0010", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs have any response type.", - "MSRValue": "0x0000010020", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that have any response type.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs have any response type.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", - "MSRValue": "0x01003C0020", + "BriefDescription": "Retired load instructions with L2 cache hits as data sources", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L2_HIT", + "PEBS": "1", + "PublicDescription": "Retired load instructions with L2 cache hits as data sources.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", - "MSRValue": "0x04003C0020", + "BriefDescription": "RFO requests that hit L2 cache", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.RFO_HIT", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0xc2" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", - "MSRValue": "0x10003C0020", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", - "MSRValue": "0x3F803C0020", + "BriefDescription": "L1D miss outstandings duration in cycles", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.PENDING", + "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads have any response type.", - "MSRValue": "0x0000010080", + "BriefDescription": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads have any response type.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", - "MSRValue": "0x01003C0080", + "BriefDescription": "Demand Data Read requests that hit L2 cache", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache", + "SampleAfterValue": "200003", + "UMask": "0xc1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", - "MSRValue": "0x04003C0080", + "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.", + "SampleAfterValue": "20011", + "UMask": "0x4" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", - "MSRValue": "0x10003C0080", + "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", - "MSRValue": "0x3F803C0080", + "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs have any response type.", - "MSRValue": "0x0000010100", + "BriefDescription": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs have any response type.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", - "MSRValue": "0x01003C0100", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", - "MSRValue": "0x04003C0100", + "BriefDescription": "Retired store instructions that split across a cacheline boundary.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.SPLIT_STORES", + "L1_Hit_Indication": "1", + "PEBS": "1", + "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x42" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", - "MSRValue": "0x10003C0100", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", - "MSRValue": "0x3F803C0100", + "BriefDescription": "Any memory transaction that reached the SQ.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS", + "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x80" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests have any response type.", - "MSRValue": "0x0000010400", + "BriefDescription": "Cacheable and noncachaeble code read requests", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests have any response type.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD", + "PublicDescription": "Counts both cacheable and non-cacheable code read requests.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", - "MSRValue": "0x01003C0400", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", - "MSRValue": "0x04003C0400", + "BriefDescription": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", - "MSRValue": "0x10003C0400", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", - "MSRValue": "0x3F803C0400", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that have any response type.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD have any response type.", - "MSRValue": "0x0000010490", + "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD have any response type.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x01003C0490", + "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x04003C0490", + "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.FB_FULL", + "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x10003C0490", + { + "BriefDescription": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x3F803C0490", + "BriefDescription": "Counts all demand code reads that hit in the L3.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD have any response type.", - "MSRValue": "0x0000010120", + "BriefDescription": "Counts prefetch RFOs that hit in the L3.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD have any response type.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x01003C0120", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x04003C0120", + "BriefDescription": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x10003C0120", + "BriefDescription": "Retired load instructions with L1 cache hits as data sources", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L1_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x3F803C0120", + "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "UMask": "0x8" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD have any response type.", - "MSRValue": "0x0000010491", + "BriefDescription": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD have any response type.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x01003C0491", + "BriefDescription": "Retired load instructions with locked access.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.LOCK_LOADS", + "PEBS": "1", + "SampleAfterValue": "100007", + "UMask": "0x21" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x04003C0491", + "BriefDescription": "Demand and prefetch data reads", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD", + "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x8" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x10003C0491", + "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD3", + "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from local DRAM.", + "SampleAfterValue": "100007", + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x3F803C0491", + "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD have any response type.", - "MSRValue": "0x0000010122", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD have any response type.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x01003C0122", + "BriefDescription": "Retired load instructions that split across a cacheline boundary.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.SPLIT_LOADS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x41" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x04003C0122", + "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB2", + "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL", + "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x10003C0122", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "6", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x3F803C0122", + "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", + "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads", - "MSRValue": "0x08007C0001", + "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs)", - "MSRValue": "0x08007C0002", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "MSRValue": "0x08007C0004", + "BriefDescription": "Counts prefetch RFOs that have any response type.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads", - "MSRValue": "0x08007C0010", + "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", - "MSRValue": "0x08007C0020", + "BriefDescription": "Retired store instructions that miss the STLB.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", + "L1_Hit_Indication": "1", + "PEBS": "1", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x12" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", - "MSRValue": "0x08007C0080", + "BriefDescription": "RFO requests to L2 cache", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_RFO", + "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.", + "SampleAfterValue": "200003", + "UMask": "0xe2" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", - "MSRValue": "0x08007C0100", + "BriefDescription": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests", - "MSRValue": "0x08007C0400", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD", - "MSRValue": "0x08007C0490", + "BriefDescription": "Retired load instructions missed L2 cache as data sources", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L2_MISS", + "PEBS": "1", + "PublicDescription": "Retired load instructions missed L2 cache as data sources.", + "SampleAfterValue": "50021", + "UMask": "0x10" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD", - "MSRValue": "0x08007C0120", + "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD", - "MSRValue": "0x08007C0491", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "UMask": "0x8" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD", - "MSRValue": "0x08007C0122", + "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json index c5d0babe89fc..e197cde15047 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json @@ -1,85 +1,85 @@ [ { - "EventCode": "0xC7", - "UMask": "0x1", - "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT14 RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "EventCode": "0xC7", - "UMask": "0x2", - "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 8 calculations per element.", "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x40" }, { - "EventCode": "0xC7", - "UMask": "0x4", - "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0xC7", - "UMask": "0x8", - "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0xC7", - "UMask": "0x10", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 16 calculations per element.", "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x80" }, { - "EventCode": "0xC7", - "UMask": "0x20", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "EventCode": "0xC7", - "UMask": "0x40", - "BriefDescription": "Number of Packed Double-Precision FP arithmetic instructions (Use operation multiplier of 8)", + "BriefDescription": "Cycles with any input/output SSE or FP assist", "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xCA", + "EventName": "FP_ASSIST.ANY", + "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.", + "SampleAfterValue": "100003", + "UMask": "0x1e" }, { - "EventCode": "0xC7", - "UMask": "0x80", - "BriefDescription": "Number of Packed Single-Precision FP arithmetic instructions (Use operation multiplier of 16)", + "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xCA", - "UMask": "0x1e", - "BriefDescription": "Cycles with any input/output SSE or FP assist", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", - "EventName": "FP_ASSIST.ANY", - "CounterMask": "1", - "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", + "SampleAfterValue": "2000003", + "UMask": "0x8" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json index 4dc583cfb545..cdf95bd2a73d 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json @@ -1,482 +1,516 @@ [ { - "EventCode": "0x79", - "UMask": "0x4", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.", "Counter": "0,1,2,3", - "EventName": "IDQ.MITE_CYCLES", - "CounterMask": "1", - "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x80", + "EventName": "ICACHE_16B.IFDATA_STALL", + "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "EventCode": "0x79", - "UMask": "0x4", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", + "BriefDescription": "Retired Instructions who experienced iTLB true miss.", "Counter": "0,1,2,3", - "EventName": "IDQ.MITE_UOPS", - "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.ITLB_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x14", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_128", + "MSRIndex": "0x3F7", + "MSRValue": "0x408006", + "PEBS": "1", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles with less than 3 uops delivered by the front end.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE", + "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0x79", - "UMask": "0x8", "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", "Counter": "0,1,2,3", - "EventName": "IDQ.DSB_CYCLES", + "CounterHTOff": "0,1,2,3,4,5,6,7", "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.DSB_CYCLES", "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "EventCode": "0x79", - "UMask": "0x8", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", + "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", "Counter": "0,1,2,3", - "EventName": "IDQ.DSB_UOPS", - "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "3", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE", + "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0x79", - "UMask": "0x10", - "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_DSB_CYCLES", - "CounterMask": "1", - "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xE6", + "EventName": "BACLEARS.ANY", + "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x79", - "UMask": "0x18", - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", + "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.", "Counter": "0,1,2,3", - "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", - "CounterMask": "1", - "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.DSB_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x11", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" }, { - "EventCode": "0x79", - "UMask": "0x18", - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", + "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", "Counter": "0,1,2,3", - "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", + "CounterHTOff": "0,1,2,3,4,5,6,7", "CounterMask": "4", - "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", + "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0x79", - "UMask": "0x20", - "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_MITE_UOPS", - "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_16", + "MSRIndex": "0x3F7", + "MSRValue": "0x401006", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" }, { - "EventCode": "0x79", - "UMask": "0x24", - "BriefDescription": "Cycles MITE is delivering any Uop", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", - "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", - "CounterMask": "1", - "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x79", + "EventName": "IDQ.MITE_UOPS", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "EventCode": "0x79", - "UMask": "0x24", - "BriefDescription": "Cycles MITE is delivering 4 Uops", + "BriefDescription": "Cycles with less than 2 uops delivered by the front end.", "Counter": "0,1,2,3", - "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", - "CounterMask": "4", - "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "2", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE", + "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0x79", - "UMask": "0x30", "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_CYCLES", + "CounterHTOff": "0,1,2,3,4,5,6,7", "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.MS_CYCLES", "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x30" }, { - "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "BriefDescription": "Cycles MITE is delivering any Uop", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_UOPS", - "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", + "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x24" + }, + { + "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x83", + "EventName": "ICACHE_64B.IFTAG_HIT", + "SampleAfterValue": "200003", + "UMask": "0x1" }, { - "EdgeDetect": "1", - "EventCode": "0x79", - "UMask": "0x30", "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_SWITCHES", + "CounterHTOff": "0,1,2,3,4,5,6,7", "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0x79", + "EventName": "IDQ.MS_SWITCHES", "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x30" }, { - "EventCode": "0x80", - "UMask": "0x4", - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.", + "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.", "Counter": "0,1,2,3", - "EventName": "ICACHE_16B.IFDATA_STALL", - "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.L2_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x13", + "PEBS": "1", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" }, { - "EventCode": "0x83", - "UMask": "0x1", - "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", - "EventName": "ICACHE_64B.IFTAG_HIT", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.MITE_CYCLES", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "UMask": "0x4" }, { - "EventCode": "0x83", - "UMask": "0x2", - "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "EventName": "ICACHE_64B.IFTAG_MISS", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_64", + "MSRIndex": "0x3F7", + "MSRValue": "0x404006", + "PEBS": "1", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" }, { - "EventCode": "0x83", - "UMask": "0x4", - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", + "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", "Counter": "0,1,2,3", - "EventName": "ICACHE_64B.IFTAG_STALL", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "Invert": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0x9C", - "UMask": "0x1", - "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.", - "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", - "CounterMask": "1", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", + "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0x9C", - "UMask": "0x1", - "BriefDescription": "Cycles with less than 3 uops delivered by the front end.", + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE", - "CounterMask": "1", - "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x79", + "EventName": "IDQ.MS_MITE_UOPS", + "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "EventCode": "0x9C", - "UMask": "0x1", - "BriefDescription": "Cycles with less than 2 uops delivered by the front end.", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE", - "CounterMask": "2", - "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x83", + "EventName": "ICACHE_64B.IFTAG_STALL", + "SampleAfterValue": "200003", + "UMask": "0x4" }, { - "EventCode": "0x9C", - "UMask": "0x1", - "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", + "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE", - "CounterMask": "3", - "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xAB", + "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", + "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0x9C", - "UMask": "0x1", - "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", - "CounterMask": "4", - "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", + "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x18" }, { - "EventCode": "0x9C", - "UMask": "0x1", - "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", + "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.", "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", - "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding \u201c4 \u2013 x\u201d when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.STLB_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x15", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" }, { - "EventCode": "0xAB", - "UMask": "0x2", - "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "Counter": "0,1,2,3", - "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", - "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0\u20132 cycles.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x79", + "EventName": "IDQ.DSB_UOPS", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x400406", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_4", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_512", "MSRIndex": "0x3F7", - "TakenAlone": "1", + "MSRValue": "0x420006", + "PEBS": "1", "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "TakenAlone": "1", + "UMask": "0x1" }, { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x200206", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_8", "MSRIndex": "0x3F7", + "MSRValue": "0x400806", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.", + "SampleAfterValue": "100007", "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle", + "Counter": "0,1,2,3,4,5,6,7", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_1", + "MSRIndex": "0x3F7", + "MSRValue": "0x400106", + "PEBS": "2", + "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.", "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "TakenAlone": "1", + "UMask": "0x1" }, { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x400206", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_2", "MSRIndex": "0x3F7", - "TakenAlone": "1", + "MSRValue": "0x400206", + "PEBS": "1", "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "TakenAlone": "1", + "UMask": "0x1" }, { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss. Precise Event.", - "PEBS": "1", - "MSRValue": "0x15", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.STLB_MISS", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_4", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.", - "TakenAlone": "1", + "MSRValue": "0x400406", + "PEBS": "1", "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "TakenAlone": "1", + "UMask": "0x1" }, { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced iTLB true miss. Precise Event.", - "PEBS": "1", - "MSRValue": "0x14", + "BriefDescription": "Cycles MITE is delivering 4 Uops", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.ITLB_MISS", - "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0x79", + "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", + "SampleAfterValue": "2000003", + "UMask": "0x24" }, { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss. Precise Event.", - "PEBS": "1", - "MSRValue": "0x13", + "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.L2_MISS", - "MSRIndex": "0x3F7", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.MS_DSB_CYCLES", + "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss. Precise Event.", - "PEBS": "1", - "MSRValue": "0x12", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.L1I_MISS", - "MSRIndex": "0x3F7", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x79", + "EventName": "IDQ.MS_UOPS", + "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.", + "SampleAfterValue": "2000003", + "UMask": "0x30" }, { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss. Precise Event.", - "PEBS": "1", - "MSRValue": "0x11", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.DSB_MISS", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_256", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.", - "TakenAlone": "1", + "MSRValue": "0x410006", + "PEBS": "1", "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "TakenAlone": "1", + "UMask": "0x1" }, { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2", + "MSRIndex": "0x3F7", + "MSRValue": "0x200206", "PEBS": "1", - "MSRValue": "0x300206", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3", "MSRIndex": "0x3F7", - "TakenAlone": "1", + "MSRValue": "0x300206", + "PEBS": "1", "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "TakenAlone": "1", + "UMask": "0x1" }, { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x100206", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1", "MSRIndex": "0x3F7", + "MSRValue": "0x100206", + "PEBS": "1", "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.", - "TakenAlone": "1", "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x420006", - "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_512", - "MSRIndex": "0x3F7", "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x410006", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_256", - "MSRIndex": "0x3F7", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0x79", + "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "UMask": "0x18" }, { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x408006", + "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_128", - "MSRIndex": "0x3F7", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xAB", + "EventName": "DSB2MITE_SWITCHES.COUNT", + "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses.\nNote: Invoking MITE requires two or three cycles delay.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x404006", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_64", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", "MSRIndex": "0x3F7", - "TakenAlone": "1", + "MSRValue": "0x402006", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.", "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "TakenAlone": "1", + "UMask": "0x1" }, { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x402006", + "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", - "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", + "Invert": "1", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x401006", + "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_16", - "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x83", + "EventName": "ICACHE_64B.IFTAG_MISS", + "SampleAfterValue": "200003", + "UMask": "0x2" }, { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.", - "PEBS": "1", - "MSRValue": "0x400806", + "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_8", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.L1I_MISS", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.", - "TakenAlone": "1", + "MSRValue": "0x12", + "PEBS": "1", "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "TakenAlone": "1", + "UMask": "0x1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylakex/memory.json b/tools/perf/pmu-events/arch/x86/skylakex/memory.json index 48a9cdf81307..6c3fd89d204d 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/memory.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/memory.json @@ -1,1396 +1,1403 @@ [ { - "EventCode": "0x54", - "UMask": "0x1", - "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address", + "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from remote dram.", "Counter": "0,1,2,3", - "EventName": "TX_MEM.ABORT_CONFLICT", - "PublicDescription": "Number of times a TSX line had a cache conflict.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x54", - "UMask": "0x2", - "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.", + "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3.", "Counter": "0,1,2,3", - "EventName": "TX_MEM.ABORT_CAPACITY", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x54", - "UMask": "0x4", - "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer", + "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.", "Counter": "0,1,2,3", - "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK", - "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "6", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0x54", - "UMask": "0x8", - "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from remote dram.", "Counter": "0,1,2,3", - "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY", - "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x54", - "UMask": "0x10", - "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3.", "Counter": "0,1,2,3", - "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH", - "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x54", - "UMask": "0x20", - "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.", + "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT", - "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x54", - "UMask": "0x40", - "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.", + "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.", "Counter": "0,1,2,3", - "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL", - "PublicDescription": "Number of times we could not allocate Lock Buffer.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED_TIMER", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0x5d", - "UMask": "0x1", - "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.", + "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from remote dram.", "Counter": "0,1,2,3", - "EventName": "TX_EXEC.MISC1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x5d", - "UMask": "0x2", - "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region", + "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)", "Counter": "0,1,2,3", - "EventName": "TX_EXEC.MISC2", - "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED_MEM", + "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "EventCode": "0x5d", - "UMask": "0x4", - "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded", + "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from remote dram.", "Counter": "0,1,2,3", - "EventName": "TX_EXEC.MISC3", - "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x5d", - "UMask": "0x8", - "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.", + "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type", "Counter": "0,1,2,3", - "EventName": "TX_EXEC.MISC4", - "PublicDescription": "RTM region detected inside HLE.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED_MEMTYPE", + "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x40" }, { - "EventCode": "0x5d", - "UMask": "0x10", - "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region", + "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3.", "Counter": "0,1,2,3", - "EventName": "TX_EXEC.MISC5", - "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x60", - "UMask": "0x10", - "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.", + "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local or remote dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x60", - "UMask": "0x10", - "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.", + "BriefDescription": "Counts all prefetch data reads that miss in the L3.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6", - "CounterMask": "6", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x60", - "UMask": "0x10", - "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.", + "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xA3", - "UMask": "0x2", "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS", + "CounterHTOff": "0,1,2,3,4,5,6,7", "CounterMask": "2", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0xA3", - "UMask": "0x6", - "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.", + "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS", - "CounterMask": "6", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xB0", - "UMask": "0x10", - "BriefDescription": "Demand Data Read requests who miss L3 cache", + "BriefDescription": "Counts all demand code reads that miss the L3 and clean or shared data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", - "PublicDescription": "Demand Data Read requests who miss L3 cache.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xC3", - "UMask": "0x2", - "BriefDescription": "Counts the number of machine clears due to memory order conflicts.", + "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local or remote dram.", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", - "Errata": "SKL089", - "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xC8", - "UMask": "0x1", - "BriefDescription": "Number of times an HLE execution started.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.", "Counter": "0,1,2,3", - "EventName": "HLE_RETIRED.START", - "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", + "MSRIndex": "0x3F6", + "MSRValue": "0x20", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" }, { - "EventCode": "0xC8", - "UMask": "0x2", - "BriefDescription": "Number of times an HLE execution successfully committed", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the modified data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "HLE_RETIRED.COMMIT", - "PublicDescription": "Number of times HLE commit succeeded.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xC8", - "UMask": "0x4", - "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).", - "PEBS": "1", + "BriefDescription": "Counts all prefetch data reads that miss the L3 and the modified data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "HLE_RETIRED.ABORTED", - "PublicDescription": "Number of times HLE abort was triggered. (PEBS)", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xC8", - "UMask": "0x8", - "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3.", "Counter": "0,1,2,3", - "EventName": "HLE_RETIRED.ABORTED_MEM", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xC8", - "UMask": "0x10", - "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.", + "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer", "Counter": "0,1,2,3", - "EventName": "HLE_RETIRED.ABORTED_TIMER", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK", + "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "EventCode": "0xC8", - "UMask": "0x20", - "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", + "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address", "Counter": "0,1,2,3", - "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY", - "PublicDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_CONFLICT", + "PublicDescription": "Number of times a TSX line had a cache conflict.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xC8", - "UMask": "0x40", - "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss in the L3.", "Counter": "0,1,2,3", - "EventName": "HLE_RETIRED.ABORTED_MEMTYPE", - "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xC8", - "UMask": "0x80", - "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.", "Counter": "0,1,2,3", - "EventName": "HLE_RETIRED.ABORTED_EVENTS", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", + "MSRIndex": "0x3F6", + "MSRValue": "0x40", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "2003", + "TakenAlone": "1", + "UMask": "0x1" }, { - "EventCode": "0xC9", - "UMask": "0x1", - "BriefDescription": "Number of times an RTM execution started.", + "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and clean or shared data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "RTM_RETIRED.START", - "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xC9", - "UMask": "0x2", - "BriefDescription": "Number of times an RTM execution successfully committed", + "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).", "Counter": "0,1,2,3", - "EventName": "RTM_RETIRED.COMMIT", - "PublicDescription": "Number of times RTM commit succeeded.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED_MEM", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "EventCode": "0xC9", - "UMask": "0x4", - "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).", - "PEBS": "1", + "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.", "Counter": "0,1,2,3", - "EventName": "RTM_RETIRED.ABORTED", - "PublicDescription": "Number of times RTM abort was triggered. (PEBS)", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY", + "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the modified data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xC9", - "UMask": "0x8", - "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)", + "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region", "Counter": "0,1,2,3", - "EventName": "RTM_RETIRED.ABORTED_MEM", - "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC5", + "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0xC9", - "UMask": "0x10", - "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.", + "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.", "Counter": "0,1,2,3", - "EventName": "RTM_RETIRED.ABORTED_TIMER", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC4", + "PublicDescription": "RTM region detected inside HLE.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "EventCode": "0xC9", - "UMask": "0x20", - "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions", + "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded", "Counter": "0,1,2,3", - "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY", - "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC3", + "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "EventCode": "0xC9", - "UMask": "0x40", - "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type", + "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region", "Counter": "0,1,2,3", - "EventName": "RTM_RETIRED.ABORTED_MEMTYPE", - "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC2", + "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0xC9", - "UMask": "0x80", - "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)", + "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.", "Counter": "0,1,2,3", - "EventName": "RTM_RETIRED.ABORTED_EVENTS", - "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC1", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xCD", - "UMask": "0x1", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.", - "PEBS": "2", - "MSRValue": "0x200", - "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", - "MSRIndex": "0x3F6", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.", - "TakenAlone": "1", - "SampleAfterValue": "101", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "EventCode": "0xCD", - "UMask": "0x1", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", - "PEBS": "2", - "MSRValue": "0x100", + "BriefDescription": "Number of times an RTM execution successfully committed", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", - "MSRIndex": "0x3F6", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.", - "TakenAlone": "1", - "SampleAfterValue": "503", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.COMMIT", + "PublicDescription": "Number of times RTM commit succeeded.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "EventCode": "0xCD", - "UMask": "0x1", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", - "PEBS": "2", - "MSRValue": "0x80", + "BriefDescription": "Counts prefetch RFOs that miss in the L3.", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", - "MSRIndex": "0x3F6", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.", - "TakenAlone": "1", - "SampleAfterValue": "1009", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xCD", - "UMask": "0x1", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.", - "PEBS": "2", - "MSRValue": "0x40", + "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", - "MSRIndex": "0x3F6", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.", - "TakenAlone": "1", - "SampleAfterValue": "2003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "EventCode": "0xCD", - "UMask": "0x1", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.", - "PEBS": "2", - "MSRValue": "0x20", + "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram.", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", - "MSRIndex": "0x3F6", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xCD", - "UMask": "0x1", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", - "PEBS": "2", - "MSRValue": "0x10", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from remote dram.", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", - "MSRIndex": "0x3F6", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.", - "TakenAlone": "1", - "SampleAfterValue": "20011", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xCD", - "UMask": "0x1", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.", - "PEBS": "2", - "MSRValue": "0x8", + "BriefDescription": "Counts all prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", - "MSRIndex": "0x3F6", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.", - "TakenAlone": "1", - "SampleAfterValue": "50021", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xCD", - "UMask": "0x1", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.", - "PEBS": "2", - "MSRValue": "0x4", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local dram.", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", - "MSRIndex": "0x3F6", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.", - "TakenAlone": "1", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads TBD TBD", - "MSRValue": "0x3FBC000001", + "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED_TIMER", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads TBD", - "MSRValue": "0x083FC00001", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local or remote dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads TBD", - "MSRValue": "0x103FC00001", + "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local or remote dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads TBD", - "MSRValue": "0x063FC00001", + "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local or remote dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads TBD", - "MSRValue": "0x063B800001", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from remote dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads TBD", - "MSRValue": "0x0604000001", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the modified data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD", - "MSRValue": "0x3FBC000002", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local or remote dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) TBD", - "MSRValue": "0x083FC00002", + "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY", + "SampleAfterValue": "2000003", + "UMask": "0x20" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) TBD", - "MSRValue": "0x103FC00002", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from remote dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) TBD", - "MSRValue": "0x063FC00002", + "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) TBD", - "MSRValue": "0x063B800002", + "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) TBD", - "MSRValue": "0x0604000002", + "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from remote dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", - "MSRValue": "0x3FBC000004", + "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", - "MSRValue": "0x083FC00004", + "BriefDescription": "Number of times an HLE execution successfully committed", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.COMMIT", + "PublicDescription": "Number of times HLE commit succeeded.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", - "MSRValue": "0x103FC00004", + "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL", + "PublicDescription": "Number of times we could not allocate Lock Buffer.", + "SampleAfterValue": "2000003", + "UMask": "0x40" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", - "MSRValue": "0x063FC00004", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and clean or shared data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", - "MSRValue": "0x063B800004", + "BriefDescription": "Counts demand data reads that miss the L3 and the modified data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", - "MSRValue": "0x0604000004", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", - "MSRValue": "0x3FBC000010", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and clean or shared data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD", - "MSRValue": "0x083FC00010", + "BriefDescription": "Demand Data Read requests who miss L3 cache", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", + "PublicDescription": "Demand Data Read requests who miss L3 cache.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x10" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD", - "MSRValue": "0x103FC00010", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local or remote dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD", - "MSRValue": "0x063FC00010", + "BriefDescription": "Counts demand data reads that miss in the L3.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD", - "MSRValue": "0x063B800010", + "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY", + "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.", + "SampleAfterValue": "2000003", + "UMask": "0x20" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD", - "MSRValue": "0x0604000010", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and clean or shared data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", - "MSRValue": "0x3FBC000020", + "BriefDescription": "Counts prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", - "MSRValue": "0x083FC00020", + "BriefDescription": "Counts all demand code reads that miss the L3 and the modified data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", - "MSRValue": "0x103FC00020", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the modified data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", - "MSRValue": "0x063FC00020", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", + "MSRIndex": "0x3F6", + "MSRValue": "0x100", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "503", + "TakenAlone": "1", + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", - "MSRValue": "0x063B800020", + "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local or remote dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", - "MSRValue": "0x0604000020", + "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local or remote dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", - "MSRValue": "0x3FBC000080", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and clean or shared data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", - "MSRValue": "0x083FC00080", + "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from remote dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", - "MSRValue": "0x103FC00080", + "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", - "MSRValue": "0x063FC00080", + "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED", + "PEBS": "1", + "PublicDescription": "Number of times RTM abort was triggered.", + "SampleAfterValue": "2000003", + "UMask": "0x4" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", - "MSRValue": "0x063B800080", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local or remote dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", - "MSRValue": "0x0604000080", + "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED", + "PEBS": "1", + "PublicDescription": "Number of times HLE abort was triggered.", + "SampleAfterValue": "2000003", + "UMask": "0x4" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", - "MSRValue": "0x3FBC000100", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", + "MSRIndex": "0x3F6", + "MSRValue": "0x10", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "20011", + "TakenAlone": "1", + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", - "MSRValue": "0x083FC00100", + "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT", + "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.", + "SampleAfterValue": "2000003", + "UMask": "0x20" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", - "MSRValue": "0x103FC00100", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and clean or shared data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", - "MSRValue": "0x063FC00100", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from remote dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", - "MSRValue": "0x063B800100", + "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", - "MSRValue": "0x0604000100", + "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", - "MSRValue": "0x3FBC000400", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", + "MSRIndex": "0x3F6", + "MSRValue": "0x200", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "101", + "TakenAlone": "1", + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", - "MSRValue": "0x083FC00400", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the modified data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", + "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", - "MSRValue": "0x103FC00400", - "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", - "MSRValue": "0x063FC00400", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", - "MSRValue": "0x063B800400", + "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", - "MSRValue": "0x0604000400", + "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_CAPACITY", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x3FBC000490", + "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from remote dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x083FC00490", + "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED_MEMTYPE", + "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.", + "SampleAfterValue": "2000003", + "UMask": "0x40" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x103FC00490", + "BriefDescription": "Number of times an RTM execution started.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.START", + "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x063FC00490", + "BriefDescription": "Counts the number of machine clears due to memory order conflicts.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL089", + "EventCode": "0xC3", + "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", + "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x063B800490", + "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH", + "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x0604000490", + "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local or remote dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x3FBC000120", + "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x083FC00120", + "BriefDescription": "Counts demand data reads that miss the L3 and clean or shared data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x103FC00120", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x063FC00120", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x063B800120", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x0604000120", + "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED_EVENTS", + "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).", + "SampleAfterValue": "2000003", + "UMask": "0x80" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x3FBC000491", + "BriefDescription": "Number of times an HLE execution started.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.START", + "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x083FC00491", + "BriefDescription": "Counts all demand code reads that miss in the L3.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x103FC00491", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", + "MSRIndex": "0x3F6", + "MSRValue": "0x80", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "1009", + "TakenAlone": "1", + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x063FC00491", + "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x063B800491", + "BriefDescription": "Counts prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x0604000491", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local or remote dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x3FBC000122", + "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "6", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS", + "SampleAfterValue": "2000003", + "UMask": "0x6" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x083FC00122", + "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED_EVENTS", + "SampleAfterValue": "2000003", + "UMask": "0x80" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x103FC00122", + "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x063FC00122", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", + "MSRIndex": "0x3F6", + "MSRValue": "0x4", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "TakenAlone": "1", + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x063B800122", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", + "MSRIndex": "0x3F6", + "MSRValue": "0x8", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "50021", + "TakenAlone": "1", + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x0604000122", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylakex/other.json b/tools/perf/pmu-events/arch/x86/skylakex/other.json index 778a541463eb..f6b147ba8ef6 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/other.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/other.json @@ -1,164 +1,116 @@ [ { - "EventCode": "0x28", - "UMask": "0x7", - "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.", + "BriefDescription": "Core cycles the core was throttled due to a pending power level request.", "Counter": "0,1,2,3", - "EventName": "CORE_POWER.LVL0_TURBO_LICENSE", - "PublicDescription": "Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0x28", - "UMask": "0x18", - "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.", - "Counter": "0,1,2,3", - "EventName": "CORE_POWER.LVL1_TURBO_LICENSE", - "PublicDescription": "Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.", + "EventName": "CORE_POWER.THROTTLE", + "PublicDescription": "Core cycles the out-of-order engine was throttled due to a pending power level request.", "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x40" }, { - "EventCode": "0x28", - "UMask": "0x20", - "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.", + "BriefDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly", "Counter": "0,1,2,3", - "EventName": "CORE_POWER.LVL2_TURBO_LICENSE", - "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server michroarchtecture). This includes high current AVX 512-bit instructions.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xFE", + "EventName": "IDI_MISC.WB_DOWNGRADE", + "PublicDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly.", + "SampleAfterValue": "100003", + "UMask": "0x4" }, { - "EventCode": "0x28", - "UMask": "0x40", - "BriefDescription": "Core cycles the core was throttled due to a pending power level request.", + "BriefDescription": "Number of PREFETCHW instructions executed.", "Counter": "0,1,2,3", - "EventName": "CORE_POWER.THROTTLE", - "PublicDescription": "Core cycles the out-of-order engine was throttled due to a pending power level request.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0x32", - "UMask": "0x1", - "BriefDescription": "Number of PREFETCHNTA instructions executed.", - "Counter": "0,1,2,3", - "EventName": "SW_PREFETCH_ACCESS.NTA", + "EventName": "SW_PREFETCH_ACCESS.PREFETCHW", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "EventCode": "0x32", - "UMask": "0x2", - "BriefDescription": "Number of PREFETCHT0 instructions executed.", + "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.", "Counter": "0,1,2,3", - "EventName": "SW_PREFETCH_ACCESS.T0", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x28", + "EventName": "CORE_POWER.LVL0_TURBO_LICENSE", + "PublicDescription": "Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.", + "SampleAfterValue": "200003", + "UMask": "0x7" }, { - "EventCode": "0x32", - "UMask": "0x4", - "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.", + "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.", "Counter": "0,1,2,3", - "EventName": "SW_PREFETCH_ACCESS.T1_T2", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x28", + "EventName": "CORE_POWER.LVL1_TURBO_LICENSE", + "PublicDescription": "Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.", + "SampleAfterValue": "200003", + "UMask": "0x18" }, { - "EventCode": "0x32", - "UMask": "0x8", - "BriefDescription": "Number of PREFETCHW instructions executed.", + "BriefDescription": "Number of PREFETCHT0 instructions executed.", "Counter": "0,1,2,3", - "EventName": "SW_PREFETCH_ACCESS.PREFETCHW", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x32", + "EventName": "SW_PREFETCH_ACCESS.T0", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0xCB", - "UMask": "0x1", "BriefDescription": "Number of hardware interrupts received by the processor.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xCB", "EventName": "HW_INTERRUPTS.RECEIVED", "PublicDescription": "Counts the number of hardware interruptions received by the processor.", "SampleAfterValue": "203", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xEF", - "UMask": "0x1", - "Counter": "0,1,2,3", - "EventName": "CORE_SNOOP_RESPONSE.RSP_IHITI", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xEF", - "UMask": "0x2", - "Counter": "0,1,2,3", - "EventName": "CORE_SNOOP_RESPONSE.RSP_IHITFSE", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xEF", - "UMask": "0x4", - "Counter": "0,1,2,3", - "EventName": "CORE_SNOOP_RESPONSE.RSP_SHITFSE", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xEF", - "UMask": "0x8", + "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.", "Counter": "0,1,2,3", - "EventName": "CORE_SNOOP_RESPONSE.RSP_SFWDM", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x28", + "EventName": "CORE_POWER.LVL2_TURBO_LICENSE", + "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server michroarchtecture). This includes high current AVX 512-bit instructions.", + "SampleAfterValue": "200003", + "UMask": "0x20" }, { - "EventCode": "0xEF", - "UMask": "0x10", + "BriefDescription": "Number of PREFETCHNTA instructions executed.", "Counter": "0,1,2,3", - "EventName": "CORE_SNOOP_RESPONSE.RSP_IFWDM", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x32", + "EventName": "SW_PREFETCH_ACCESS.NTA", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xEF", - "UMask": "0x20", + "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.", "Counter": "0,1,2,3", - "EventName": "CORE_SNOOP_RESPONSE.RSP_IFWDFE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x32", + "EventName": "SW_PREFETCH_ACCESS.T1_T2", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "EventCode": "0xEF", - "UMask": "0x40", "Counter": "0,1,2,3", - "EventName": "CORE_SNOOP_RESPONSE.RSP_SFWDFE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x09", + "EventName": "MEMORY_DISAMBIGUATION.HISTORY_RESET", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xFE", - "UMask": "0x2", "BriefDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xFE", "EventName": "IDI_MISC.WB_UPGRADE", "PublicDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xFE", - "UMask": "0x4", - "BriefDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly", - "Counter": "0,1,2,3", - "EventName": "IDI_MISC.WB_DOWNGRADE", - "PublicDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json index 369f56c1d1b5..3bfc6943ddf9 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json @@ -1,967 +1,959 @@ [ { - "UMask": "0x1", - "BriefDescription": "Instructions retired from execution.", - "Counter": "Fixed counter 0", - "EventName": "INST_RETIRED.ANY", - "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", - "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 0" - }, - { - "UMask": "0x2", - "BriefDescription": "Core cycles when the thread is not in halt state", - "Counter": "Fixed counter 1", - "EventName": "CPU_CLK_UNHALTED.THREAD", - "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", - "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 1" - }, - { - "UMask": "0x2", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "Counter": "Fixed counter 1", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 1" + "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091, SKL044", + "EventCode": "0xC0", + "EventName": "INST_RETIRED.ANY_P", + "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", + "SampleAfterValue": "2000003" }, { - "UMask": "0x3", - "BriefDescription": "Reference cycles when the core is not in halt state.", - "Counter": "Fixed counter 2", - "EventName": "CPU_CLK_UNHALTED.REF_TSC", - "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", + "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "Invert": "1", + "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 2" + "UMask": "0x1" }, { - "EventCode": "0x03", - "UMask": "0x2", - "BriefDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded .", + "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.", "Counter": "0,1,2,3", - "EventName": "LD_BLOCKS.STORE_FORWARD", - "PublicDescription": "Counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:a. preceding store conflicts with the load (incomplete overlap),b. store forwarding is impossible due to u-arch limitations,c. preceding lock RMW operations are not forwarded,d. store has the no-forward bit set (uncacheable/page-split/masked stores),e. all-blocking stores are used (mostly, fences and port I/O), and others.The most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events. See the table of not supported store forwards in the Optimization Guide.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.4_PORTS_UTIL", + "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "EventCode": "0x03", - "UMask": "0x8", - "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use", + "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.", "Counter": "0,1,2,3", - "EventName": "LD_BLOCKS.NO_SR", - "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x14", + "EventName": "ARITH.DIVIDER_ACTIVE", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "EventCode": "0x07", - "UMask": "0x1", "BriefDescription": "False dependencies in MOB due to partial compare on address.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x07", "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS", "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0x0D", - "UMask": "0x1", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)", + "BriefDescription": "Far branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "INT_MISC.RECOVERY_CYCLES", - "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "PEBS": "1", + "PublicDescription": "This event counts far branch instructions retired.", + "SampleAfterValue": "100007", + "UMask": "0x40" }, { - "EventCode": "0x0D", - "UMask": "0x1", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", + "BriefDescription": "Counts the number of x87 uops dispatched.", "Counter": "0,1,2,3", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", - "AnyThread": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.X87", + "PublicDescription": "Counts the number of x87 uops executed.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0x0D", - "UMask": "0x80", - "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.", + "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.", "Counter": "0,1,2,3", - "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x4C", + "EventName": "LOAD_HIT_PRE.SW_PF", + "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "Invert": "1", - "EventCode": "0x0E", - "UMask": "0x1", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", + "BriefDescription": "Mispredicted direct and indirect near call instructions retired.", "Counter": "0,1,2,3", - "EventName": "UOPS_ISSUED.STALL_CYCLES", - "CounterMask": "1", - "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC5", + "EventName": "BR_MISP_RETIRED.NEAR_CALL", + "PEBS": "1", + "PublicDescription": "Counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.", + "SampleAfterValue": "400009", + "UMask": "0x2" }, { - "EventCode": "0x0E", - "UMask": "0x1", - "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", + "BriefDescription": "Total execution stalls.", "Counter": "0,1,2,3", - "EventName": "UOPS_ISSUED.ANY", - "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "EventCode": "0x0E", - "UMask": "0x2", - "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.", + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH", - "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to \u201cMixing Intel AVX and Intel SSE Code\u201d section of the Optimization Guide.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "5", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x5" }, { - "EventCode": "0x0E", - "UMask": "0x20", "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0E", "EventName": "UOPS_ISSUED.SLOW_LEA", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "EventCode": "0x14", - "UMask": "0x1", - "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.", + "BriefDescription": "Cycles with less than 10 actually retired uops.", "Counter": "0,1,2,3", - "EventName": "ARITH.DIVIDER_ACTIVE", - "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "10", + "EventCode": "0xC2", + "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "Invert": "1", + "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0x3C", - "UMask": "0x0", "BriefDescription": "Thread cycles when thread is not in halt state", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", "EventName": "CPU_CLK_UNHALTED.THREAD_P", "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "SampleAfterValue": "2000003" }, { - "EventCode": "0x3C", - "UMask": "0x0", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "BriefDescription": "Cycles where at least 2 uops were executed per-thread", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", - "AnyThread": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "2", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", + "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EdgeDetect": "1", - "EventCode": "0x3C", - "UMask": "0x0", - "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.RING0_TRANS", - "CounterMask": "1", - "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0x3C", - "UMask": "0x1", "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK", - "SampleAfterValue": "2503", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "SampleAfterValue": "25003", + "UMask": "0x1" }, { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", + "BriefDescription": "Number of machine clears (nukes) of any type.", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "AnyThread": "1", - "SampleAfterValue": "2503", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0xC3", + "EventName": "MACHINE_CLEARS.COUNT", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", "AnyThread": "1", - "SampleAfterValue": "2503", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "Counter": "Fixed counter 1", + "CounterHTOff": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", + "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "SampleAfterValue": "2503", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.THREAD", + "PublicDescription": "Number of uops to be executed per-thread each cycle.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "EventCode": "0x3C", - "UMask": "0x2", - "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", + "BriefDescription": "Cycles where at least 3 uops were executed per-thread", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "3", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", + "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0x3C", - "UMask": "0x2", - "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2503", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", + "Invert": "1", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "EventCode": "0x4C", - "UMask": "0x1", - "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.", + "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.", "Counter": "0,1,2,3", - "EventName": "LOAD_HIT_PRE.SW_PF", - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.BOUND_ON_STORES", + "SampleAfterValue": "2000003", + "UMask": "0x40" }, { - "EventCode": "0x59", - "UMask": "0x1", - "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.", + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD", - "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "8", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "EdgeDetect": "1", - "Invert": "1", - "EventCode": "0x5E", - "UMask": "0x1", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", "Counter": "0,1,2,3", - "EventName": "RS_EVENTS.EMPTY_END", + "CounterHTOff": "0,1,2,3,4,5,6,7", "CounterMask": "1", - "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.", + "EventCode": "0xA8", + "EventName": "LSD.CYCLES_ACTIVE", + "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0x5E", - "UMask": "0x1", - "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)", "Counter": "0,1,2,3", - "EventName": "RS_EVENTS.EMPTY_CYCLES", - "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0D", + "EventName": "INT_MISC.RECOVERY_CYCLES", + "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0x87", - "UMask": "0x1", - "BriefDescription": "Stalls caused by changing prefix length of the instruction.", + "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", "Counter": "0,1,2,3", - "EventName": "ILD_STALL.LCP", - "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "SampleAfterValue": "25003", + "UMask": "0x1" }, { - "EventCode": "0xA1", - "UMask": "0x1", "BriefDescription": "Cycles per thread when uops are executed in port 0", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_0", "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xA1", - "UMask": "0x2", "BriefDescription": "Cycles per thread when uops are executed in port 1", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_1", "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0xA1", - "UMask": "0x4", "BriefDescription": "Cycles per thread when uops are executed in port 2", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_2", "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "EventCode": "0xA1", - "UMask": "0x8", "BriefDescription": "Cycles per thread when uops are executed in port 3", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_3", "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "EventCode": "0xA1", - "UMask": "0x10", "BriefDescription": "Cycles per thread when uops are executed in port 4", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_4", "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0xA1", - "UMask": "0x20", "BriefDescription": "Cycles per thread when uops are executed in port 5", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_5", "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "EventCode": "0xA1", - "UMask": "0x40", "BriefDescription": "Cycles per thread when uops are executed in port 6", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_6", "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x40" }, { - "EventCode": "0xA1", - "UMask": "0x80", "BriefDescription": "Cycles per thread when uops are executed in port 7", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_7", "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x80" }, { - "EventCode": "0xa2", - "UMask": "0x1", - "BriefDescription": "Resource-related stall cycles", + "AnyThread": "1", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.ANY", - "PublicDescription": "Counts resource-related stall cycles.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0D", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xA2", - "UMask": "0x8", - "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", - "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.SB", - "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", + "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", + "Counter": "1", + "CounterHTOff": "1", + "Errata": "SKL091, SKL044", + "EventCode": "0xC0", + "EventName": "INST_RETIRED.PREC_DIST", + "PEBS": "2", + "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xA3", - "UMask": "0x1", - "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", - "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0xA8", + "EventName": "LSD.CYCLES_4_UOPS", + "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xA3", - "UMask": "0x4", - "BriefDescription": "Total execution stalls.", + "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", - "CounterMask": "4", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.3_PORTS_UTIL", + "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "EventCode": "0xA3", - "UMask": "0x5", - "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", - "CounterMask": "5", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.STORE_FORWARD", + "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.", + "SampleAfterValue": "100003", + "UMask": "0x2" }, { - "EventCode": "0xA3", - "UMask": "0x8", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", - "CounterMask": "8", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0x5E", + "EventName": "RS_EVENTS.EMPTY_END", + "Invert": "1", + "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xA3", - "UMask": "0xc", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "AnyThread": "1", + "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", - "CounterMask": "12", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "25003", + "UMask": "0x1" }, { - "EventCode": "0xA3", - "UMask": "0x10", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", - "CounterMask": "16", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x59", + "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD", + "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xA3", - "UMask": "0x14", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", - "CounterMask": "20", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x0E", + "EventName": "UOPS_ISSUED.STALL_CYCLES", + "Invert": "1", + "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "EventCode": "0xA6", - "UMask": "0x1", - "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.", + "BriefDescription": "Not taken branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS", - "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND_NTAKEN", + "PublicDescription": "This event counts not taken branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x10" }, { - "EventCode": "0xA6", - "UMask": "0x2", - "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.", + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "EXE_ACTIVITY.1_PORTS_UTIL", - "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "3", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0xA6", - "UMask": "0x4", - "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "EXE_ACTIVITY.2_PORTS_UTIL", - "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0xA6", - "UMask": "0x8", - "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "EXE_ACTIVITY.3_PORTS_UTIL", - "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0xA6", - "UMask": "0x10", - "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.", - "Counter": "0,1,2,3", - "EventName": "EXE_ACTIVITY.4_PORTS_UTIL", - "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.", + "BriefDescription": "Reference cycles when the core is not in halt state.", + "Counter": "Fixed counter 2", + "CounterHTOff": "Fixed counter 2", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x3" }, { - "EventCode": "0xA6", - "UMask": "0x40", - "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.", + "BriefDescription": "All mispredicted macro branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "EXE_ACTIVITY.BOUND_ON_STORES", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.", + "SampleAfterValue": "400009" }, { - "EventCode": "0xA8", - "UMask": "0x1", - "BriefDescription": "Number of Uops delivered by the LSD.", + "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.", "Counter": "0,1,2,3", - "EventName": "LSD.UOPS", - "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC1", + "EventName": "OTHER_ASSISTS.ANY", + "SampleAfterValue": "100003", + "UMask": "0x3f" }, { - "EventCode": "0xA8", - "UMask": "0x1", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "BriefDescription": "Cycles without actually retired uops.", "Counter": "0,1,2,3", - "EventName": "LSD.CYCLES_4_UOPS", - "CounterMask": "4", - "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xC2", + "EventName": "UOPS_RETIRED.STALL_CYCLES", + "Invert": "1", + "PublicDescription": "This event counts cycles without actually retired uops.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0xA8", - "UMask": "0x1", - "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "BriefDescription": "Number of Uops delivered by the LSD.", "Counter": "0,1,2,3", - "EventName": "LSD.CYCLES_ACTIVE", - "CounterMask": "1", - "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA8", + "EventName": "LSD.UOPS", + "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Cycles where at least 4 uops were executed per-thread", + "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", - "CounterMask": "4", - "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "25003", + "UMask": "0x2" }, { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Cycles where at least 3 uops were executed per-thread", + "BriefDescription": "Stalls caused by changing prefix length of the instruction.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", - "CounterMask": "3", - "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x87", + "EventName": "ILD_STALL.LCP", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Cycles where at least 2 uops were executed per-thread", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", - "CounterMask": "2", - "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "16", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Cycles where at least 1 uop was executed per-thread", + "BriefDescription": "Taken branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", - "CounterMask": "1", - "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "PEBS": "1", + "PublicDescription": "This event counts taken branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x20" }, { - "Invert": "1", - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.STALL_CYCLES", - "CounterMask": "1", - "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.NO_SR", + "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", + "SampleAfterValue": "100003", + "UMask": "0x8" }, { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", + "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.THREAD", - "PublicDescription": "Number of uops to be executed per-thread each cycle.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0E", + "EventName": "UOPS_ISSUED.ANY", + "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xB1", - "UMask": "0x2", - "BriefDescription": "Number of uops executed on the core.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE", - "PublicDescription": "Number of uops executed from any thread.", + "BriefDescription": "Core cycles when the thread is not in halt state", + "Counter": "Fixed counter 1", + "CounterHTOff": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.THREAD", + "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "Invert": "1", - "EventCode": "0xB1", - "UMask": "0x2", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "AnyThread": "1", + "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "25003", + "UMask": "0x1" }, { - "EventCode": "0xB1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "BriefDescription": "Direct and indirect near call instructions retired.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "CounterMask": "4", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "PEBS": "1", + "PublicDescription": "This event counts both direct and indirect near call instructions retired.", + "SampleAfterValue": "100007", + "UMask": "0x2" }, { - "EventCode": "0xB1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "CounterMask": "3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xCC", + "EventName": "ROB_MISC_EVENTS.PAUSE_INST", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x40" }, { - "EventCode": "0xB1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "BriefDescription": "Resource-related stall cycles", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "CounterMask": "2", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xa2", + "EventName": "RESOURCE_STALLS.ANY", + "PublicDescription": "Counts resource-related stall cycles.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xB1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "BriefDescription": "Self-modifying code (SMC) detected.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC3", + "EventName": "MACHINE_CLEARS.SMC", + "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.", + "SampleAfterValue": "100003", + "UMask": "0x4" }, { - "EventCode": "0xB1", - "UMask": "0x10", - "BriefDescription": "Counts the number of x87 uops dispatched.", + "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.X87", - "PublicDescription": "Counts the number of x87 uops executed.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "25003", + "UMask": "0x2" + }, + { + "BriefDescription": "Cycles where at least 4 uops were executed per-thread", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", + "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xC0", - "UMask": "0x0", - "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.", "Counter": "0,1,2,3", - "EventName": "INST_RETIRED.ANY_P", - "Errata": "SKL091, SKL044", - "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC5", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "PEBS": "1", + "SampleAfterValue": "400009", + "UMask": "0x20" + }, + { + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "CounterMask": "20", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x14" }, { - "EventCode": "0xC0", - "UMask": "0x1", - "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", - "PEBS": "2", - "Counter": "1", - "EventName": "INST_RETIRED.PREC_DIST", - "Errata": "SKL091, SKL044", - "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.", + "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS", + "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.", "SampleAfterValue": "2000003", - "CounterHTOff": "1" + "UMask": "0x1" }, { - "Invert": "1", - "EventCode": "0xC0", - "UMask": "0x1", "BriefDescription": "Number of cycles using always true condition applied to PEBS instructions retired event.", - "PEBS": "2", "Counter": "0,2,3", - "EventName": "INST_RETIRED.TOTAL_CYCLES_PS", + "CounterHTOff": "0,2,3", "CounterMask": "10", "Errata": "SKL091, SKL044", + "EventCode": "0xC0", + "EventName": "INST_RETIRED.TOTAL_CYCLES_PS", + "Invert": "1", + "PEBS": "2", "PublicDescription": "Number of cycles using an always true condition applied to PEBS instructions retired event. (inst_ret< 16)", "SampleAfterValue": "2000003", - "CounterHTOff": "0,2,3" + "UMask": "0x1" }, { - "EventCode": "0xC1", - "UMask": "0x3f", - "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.", + "BriefDescription": "Retirement slots used.", "Counter": "0,1,2,3", - "EventName": "OTHER_ASSISTS.ANY", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC2", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "PublicDescription": "Counts the retirement slots used.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "Invert": "1", - "EventCode": "0xC2", - "UMask": "0x2", - "BriefDescription": "Cycles with less than 10 actually retired uops.", + "AnyThread": "1", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.TOTAL_CYCLES", - "CounterMask": "10", - "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "SampleAfterValue": "2000003" }, { - "Invert": "1", - "EventCode": "0xC2", - "UMask": "0x2", - "BriefDescription": "Cycles without actually retired uops.", + "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.STALL_CYCLES", - "CounterMask": "1", - "PublicDescription": "This event counts cycles without actually retired uops.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0E", + "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH", + "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0xC2", - "UMask": "0x2", - "BriefDescription": "Retirement slots used.", + "BriefDescription": "Increments whenever there is an update to the LBR array.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", - "PublicDescription": "Counts the retirement slots used.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xCC", + "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", + "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "EdgeDetect": "1", - "EventCode": "0xC3", - "UMask": "0x1", - "BriefDescription": "Number of machine clears (nukes) of any type.", + "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.COUNT", - "CounterMask": "1", - "PublicDescription": "Number of machine clears (nukes) of any type.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5E", + "EventName": "RS_EVENTS.EMPTY_CYCLES", + "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "EventCode": "0xC3", - "UMask": "0x4", - "BriefDescription": "Self-modifying code (SMC) detected.", + "BriefDescription": "Return instructions retired.", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.SMC", - "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "PEBS": "1", + "PublicDescription": "This event counts return instructions retired.", + "SampleAfterValue": "100007", + "UMask": "0x8" }, { - "EventCode": "0xC4", - "UMask": "0x0", - "BriefDescription": "All (macro) branch instructions retired.", + "BriefDescription": "Instructions retired from execution.", + "Counter": "Fixed counter 0", + "CounterHTOff": "Fixed counter 0", + "EventName": "INST_RETIRED.ANY", + "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "Errata": "SKL091", - "PublicDescription": "Counts all (macro) branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "2", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "EventCode": "0xC4", - "UMask": "0x1", - "BriefDescription": "Conditional branch instructions retired.", - "PEBS": "1", + "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.CONDITIONAL", - "Errata": "SKL091", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA2", + "EventName": "RESOURCE_STALLS.SB", + "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", + "SampleAfterValue": "2000003", + "UMask": "0x8" }, { - "EventCode": "0xC4", - "UMask": "0x2", - "BriefDescription": "Direct and indirect near call instructions retired.", - "PEBS": "1", + "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.", "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NEAR_CALL", - "Errata": "SKL091", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.RING0_TRANS", + "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).", + "SampleAfterValue": "100007" }, { - "EventCode": "0xC4", - "UMask": "0x4", "BriefDescription": "All (macro) branch instructions retired.", - "PEBS": "2", "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", + "CounterHTOff": "0,1,2,3", "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", + "PEBS": "2", "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4" }, { - "EventCode": "0xC4", - "UMask": "0x8", - "BriefDescription": "Return instructions retired.", - "PEBS": "1", + "BriefDescription": "Mispredicted macro branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "Errata": "SKL091", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", + "PEBS": "2", + "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x4" }, { - "EventCode": "0xC4", - "UMask": "0x10", - "BriefDescription": "Counts all not taken macro branch instructions retired.", - "PEBS": "1", + "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.", "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NOT_TAKEN", - "Errata": "SKL091", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts not taken branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.1_PORTS_UTIL", + "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "EventCode": "0xC4", - "UMask": "0x20", - "BriefDescription": "Taken branch instructions retired.", - "PEBS": "1", + "BriefDescription": "Not taken branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "CounterHTOff": "0,1,2,3,4,5,6,7", "Errata": "SKL091", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.NOT_TAKEN", + "PublicDescription": "This event counts not taken branch instructions retired.", "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0xC4", - "UMask": "0x40", - "BriefDescription": "Counts the number of far branch instructions retired.", - "PEBS": "1", + "BriefDescription": "Conditional branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "CounterHTOff": "0,1,2,3,4,5,6,7", "Errata": "SKL091", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts far branch instructions retired.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC5", - "UMask": "0x0", - "BriefDescription": "All mispredicted macro branch instructions retired.", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.CONDITIONAL", + "PEBS": "1", + "PublicDescription": "This event counts conditional branch instructions retired.", "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xC5", - "UMask": "0x1", "BriefDescription": "Mispredicted conditional branch instructions retired.", - "PEBS": "1", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC5", "EventName": "BR_MISP_RETIRED.CONDITIONAL", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.", + "PEBS": "1", + "PublicDescription": "This event counts mispredicted conditional branch instructions retired.", "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xC5", - "UMask": "0x2", - "BriefDescription": "Mispredicted direct and indirect near call instructions retired.", - "PEBS": "1", + "BriefDescription": "Number of uops executed on the core.", "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.NEAR_CALL", - "PublicDescription": "This event counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE", + "PublicDescription": "Number of uops executed from any thread.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "EventCode": "0xC5", - "UMask": "0x4", - "BriefDescription": "Mispredicted macro branch instructions retired.", - "PEBS": "2", + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", - "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "12", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "SampleAfterValue": "2000003", + "UMask": "0xc" }, { - "EventCode": "0xC5", - "UMask": "0x20", - "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.", - "PEBS": "1", + "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.", "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.2_PORTS_UTIL", + "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x4" }, { - "EventCode": "0xCC", - "UMask": "0x20", - "BriefDescription": "Increments whenever there is an update to the LBR array.", + "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.", "Counter": "0,1,2,3", - "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", - "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0D", + "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x80" }, { - "EventCode": "0xCC", - "UMask": "0x40", - "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.", + "BriefDescription": "All (macro) branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "ROB_MISC_EVENTS.PAUSE_INST", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts all (macro) branch instructions retired.", + "SampleAfterValue": "400009" + }, + { + "BriefDescription": "Cycles where at least 1 uop was executed per-thread", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xE6", - "UMask": "0x1", - "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "BACLEARS.ANY", - "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "SampleAfterValue": "2000003", + "UMask": "0x1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index 390bdab1be9d..f31794d3b926 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -4,14 +4,14 @@ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", "MetricGroup": "TopdownL1", "MetricName": "Frontend_Bound", - "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound." + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound." }, { "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", "MetricGroup": "TopdownL1_SMT", "MetricName": "Frontend_Bound_SMT", - "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU." + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU." }, { "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", @@ -22,13 +22,14 @@ }, { "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", "MetricGroup": "TopdownL1_SMT", "MetricName": "Bad_Speculation_SMT", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU." }, { "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricConstraint": "NO_NMI_WATCHDOG", "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", "MetricGroup": "TopdownL1", "MetricName": "Backend_Bound", @@ -36,7 +37,7 @@ }, { "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )", "MetricGroup": "TopdownL1_SMT", "MetricName": "Backend_Bound_SMT", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU." @@ -50,7 +51,7 @@ }, { "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", "MetricGroup": "TopdownL1_SMT", "MetricName": "Retiring_SMT", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU." @@ -58,7 +59,7 @@ { "BriefDescription": "Instructions Per Cycle (per Logical Processor)", "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", - "MetricGroup": "TopDownL1", + "MetricGroup": "Summary", "MetricName": "IPC" }, { @@ -73,24 +74,6 @@ "MetricGroup": "Branches;Fetch_BW;PGO", "MetricName": "IpTB" }, - { - "BriefDescription": "Branch instructions per taken branch. ", - "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", - "MetricGroup": "Branches;PGO", - "MetricName": "BpTB" - }, - { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1 ) )", - "MetricGroup": "PGO;IcMiss", - "MetricName": "IFetch_Line_Utilization" - }, - { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)", - "MetricGroup": "DSB;Fetch_BW", - "MetricName": "DSB_Coverage" - }, { "BriefDescription": "Cycles Per Instruction (per Logical Processor)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", @@ -104,86 +87,104 @@ "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots (per-Physical Core)", + "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)", "MetricExpr": "4 * cycles", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total issue-pipeline slots (per-Physical Core)", - "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)", + "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "MetricGroup": "TopDownL1_SMT", "MetricName": "SLOTS_SMT" }, { - "BriefDescription": "Instructions per Load (lower number means higher occurance rate)", + "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS", "MetricGroup": "Instruction_Type", - "MetricName": "IpL" + "MetricName": "IpLoad" }, { - "BriefDescription": "Instructions per Store (lower number means higher occurance rate)", + "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES", "MetricGroup": "Instruction_Type", - "MetricName": "IpS" + "MetricName": "IpStore" }, { - "BriefDescription": "Instructions per Branch (lower number means higher occurance rate)", + "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", "MetricGroup": "Branches;Instruction_Type", - "MetricName": "IpB" + "MetricName": "IpBranch" }, { - "BriefDescription": "Instruction per (near) call (lower number means higher occurance rate)", + "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", "MetricGroup": "Branches", "MetricName": "IpCall" }, + { + "BriefDescription": "Branch instructions per taken branch. ", + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTkBranch" + }, + { + "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )", + "MetricGroup": "FLOPS;FP_Arith;Instruction_Type", + "MetricName": "IpFLOP" + }, { "BriefDescription": "Total number of retired Instructions", "MetricExpr": "INST_RETIRED.ANY", - "MetricGroup": "Summary", + "MetricGroup": "Summary;TopDownL1", "MetricName": "Instructions" }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)", + "MetricGroup": "DSB;Fetch_BW", + "MetricName": "DSB_Coverage" + }, { "BriefDescription": "Instructions Per Cycle (per physical core)", "MetricExpr": "INST_RETIRED.ANY / cycles", - "MetricGroup": "SMT", + "MetricGroup": "SMT;TopDownL1", "MetricName": "CoreIPC" }, { "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", - "MetricGroup": "SMT", + "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", + "MetricGroup": "SMT;TopDownL1", "MetricName": "CoreIPC_SMT" }, { "BriefDescription": "Floating Point Operations Per Cycle", - "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / cycles", + "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / cycles", "MetricGroup": "FLOPS", "MetricName": "FLOPc" }, { "BriefDescription": "Floating Point Operations Per Cycle", - "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "MetricGroup": "FLOPS_SMT", "MetricName": "FLOPc_SMT" }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", - "MetricGroup": "Pipeline", + "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 )", + "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)", - "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES", "MetricGroup": "BrMispredicts", "MetricName": "Branch_Misprediction_Cost" }, { "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)", - "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES", "MetricGroup": "BrMispredicts_SMT", "MetricName": "Branch_Misprediction_Cost_SMT" }, @@ -213,14 +214,14 @@ }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricConstraint": "NO_NMI_WATCHDOG", "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", "MetricGroup": "TLB", - "MetricName": "Page_Walks_Utilization", - "MetricConstraint": "NO_NMI_WATCHDOG" + "MetricName": "Page_Walks_Utilization" }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) )", + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )", "MetricGroup": "TLB_SMT", "MetricName": "Page_Walks_Utilization_SMT" }, @@ -245,7 +246,7 @@ { "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time", - "MetricGroup": "Memory_BW", + "MetricGroup": "Memory_BW;Offcore", "MetricName": "L3_Cache_Access_BW" }, { @@ -263,7 +264,7 @@ { "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY", - "MetricGroup": "Cache_Misses", + "MetricGroup": "Cache_Misses;Offcore", "MetricName": "L2MPKI_All" }, { @@ -298,7 +299,7 @@ }, { "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / 1000000000 ) / duration_time", + "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, @@ -310,44 +311,56 @@ }, { "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active", - "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 )", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", + "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", - "MetricGroup": "Summary", + "MetricGroup": "OS", "MetricName": "Kernel_Utilization" }, { "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", - "MetricGroup": "Memory_BW", + "MetricGroup": "Memory_BW;SoC", "MetricName": "DRAM_BW_Use" }, { "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", - "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x35\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", - "MetricGroup": "Memory_Lat", - "MetricName": "DRAM_Read_Latency" + "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x35\\,umask\\=0x21\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )", + "MetricGroup": "Memory_Lat;SoC", + "MetricName": "MEM_Read_Latency" }, { "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", - "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x36\\,umask\\=0x21\\,thresh\\=1@", - "MetricGroup": "Memory_BW", - "MetricName": "DRAM_Parallel_Reads" + "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433\\,thresh\\=1@", + "MetricGroup": "Memory_BW;SoC", + "MetricName": "MEM_Parallel_Reads" + }, + { + "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]", + "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3 ) * 4 / 1000000000 / duration_time", + "MetricGroup": "IO_BW;SoC;Server", + "MetricName": "IO_Write_BW" + }, + { + "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]", + "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 ) * 4 / 1000000000 / duration_time", + "MetricGroup": "IO_BW;SoC;Server", + "MetricName": "IO_Read_BW" }, { "BriefDescription": "Socket actual clocks when any core is active on that socket", "MetricExpr": "cha_0@event\\=0x0@", - "MetricGroup": "", + "MetricGroup": "SoC", "MetricName": "Socket_CLKS" }, { - "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions. )", + "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]", "MetricExpr": "INST_RETIRED.ANY / ( BR_INST_RETIRED.FAR_BRANCH / 2 )", - "MetricGroup": "", + "MetricGroup": "Branches;OS", "MetricName": "IpFarBranch" }, { diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json index 9c7e5f8beee2..b80b5d66385d 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json @@ -94,17 +94,7 @@ "Unit": "iMC" }, { - "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd", - "Counter": "0,1,2,3", - "EventCode": "0x4", - "EventName": "LLC_MISSES.MEM_READ", - "PerPkg": "1", - "ScaleUnit": "64Bytes", - "UMask": "0x3", - "Unit": "iMC" - }, - { - "BriefDescription": "All DRAM Read CAS Commands issued (does not include underfills) ", + "BriefDescription": "All DRAM Read CAS Commands issued (does not include underfills)", "Counter": "0,1,2,3", "EventCode": "0x4", "EventName": "UNC_M_CAS_COUNT.RD_REG", @@ -119,18 +109,18 @@ "EventCode": "0x4", "EventName": "UNC_M_CAS_COUNT.RD_UNDERFILL", "PerPkg": "1", - "PublicDescription": "Counts CAS (Column Access Select) underfill read commands issued to DRAM due to a partial write, on a per channel basis. CAS commands are issued to specify the address to read or write on DRAM, and this command counts underfill reads. Partial writes must be completed by first reading in the underfill from DRAM and then merging in the partial write data before writing the full line back to DRAM. This event will generally count about the same as the number of partial writes, but may be slightly less because of partials hitting in the WPQ (due to a previous write request). ", + "PublicDescription": "Counts CAS (Column Access Select) underfill read commands issued to DRAM due to a partial write, on a per channel basis. CAS commands are issued to specify the address to read or write on DRAM, and this command counts underfill reads. Partial writes must be completed by first reading in the underfill from DRAM and then merging in the partial write data before writing the full line back to DRAM. This event will generally count about the same as the number of partial writes, but may be slightly less because of partials hitting in the WPQ (due to a previous write request).", "UMask": "0x2", "Unit": "iMC" }, { - "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr", + "BriefDescription": "DRAM CAS (Column Address Strobe) Commands.; DRAM WR_CAS (w/ and w/out auto-pre) in Write Major Mode", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "LLC_MISSES.MEM_WRITE", + "EventName": "UNC_M_CAS_COUNT.WR_WMM", "PerPkg": "1", - "ScaleUnit": "64Bytes", - "UMask": "0xC", + "PublicDescription": "Counts the total number or DRAM Write CAS commands issued on this channel while in Write-Major-Mode.", + "UMask": "0x4", "Unit": "iMC" }, { @@ -139,7 +129,7 @@ "EventCode": "0x10", "EventName": "UNC_M_RPQ_INSERTS", "PerPkg": "1", - "PublicDescription": "Counts the number of read requests allocated into the Read Pending Queue (RPQ). This queue is used to schedule reads out to the memory controller and to track the requests. Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC. The requests deallocate after the read CAS command has been issued to DRAM. This event counts both Isochronous and non-Isochronous requests which were issued to the RPQ. ", + "PublicDescription": "Counts the number of read requests allocated into the Read Pending Queue (RPQ). This queue is used to schedule reads out to the memory controller and to track the requests. Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC. The requests deallocate after the read CAS command has been issued to DRAM. This event counts both Isochronous and non-Isochronous requests which were issued to the RPQ.", "Unit": "iMC" }, { @@ -166,7 +156,7 @@ "EventCode": "0x81", "EventName": "UNC_M_WPQ_OCCUPANCY", "PerPkg": "1", - "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests.", + "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC (memory controller). They deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have 'posted' to the iMC. This is not to be confused with actually performing the write to DRAM. Therefore, the average latency for this queue is actually not useful for deconstruction intermediate write latencies. So, we provide filtering based on if the request has posted or not. By using the 'not posted' filter, we can track how long writes spent in the iMC before completions were sent to the HA. The 'posted' filter, on the other hand, provides information about how much queueing is actually happenning in the iMC for writes before they are actually issued to memory. High average occupancies will generally coincide with high write major mode counts. Is there a filter of sorts???", "Unit": "iMC" } ] diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json index adb42c72f5c8..d7a0270de983 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json @@ -119,18 +119,15 @@ "EventName": "UPI_DATA_BANDWIDTH_TX", "PerPkg": "1", "ScaleUnit": "7.11E-06Bytes", - "UMask": "0x0F", + "UMask": "0xf", "Unit": "UPI LL" }, { - "BriefDescription": "PCI Express bandwidth reading at IIO. Derived from unc_iio_data_req_of_cpu.mem_read.part0", + "BriefDescription": "PCI Express bandwidth reading at IIO, part 0", "Counter": "0,1", "EventCode": "0x83", - "EventName": "LLC_MISSES.PCIE_READ", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0", "FCMask": "0x07", - "Filter": "ch_mask=0x1f", - "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3", - "MetricName": "LLC_MISSES.PCIE_READ", "PerPkg": "1", "PortMask": "0x01", "ScaleUnit": "4Bytes", @@ -138,118 +135,117 @@ "Unit": "IIO" }, { - "BriefDescription": "PCI Express bandwidth writing at IIO. Derived from unc_iio_data_req_of_cpu.mem_write.part0", + "BriefDescription": "PCI Express bandwidth reading at IIO, part 1", "Counter": "0,1", "EventCode": "0x83", - "EventName": "LLC_MISSES.PCIE_WRITE", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1", "FCMask": "0x07", - "Filter": "ch_mask=0x1f", - "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3", - "MetricName": "LLC_MISSES.PCIE_WRITE", "PerPkg": "1", - "PortMask": "0x01", + "PortMask": "0x02", "ScaleUnit": "4Bytes", - "UMask": "0x01", + "UMask": "0x04", "Unit": "IIO" }, { - "BriefDescription": "PCI Express bandwidth writing at IIO, part 0", + "BriefDescription": "PCI Express bandwidth reading at IIO, part 2", "Counter": "0,1", "EventCode": "0x83", - "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2", "FCMask": "0x07", - "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3", - "MetricName": "LLC_MISSES.PCIE_WRITE", "PerPkg": "1", - "PortMask": "0x01", + "PortMask": "0x04", "ScaleUnit": "4Bytes", - "UMask": "0x01", + "UMask": "0x04", "Unit": "IIO" }, { - "BriefDescription": "PCI Express bandwidth writing at IIO, part 1", + "BriefDescription": "PCI Express bandwidth reading at IIO, part 3", "Counter": "0,1", "EventCode": "0x83", - "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3", "FCMask": "0x07", "PerPkg": "1", - "PortMask": "0x02", + "PortMask": "0x08", "ScaleUnit": "4Bytes", - "UMask": "0x01", + "UMask": "0x04", "Unit": "IIO" }, { - "BriefDescription": "PCI Express bandwidth writing at IIO, part 2", + "BriefDescription": "PCI Express bandwidth reading at IIO. Derived from unc_iio_data_req_of_cpu.mem_read.part0", "Counter": "0,1", "EventCode": "0x83", - "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2", + "EventName": "LLC_MISSES.PCIE_READ", "FCMask": "0x07", + "Filter": "ch_mask=0x1f", + "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3", + "MetricName": "LLC_MISSES.PCIE_READ", "PerPkg": "1", - "PortMask": "0x04", + "PortMask": "0x01", "ScaleUnit": "4Bytes", - "UMask": "0x01", + "UMask": "0x04", "Unit": "IIO" }, { - "BriefDescription": "PCI Express bandwidth writing at IIO, part 3", + "BriefDescription": "PCI Express bandwidth writing at IIO, part 0", "Counter": "0,1", "EventCode": "0x83", - "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0", "FCMask": "0x07", "PerPkg": "1", - "PortMask": "0x08", + "PortMask": "0x01", "ScaleUnit": "4Bytes", "UMask": "0x01", "Unit": "IIO" }, { - "BriefDescription": "PCI Express bandwidth reading at IIO, part 0", + "BriefDescription": "PCI Express bandwidth writing at IIO, part 1", "Counter": "0,1", "EventCode": "0x83", - "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1", "FCMask": "0x07", - "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3", - "MetricName": "LLC_MISSES.PCIE_READ", "PerPkg": "1", - "PortMask": "0x01", + "PortMask": "0x02", "ScaleUnit": "4Bytes", - "UMask": "0x04", + "UMask": "0x01", "Unit": "IIO" }, { - "BriefDescription": "PCI Express bandwidth reading at IIO, part 1", + "BriefDescription": "PCI Express bandwidth writing at IIO, part 2", "Counter": "0,1", "EventCode": "0x83", - "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2", "FCMask": "0x07", "PerPkg": "1", - "PortMask": "0x02", + "PortMask": "0x04", "ScaleUnit": "4Bytes", - "UMask": "0x04", + "UMask": "0x01", "Unit": "IIO" }, { - "BriefDescription": "PCI Express bandwidth reading at IIO, part 2", + "BriefDescription": "PCI Express bandwidth writing at IIO, part 3", "Counter": "0,1", "EventCode": "0x83", - "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3", "FCMask": "0x07", "PerPkg": "1", - "PortMask": "0x04", + "PortMask": "0x08", "ScaleUnit": "4Bytes", - "UMask": "0x04", + "UMask": "0x01", "Unit": "IIO" }, { - "BriefDescription": "PCI Express bandwidth reading at IIO, part 3", + "BriefDescription": "PCI Express bandwidth writing at IIO. Derived from unc_iio_data_req_of_cpu.mem_write.part0", "Counter": "0,1", "EventCode": "0x83", - "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3", + "EventName": "LLC_MISSES.PCIE_WRITE", "FCMask": "0x07", + "Filter": "ch_mask=0x1f", + "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3", + "MetricName": "LLC_MISSES.PCIE_WRITE", "PerPkg": "1", - "PortMask": "0x08", + "PortMask": "0x01", "ScaleUnit": "4Bytes", - "UMask": "0x04", + "UMask": "0x01", "Unit": "IIO" }, { @@ -312,6 +308,16 @@ "UMask": "0x02", "Unit": "CHA" }, + { + "BriefDescription": "FaST wire asserted; Horizontal", + "Counter": "0,1,2,3", + "EventCode": "0xA5", + "EventName": "UNC_CHA_FAST_ASSERTED.HORZ", + "PerPkg": "1", + "PublicDescription": "Counts the number of cycles either the local or incoming distress signals are asserted. Incoming distress includes up, dn and across.", + "UMask": "0x02", + "Unit": "CHA" + }, { "BriefDescription": "Read request from a remote socket which hit in the HitMe Cache to a line In the E state", "Counter": "0,1,2,3", @@ -342,6 +348,46 @@ "UMask": "0x01", "Unit": "CHA" }, + { + "BriefDescription": "Lines Victimized; Lines in E state", + "Counter": "0,1,2,3", + "EventCode": "0x37", + "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_E", + "PerPkg": "1", + "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.", + "UMask": "0x02", + "Unit": "CHA" + }, + { + "BriefDescription": "Lines Victimized; Lines in F State", + "Counter": "0,1,2,3", + "EventCode": "0x37", + "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_F", + "PerPkg": "1", + "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.", + "UMask": "0x08", + "Unit": "CHA" + }, + { + "BriefDescription": "Lines Victimized; Lines in M state", + "Counter": "0,1,2,3", + "EventCode": "0x37", + "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_M", + "PerPkg": "1", + "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.", + "UMask": "0x01", + "Unit": "CHA" + }, + { + "BriefDescription": "Lines Victimized; Lines in S State", + "Counter": "0,1,2,3", + "EventCode": "0x37", + "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_S", + "PerPkg": "1", + "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.", + "UMask": "0x04", + "Unit": "CHA" + }, { "BriefDescription": "Number of times that an RFO hit in S state.", "Counter": "0,1,2,3", @@ -372,6 +418,65 @@ "UMask": "0x20", "Unit": "CHA" }, + { + "BriefDescription": "Ingress (from CMS) Allocations; IRQ", + "Counter": "0,1,2,3", + "EventCode": "0x13", + "EventName": "UNC_CHA_RxC_INSERTS.IRQ", + "PerPkg": "1", + "PublicDescription": "Counts number of allocations per cycle into the specified Ingress queue.", + "UMask": "0x01", + "Unit": "CHA" + }, + { + "BriefDescription": "Ingress (from CMS) Request Queue Rejects; PhyAddr Match", + "Counter": "0,1,2,3", + "EventCode": "0x19", + "EventName": "UNC_CHA_RxC_IRQ1_REJECT.PA_MATCH", + "PerPkg": "1", + "PublicDescription": "Ingress (from CMS) Request Queue Rejects; PhyAddr Match", + "UMask": "0x80", + "Unit": "CHA" + }, + { + "BriefDescription": "Ingress (from CMS) Occupancy; IRQ", + "EventCode": "0x11", + "EventName": "UNC_CHA_RxC_OCCUPANCY.IRQ", + "PerPkg": "1", + "PublicDescription": "Counts number of entries in the specified Ingress queue in each cycle.", + "UMask": "0x01", + "Unit": "CHA" + }, + { + "BriefDescription": "Snoop filter capacity evictions for E-state entries.", + "Counter": "0,1,2,3", + "EventCode": "0x3D", + "EventName": "UNC_CHA_SF_EVICTION.E_STATE", + "PerPkg": "1", + "PublicDescription": "Counts snoop filter capacity evictions for entries tracking exclusive lines in the cores cache. Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry. Does not count clean evictions such as when a cores cache replaces a tracked cacheline with a new cacheline.", + "UMask": "0x02", + "Unit": "CHA" + }, + { + "BriefDescription": "Snoop filter capacity evictions for M-state entries.", + "Counter": "0,1,2,3", + "EventCode": "0x3D", + "EventName": "UNC_CHA_SF_EVICTION.M_STATE", + "PerPkg": "1", + "PublicDescription": "Counts snoop filter capacity evictions for entries tracking modified lines in the cores cache. Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry. Does not count clean evictions such as when a cores cache replaces a tracked cacheline with a new cacheline.", + "UMask": "0x01", + "Unit": "CHA" + }, + { + "BriefDescription": "Snoop filter capacity evictions for S-state entries.", + "Counter": "0,1,2,3", + "EventCode": "0x3D", + "EventName": "UNC_CHA_SF_EVICTION.S_STATE", + "PerPkg": "1", + "PublicDescription": "Counts snoop filter capacity evictions for entries tracking shared lines in the cores cache. Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry. Does not count clean evictions such as when a cores cache replaces a tracked cacheline with a new cacheline.", + "UMask": "0x04", + "Unit": "CHA" + }, { "BriefDescription": "RspCnflct* Snoop Responses Received", "Counter": "0,1,2,3", @@ -449,7 +554,7 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x01", - "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part0. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part0. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", "UMask": "0x04", "Unit": "IIO" }, @@ -461,7 +566,7 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x02", - "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", "UMask": "0x04", "Unit": "IIO" }, @@ -473,7 +578,7 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x04", - "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", "UMask": "0x04", "Unit": "IIO" }, @@ -485,7 +590,7 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x08", - "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", "UMask": "0x04", "Unit": "IIO" }, @@ -497,7 +602,7 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x01", - "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core). In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", "UMask": "0x01", "Unit": "IIO" }, @@ -509,34 +614,226 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x02", - "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core). In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", "UMask": "0x01", "Unit": "IIO" }, { - "BriefDescription": "Write request of 4 bytes made to IIO Part2 by the CPU ", + "BriefDescription": "Write request of 4 bytes made to IIO Part2 by the CPU", "Counter": "2,3", "EventCode": "0xC0", "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART2", "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x04", - "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core). In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", "UMask": "0x01", "Unit": "IIO" }, { - "BriefDescription": "Write request of 4 bytes made to IIO Part3 by the CPU ", + "BriefDescription": "Write request of 4 bytes made to IIO Part3 by the CPU", "Counter": "2,3", "EventCode": "0xC0", "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART3", "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x08", - "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core). In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", "UMask": "0x01", "Unit": "IIO" }, + { + "BriefDescription": "Peer to peer read request for 4 bytes made by a different IIO unit to IIO Part0", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_READ.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts ever peer to peer read request for 4 bytes of data made by a different IIO unit to the MMIO space of a card on IIO Part0. Does not include requests made by the same IIO unit. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request for 4 bytes made by a different IIO unit to IIO Part1", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_READ.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts ever peer to peer read request for 4 bytes of data made by a different IIO unit to the MMIO space of a card on IIO Part1. Does not include requests made by the same IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request for 4 bytes made by a different IIO unit to IIO Part2", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_READ.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts ever peer to peer read request for 4 bytes of data made by a different IIO unit to the MMIO space of a card on IIO Part2. Does not include requests made by the same IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request for 4 bytes made by a different IIO unit to IIO Part3", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_READ.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts ever peer to peer read request for 4 bytes of data made by a different IIO unit to the MMIO space of a card on IIO Part3. Does not include requests made by the same IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of 4 bytes made to IIO Part0 by a different IIO unit", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_WRITE.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made to the MMIO space of a card on IIO Part0 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of 4 bytes made to IIO Part1 by a different IIO unit", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_WRITE.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made to the MMIO space of a card on IIO Part1 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of 4 bytes made to IIO Part2 by a different IIO unit", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_WRITE.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made to the MMIO space of a card on IIO Part2 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of 4 bytes made to IIO Part3 by a different IIO unit", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_WRITE.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made to the MMIO space of a card on IIO Part3 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request for 4 bytes made by IIO Part0 to an IIO target", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts every peer to peer read request for 4 bytes of data made by IIO Part0 to the MMIO space of an IIO target. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request for 4 bytes made by IIO Part1 to an IIO target", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts every peer to peer read request for 4 bytes of data made by IIO Part1 to the MMIO space of an IIO target. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request for 4 bytes made by IIO Part2 to an IIO target", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts every peer to peer read request for 4 bytes of data made by IIO Part2 to the MMIO space of an IIO target. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request for 4 bytes made by IIO Part3 to an IIO target", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts every peer to peer read request for 4 bytes of data made by IIO Part3 to the MMIO space of an IIO target. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of 4 bytes made by IIO Part0 to an IIO target", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made by IIO Part0 to the MMIO space of an IIO target. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of 4 bytes made by IIO Part0 to an IIO target", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made by IIO Part1 to the MMIO space of an IIO target. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of 4 bytes made by IIO Part0 to an IIO target", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made by IIO Part2 to the MMIO space of an IIO target. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of 4 bytes made by IIO Part0 to an IIO target", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made by IIO Part3 to the MMIO space of an IIO target. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, { "BriefDescription": "Read request for up to a 64 byte transaction is made by the CPU to IIO Part0", "Counter": "0,1,2,3", @@ -545,7 +842,7 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x01", - "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part0. In the general case, part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part0. In the general case, part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", "UMask": "0x04", "Unit": "IIO" }, @@ -557,7 +854,7 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x02", - "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", "UMask": "0x04", "Unit": "IIO" }, @@ -569,7 +866,7 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x04", - "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", "UMask": "0x04", "Unit": "IIO" }, @@ -581,7 +878,7 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x08", - "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", "UMask": "0x04", "Unit": "IIO" }, @@ -593,7 +890,7 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x01", - "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core). In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", "UMask": "0x01", "Unit": "IIO" }, @@ -605,34 +902,130 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x02", - "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core). In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", "UMask": "0x01", "Unit": "IIO" }, { - "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part2 by the CPU ", + "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part2 by the CPU", "Counter": "0,1,2,3", "EventCode": "0xC1", "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART2", "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x04", - "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core). In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", "UMask": "0x01", "Unit": "IIO" }, { - "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part3 by the CPU ", + "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part3 by the CPU", "Counter": "0,1,2,3", "EventCode": "0xC1", "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART3", "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x08", - "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core). In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", "UMask": "0x01", "Unit": "IIO" }, + { + "BriefDescription": "Peer to peer read request for up to a 64 byte transaction is made by a different IIO unit to IIO Part0", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_READ.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts every peer to peer read request for up to a 64 byte transaction of data made by a different IIO unit to the MMIO space of a card on IIO Part0. Does not include requests made by the same IIO unit. In the general case, part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request for up to a 64 byte transaction is made by a different IIO unit to IIO Part1", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_READ.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts every peer to peer read request for up to a 64 byte transaction of data made by a different IIO unit to the MMIO space of a card on IIO Part1. Does not include requests made by the same IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request for up to a 64 byte transaction is made by a different IIO unit to IIO Part2", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_READ.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts every peer to peer read request for up to a 64 byte transaction of data made by a different IIO unit to the MMIO space of a card on IIO Part2. Does not include requests made by the same IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request for up to a 64 byte transaction is made by a different IIO unit to IIO Part3", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_READ.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts every peer to peer read request for up to a 64 byte transaction of data made by a different IIO unit to the MMIO space of a card on IIO Part3. Does not include requests made by the same IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made to IIO Part0 by a different IIO unit", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_WRITE.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part0 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made to IIO Part1 by a different IIO unit", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_WRITE.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part1 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made to IIO Part2 by a different IIO unit", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_WRITE.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part2 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made to IIO Part3 by a different IIO unit", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_WRITE.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part3 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, { "BriefDescription": "Read request for up to a 64 byte transaction is made by IIO Part0 to Memory", "Counter": "0,1,2,3", @@ -729,6 +1122,102 @@ "UMask": "0x01", "Unit": "IIO" }, + { + "BriefDescription": "Peer to peer read request of up to a 64 byte transaction is made by IIO Part0 to an IIO target", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts every peer to peer read request of up to a 64 byte transaction made by IIO Part0 to the MMIO space of an IIO target. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request of up to a 64 byte transaction is made by IIO Part1 to an IIO target", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts every peer to peer read request of up to a 64 byte transaction made by IIO Part1 to the MMIO space of an IIO target. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request of up to a 64 byte transaction is made by IIO Part2 to an IIO target", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts every peer to peer read request of up to a 64 byte transaction made by IIO Part2 to the MMIO space of an IIO target. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request of up to a 64 byte transaction is made by IIO Part3 to an IIO target", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts every peer to peer read request of up to a 64 byte transaction made by IIO Part3 to the MMIO space of an IIO target. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made by IIO Part0 to an IIO target", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made by IIO Part0 to the MMIO space of an IIO target. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made by IIO Part1 to an IIO target", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made by IIO Part1 to the MMIO space of an IIO target.In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made by IIO Part2 to an IIO target", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made by IIO Part2 to the MMIO space of an IIO target. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made by IIO Part3 to an IIO target", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made by IIO Part3 to the MMIO space of an IIO target. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, { "BriefDescription": "Traffic in which the M2M to iMC Bypass was not taken", "Counter": "0,1,2,3", @@ -813,7 +1302,7 @@ "Unit": "M2M" }, { - "BriefDescription": "Multi-socket cacheline Directory lookups (cacheline found in A state) ", + "BriefDescription": "Multi-socket cacheline Directory lookups (cacheline found in A state)", "Counter": "0,1,2,3", "EventCode": "0x2D", "EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_A", @@ -823,7 +1312,7 @@ "Unit": "M2M" }, { - "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in I state) ", + "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in I state)", "Counter": "0,1,2,3", "EventCode": "0x2D", "EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_I", @@ -833,7 +1322,7 @@ "Unit": "M2M" }, { - "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in S state) ", + "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in S state)", "Counter": "0,1,2,3", "EventCode": "0x2D", "EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_S", @@ -863,7 +1352,7 @@ "Unit": "M2M" }, { - "BriefDescription": "Multi-socket cacheline Directory update from/to Any state ", + "BriefDescription": "Multi-socket cacheline Directory update from/to Any state", "Counter": "0,1,2,3", "EventCode": "0x2E", "EventName": "UNC_M2M_DIRECTORY_UPDATE.ANY", @@ -918,7 +1407,7 @@ "EventCode": "0x37", "EventName": "UNC_M2M_IMC_READS.ALL", "PerPkg": "1", - "PublicDescription": "Counts when the M2M (Mesh to Memory) issues reads to the iMC (Memory Controller). ", + "PublicDescription": "Counts when the M2M (Mesh to Memory) issues reads to the iMC (Memory Controller).", "UMask": "0x4", "Unit": "M2M" }, @@ -942,6 +1431,16 @@ "UMask": "0x10", "Unit": "M2M" }, + { + "BriefDescription": "M2M Writes Issued to iMC; All, regardless of priority.", + "Counter": "0,1,2,3", + "EventCode": "0x38", + "EventName": "UNC_M2M_IMC_WRITES.NI", + "PerPkg": "1", + "PublicDescription": "M2M Writes Issued to iMC; All, regardless of priority.", + "UMask": "0x80", + "Unit": "M2M" + }, { "BriefDescription": "Partial Non-Isochronous writes to the iMC", "Counter": "0,1,2,3", @@ -976,12 +1475,77 @@ "EventCode": "0x1", "EventName": "UNC_M2M_RxC_AD_INSERTS", "PerPkg": "1", - "PublicDescription": "Counts when the a new entry is Received(RxC) and then added to the AD (Address Ring) Ingress Queue from the CMS (Common Mesh Stop). This is generally used for reads, and ", + "PublicDescription": "Counts when the a new entry is Received(RxC) and then added to the AD (Address Ring) Ingress Queue from the CMS (Common Mesh Stop). This is generally used for reads, and", "Unit": "M2M" }, { - "BriefDescription": "Prefetches generated by the flow control queue of the M3UPI unit.", + "BriefDescription": "AD Ingress (from CMS) Occupancy", + "Counter": "0,1,2,3", + "EventCode": "0x2", + "EventName": "UNC_M2M_RxC_AD_OCCUPANCY", + "PerPkg": "1", + "PublicDescription": "AD Ingress (from CMS) Occupancy", + "Unit": "M2M" + }, + { + "BriefDescription": "BL Ingress (from CMS) Allocations", + "Counter": "0,1,2,3", + "EventCode": "0x5", + "EventName": "UNC_M2M_RxC_BL_INSERTS", + "PerPkg": "1", + "PublicDescription": "BL Ingress (from CMS) Allocations", + "Unit": "M2M" + }, + { + "BriefDescription": "BL Ingress (from CMS) Occupancy", + "Counter": "0,1,2,3", + "EventCode": "0x6", + "EventName": "UNC_M2M_RxC_BL_OCCUPANCY", + "PerPkg": "1", + "PublicDescription": "BL Ingress (from CMS) Occupancy", + "Unit": "M2M" + }, + { + "BriefDescription": "AD Egress (to CMS) Allocations", + "Counter": "0,1,2,3", + "EventCode": "0x9", + "EventName": "UNC_M2M_TxC_AD_INSERTS", + "PerPkg": "1", + "PublicDescription": "AD Egress (to CMS) Allocations", + "Unit": "M2M" + }, + { + "BriefDescription": "AD Egress (to CMS) Occupancy", + "Counter": "0,1,2,3", + "EventCode": "0xA", + "EventName": "UNC_M2M_TxC_AD_OCCUPANCY", + "PerPkg": "1", + "PublicDescription": "AD Egress (to CMS) Occupancy", + "Unit": "M2M" + }, + { + "BriefDescription": "BL Egress (to CMS) Allocations; All", + "Counter": "0,1,2,3", + "EventCode": "0x15", + "EventName": "UNC_M2M_TxC_BL_INSERTS.ALL", + "PerPkg": "1", + "PublicDescription": "BL Egress (to CMS) Allocations; All", + "UMask": "0x03", + "Unit": "M2M" + }, + { + "BriefDescription": "BL Egress (to CMS) Occupancy; All", "Counter": "0,1,2,3", + "EventCode": "0x16", + "EventName": "UNC_M2M_TxC_BL_OCCUPANCY.ALL", + "PerPkg": "1", + "PublicDescription": "BL Egress (to CMS) Occupancy; All", + "UMask": "0x03", + "Unit": "M2M" + }, + { + "BriefDescription": "Prefetches generated by the flow control queue of the M3UPI unit.", + "Counter": "0,1,2", "EventCode": "0x29", "EventName": "UNC_M3UPI_UPI_PREFETCH_SPAWN", "PerPkg": "1", @@ -1114,23 +1678,23 @@ "Unit": "UPI LL" }, { - "BriefDescription": "UPI interconnect send bandwidth for payload. Derived from unc_upi_txl_flits.all_data", + "BriefDescription": "Null FLITs transmitted from any slot", "Counter": "0,1,2,3", "EventCode": "0x2", - "EventName": "UPI_DATA_BANDWIDTH_TX", + "EventName": "UNC_UPI_TxL_FLITS.ALL_NULL", "PerPkg": "1", - "ScaleUnit": "7.11E-06Bytes", - "UMask": "0x0F", + "PublicDescription": "Counts null FLITs (80 bit FLow control unITs) transmitted via any of the 3 Intel Ulra Path Interconnect (UPI) slots on this UPI unit.", + "UMask": "0x27", "Unit": "UPI LL" }, { - "BriefDescription": "Null FLITs transmitted from any slot", + "BriefDescription": "Valid Flits Sent; Data", "Counter": "0,1,2,3", "EventCode": "0x2", - "EventName": "UNC_UPI_TxL_FLITS.ALL_NULL", + "EventName": "UNC_UPI_TxL_FLITS.DATA", "PerPkg": "1", - "PublicDescription": "Counts null FLITs (80 bit FLow control unITs) transmitted via any of the 3 Intel Ulra Path Interconnect (UPI) slots on this UPI unit.", - "UMask": "0x27", + "PublicDescription": "Shows legal flit time (hides impact of L0p and L0c).; Count Data Flits (which consume all slots), but how much to count is based on Slot0-2 mask, so count can be 0-3 depending on which slots are enabled for counting..", + "UMask": "0x8", "Unit": "UPI LL" }, { diff --git a/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json b/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json index 7f466c97e485..bbeee1058096 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json @@ -1,284 +1,284 @@ [ { - "EventCode": "0x08", - "UMask": "0x1", - "BriefDescription": "Load misses in all DTLB levels that cause page walks", + "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.", "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", - "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.STLB_HIT", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "EventCode": "0x08", - "UMask": "0x2", - "BriefDescription": "Page walk completed due to a demand data load to a 4K page", + "BriefDescription": "Store misses in all DTLB levels that cause page walks", "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K", - "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK", + "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x08", - "UMask": "0x4", - "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page", + "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page", "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M", - "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.", + "SampleAfterValue": "100003", + "UMask": "0x4" }, { - "EventCode": "0x08", - "UMask": "0x8", - "BriefDescription": "Page walk completed due to a demand data load to a 1G page", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G", - "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_PENDING", + "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.", + "SampleAfterValue": "100003", + "UMask": "0x10" }, { - "EventCode": "0x08", - "UMask": "0xe", - "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)", "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", - "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0x08", - "UMask": "0x10", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", + "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.", "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.WALK_PENDING", - "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xAE", + "EventName": "ITLB.ITLB_FLUSH", + "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).", + "SampleAfterValue": "100007", + "UMask": "0x1" }, { - "EventCode": "0x08", - "UMask": "0x10", - "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE", + "CounterHTOff": "0,1,2,3,4,5,6,7", "CounterMask": "1", - "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_ACTIVE", + "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0x08", - "UMask": "0x20", "BriefDescription": "Loads that miss the DTLB and hit the STLB.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", "EventName": "DTLB_LOAD_MISSES.STLB_HIT", "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "UMask": "0x1", - "BriefDescription": "Store misses in all DTLB levels that cause page walks", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK", - "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "EventCode": "0x49", - "UMask": "0x2", - "BriefDescription": "Page walk completed due to a demand data store to a 4K page", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K", - "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "UMask": "0x4", - "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M", - "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0x49", - "UMask": "0x8", - "BriefDescription": "Page walk completed due to a demand data store to a 1G page", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G", - "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages. The page walks can end with or without a page fault.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "DTLB_STORE_MISSES.WALK_PENDING", + "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "EventCode": "0x49", - "UMask": "0xe", - "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)", + "BriefDescription": "DTLB flush attempts of the thread-specific entries", "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", - "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xBD", + "EventName": "TLB_FLUSH.DTLB_THREAD", + "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.", + "SampleAfterValue": "100007", + "UMask": "0x1" }, { - "EventCode": "0x49", - "UMask": "0x10", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.WALK_PENDING", - "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_PENDING", + "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0x49", - "UMask": "0x10", "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE", + "CounterHTOff": "0,1,2,3,4,5,6,7", "CounterMask": "1", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE", "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0x49", - "UMask": "0x20", - "BriefDescription": "Stores that miss the DTLB and hit the STLB.", + "BriefDescription": "Misses at all ITLB levels that cause page walks", "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.STLB_HIT", - "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK", + "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0x4F", - "UMask": "0x10", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.", + "BriefDescription": "Stores that miss the DTLB and hit the STLB.", "Counter": "0,1,2,3", - "EventName": "EPT.WALK_PENDING", - "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).", + "SampleAfterValue": "100003", + "UMask": "0x20" }, { - "EventCode": "0x85", - "UMask": "0x1", - "BriefDescription": "Misses at all ITLB levels that cause page walks", + "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)", "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK", - "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0xe" }, { - "EventCode": "0x85", - "UMask": "0x2", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)", + "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)", "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.WALK_COMPLETED_4K", - "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0xe" }, { - "EventCode": "0x85", - "UMask": "0x4", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)", + "BriefDescription": "Page walk completed due to a demand data store to a 4K page", "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", - "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0x85", - "UMask": "0x8", "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", "EventName": "ITLB_MISSES.WALK_COMPLETED_1G", "PublicDescription": "Counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "EventCode": "0x85", - "UMask": "0xe", "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", "EventName": "ITLB_MISSES.WALK_COMPLETED", "PublicDescription": "Counts completed page walks (2M and 4M page sizes) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0xe" }, { - "EventCode": "0x85", - "UMask": "0x10", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.", + "BriefDescription": "Page walk completed due to a demand data load to a 4K page", "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.WALK_PENDING", - "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "EventCode": "0x85", - "UMask": "0x10", - "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.", + "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page", "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.WALK_ACTIVE", - "CounterMask": "1", - "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.", + "SampleAfterValue": "2000003", + "UMask": "0x4" }, { - "EventCode": "0x85", - "UMask": "0x20", - "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.", + "BriefDescription": "Load misses in all DTLB levels that cause page walks", "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.STLB_HIT", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", + "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xAE", - "UMask": "0x1", - "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.", - "Counter": "0,1,2,3", - "EventName": "ITLB.ITLB_FLUSH", - "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xBD", - "UMask": "0x1", - "BriefDescription": "DTLB flush attempts of the thread-specific entries", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.", "Counter": "0,1,2,3", - "EventName": "TLB_FLUSH.DTLB_THREAD", - "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x4F", + "EventName": "EPT.WALK_PENDING", + "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "EventCode": "0xBD", - "UMask": "0x20", "BriefDescription": "STLB flush attempts", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xBD", "EventName": "TLB_FLUSH.STLB_ANY", "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).", "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" + }, + { + "BriefDescription": "Page walk completed due to a demand data load to a 1G page", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G", + "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", + "SampleAfterValue": "2000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.", + "SampleAfterValue": "100003", + "UMask": "0x10" + }, + { + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x4" + }, + { + "BriefDescription": "Page walk completed due to a demand data store to a 1G page", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G", + "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages. The page walks can end with or without a page fault.", + "SampleAfterValue": "100003", + "UMask": "0x8" } ] \ No newline at end of file -- cgit v1.3.1 From 7fedd9b84bf8288ccb7a88aadedffdc9221f31e6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 24 Sep 2020 21:44:51 +0900 Subject: perf evsel: Add evsel__clone() function The evsel__clone() is to create an exactly same evsel from same attributes. The function assumes the given evsel is not configured yet so it cares fields set during event parsing. Those fields are now moved together as Jiri suggested. Note that metric events will be handled by later patch. It will be used by perf stat to generate separate events for each cgroup. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200924124455.336326-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/evsel.h | 93 +++++++++++++++++++++++++------------------ 2 files changed, 158 insertions(+), 39 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2ac19a2530a2..1cad6051d8b0 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -331,6 +331,110 @@ error_free: goto out; } +static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src) +{ + struct evsel_config_term *pos, *tmp; + + list_for_each_entry(pos, &src->config_terms, list) { + tmp = malloc(sizeof(*tmp)); + if (tmp == NULL) + return -ENOMEM; + + *tmp = *pos; + if (tmp->free_str) { + tmp->val.str = strdup(pos->val.str); + if (tmp->val.str == NULL) { + free(tmp); + return -ENOMEM; + } + } + list_add_tail(&tmp->list, &dst->config_terms); + } + return 0; +} + +/** + * evsel__clone - create a new evsel copied from @orig + * @orig: original evsel + * + * The assumption is that @orig is not configured nor opened yet. + * So we only care about the attributes that can be set while it's parsed. + */ +struct evsel *evsel__clone(struct evsel *orig) +{ + struct evsel *evsel; + + BUG_ON(orig->core.fd); + BUG_ON(orig->counts); + BUG_ON(orig->priv); + BUG_ON(orig->per_pkg_mask); + + /* cannot handle BPF objects for now */ + if (orig->bpf_obj) + return NULL; + + evsel = evsel__new(&orig->core.attr); + if (evsel == NULL) + return NULL; + + evsel->core.cpus = perf_cpu_map__get(orig->core.cpus); + evsel->core.own_cpus = perf_cpu_map__get(orig->core.own_cpus); + evsel->core.threads = perf_thread_map__get(orig->core.threads); + evsel->core.nr_members = orig->core.nr_members; + evsel->core.system_wide = orig->core.system_wide; + + if (orig->name) { + evsel->name = strdup(orig->name); + if (evsel->name == NULL) + goto out_err; + } + if (orig->group_name) { + evsel->group_name = strdup(orig->group_name); + if (evsel->group_name == NULL) + goto out_err; + } + if (orig->pmu_name) { + evsel->pmu_name = strdup(orig->pmu_name); + if (evsel->pmu_name == NULL) + goto out_err; + } + if (orig->filter) { + evsel->filter = strdup(orig->filter); + if (evsel->filter == NULL) + goto out_err; + } + evsel->cgrp = cgroup__get(orig->cgrp); + evsel->tp_format = orig->tp_format; + evsel->handler = orig->handler; + evsel->leader = orig->leader; + + evsel->max_events = orig->max_events; + evsel->tool_event = orig->tool_event; + evsel->unit = orig->unit; + evsel->scale = orig->scale; + evsel->snapshot = orig->snapshot; + evsel->per_pkg = orig->per_pkg; + evsel->percore = orig->percore; + evsel->precise_max = orig->precise_max; + evsel->use_uncore_alias = orig->use_uncore_alias; + evsel->is_libpfm_event = orig->is_libpfm_event; + + evsel->exclude_GH = orig->exclude_GH; + evsel->sample_read = orig->sample_read; + evsel->auto_merge_stats = orig->auto_merge_stats; + evsel->collect_stat = orig->collect_stat; + evsel->weak_group = orig->weak_group; + + if (evsel__copy_config_terms(evsel, orig) < 0) + goto out_err; + + return evsel; + +out_err: + evsel__delete(evsel); + return NULL; +} + /* * Returns pointer with encoded error via interface. */ diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 35e3f6d66085..79a860d8e3ee 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -42,65 +42,79 @@ enum perf_tool_event { */ struct evsel { struct perf_evsel core; - struct evlist *evlist; - char *filter; + struct evlist *evlist; + off_t id_offset; + int idx; + int id_pos; + int is_pos; + unsigned int sample_size; + + /* + * These fields can be set in the parse-events code or similar. + * Please check evsel__clone() to copy them properly so that + * they can be released properly. + */ + struct { + char *name; + char *group_name; + const char *pmu_name; + struct tep_event *tp_format; + char *filter; + unsigned long max_events; + double scale; + const char *unit; + struct cgroup *cgrp; + enum perf_tool_event tool_event; + /* parse modifier helper */ + int exclude_GH; + int sample_read; + bool snapshot; + bool per_pkg; + bool percore; + bool precise_max; + bool use_uncore_alias; + bool is_libpfm_event; + bool auto_merge_stats; + bool collect_stat; + bool weak_group; + int bpf_fd; + struct bpf_object *bpf_obj; + }; + + /* + * metric fields are similar, but needs more care as they can have + * references to other metric (evsel). + */ + const char * metric_expr; + const char * metric_name; + struct evsel **metric_events; + struct evsel *metric_leader; + + void *handler; struct perf_counts *counts; struct perf_counts *prev_raw_counts; - int idx; - unsigned long max_events; unsigned long nr_events_printed; - char *name; - double scale; - const char *unit; - struct tep_event *tp_format; - off_t id_offset; struct perf_stat_evsel *stats; void *priv; u64 db_id; - struct cgroup *cgrp; - void *handler; - unsigned int sample_size; - int id_pos; - int is_pos; - enum perf_tool_event tool_event; bool uniquified_name; - bool snapshot; bool supported; bool needs_swap; bool disabled; bool no_aux_samples; bool immediate; bool tracking; - bool per_pkg; - bool precise_max; bool ignore_missing_thread; bool forced_leader; - bool use_uncore_alias; - bool is_libpfm_event; - /* parse modifier helper */ - int exclude_GH; - int sample_read; - unsigned long *per_pkg_mask; - struct evsel *leader; - char *group_name; bool cmdline_group_boundary; - struct list_head config_terms; - struct bpf_object *bpf_obj; - int bpf_fd; - int err; - bool auto_merge_stats; bool merged_stat; - const char * metric_expr; - const char * metric_name; - struct evsel **metric_events; - struct evsel *metric_leader; - bool collect_stat; - bool weak_group; bool reset_group; bool errored; - bool percore; + unsigned long *per_pkg_mask; + struct evsel *leader; + struct list_head config_terms; + int err; int cpu_iter; - const char *pmu_name; struct { evsel__sb_cb_t *cb; void *data; @@ -169,6 +183,7 @@ static inline struct evsel *evsel__new(struct perf_event_attr *attr) return evsel__new_idx(attr, 0); } +struct evsel *evsel__clone(struct evsel *orig); struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx); /* -- cgit v1.3.1 From 7f3603b631362340774291a961712ec07bbf8122 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 23 Sep 2020 09:50:47 -0700 Subject: KVM: VMX: Rename RDTSCP secondary exec control name to insert "ENABLE" Rename SECONDARY_EXEC_RDTSCP to SECONDARY_EXEC_ENABLE_RDTSCP in preparation for consolidating the logic for adjusting secondary exec controls based on the guest CPUID model. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20200923165048.20486-4-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/vmx.h | 2 +- arch/x86/kvm/vmx/capabilities.h | 2 +- arch/x86/kvm/vmx/nested.c | 4 ++-- arch/x86/kvm/vmx/vmx.c | 10 +++++----- tools/testing/selftests/kvm/include/x86_64/vmx.h | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index cd7de4b401fe..f8ba5289ecb0 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -52,7 +52,7 @@ #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES VMCS_CONTROL_BIT(VIRT_APIC_ACCESSES) #define SECONDARY_EXEC_ENABLE_EPT VMCS_CONTROL_BIT(EPT) #define SECONDARY_EXEC_DESC VMCS_CONTROL_BIT(DESC_EXITING) -#define SECONDARY_EXEC_RDTSCP VMCS_CONTROL_BIT(RDTSCP) +#define SECONDARY_EXEC_ENABLE_RDTSCP VMCS_CONTROL_BIT(RDTSCP) #define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE VMCS_CONTROL_BIT(VIRTUAL_X2APIC) #define SECONDARY_EXEC_ENABLE_VPID VMCS_CONTROL_BIT(VPID) #define SECONDARY_EXEC_WBINVD_EXITING VMCS_CONTROL_BIT(WBINVD_EXITING) diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 5761736d373a..3a1861403d73 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -151,7 +151,7 @@ static inline bool vmx_umip_emulated(void) static inline bool cpu_has_vmx_rdtscp(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & - SECONDARY_EXEC_RDTSCP; + SECONDARY_EXEC_ENABLE_RDTSCP; } static inline bool cpu_has_vmx_virtualize_x2apic_mode(void) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 473fa40cc924..e8048e01c044 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2286,7 +2286,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) /* Take the following fields only from vmcs12 */ exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_ENABLE_INVPCID | - SECONDARY_EXEC_RDTSCP | + SECONDARY_EXEC_ENABLE_RDTSCP | SECONDARY_EXEC_XSAVES | SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | @@ -6404,7 +6404,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) msrs->secondary_ctls_low = 0; msrs->secondary_ctls_high &= SECONDARY_EXEC_DESC | - SECONDARY_EXEC_RDTSCP | + SECONDARY_EXEC_ENABLE_RDTSCP | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_WBINVD_EXITING | SECONDARY_EXEC_APIC_REGISTER_VIRT | diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 641e5ef513ae..f60c64b749b3 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2430,7 +2430,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, SECONDARY_EXEC_UNRESTRICTED_GUEST | SECONDARY_EXEC_PAUSE_LOOP_EXITING | SECONDARY_EXEC_DESC | - SECONDARY_EXEC_RDTSCP | + SECONDARY_EXEC_ENABLE_RDTSCP | SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | @@ -4146,15 +4146,15 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) if (cpu_has_vmx_rdtscp()) { bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP); if (!rdtscp_enabled) - exec_control &= ~SECONDARY_EXEC_RDTSCP; + exec_control &= ~SECONDARY_EXEC_ENABLE_RDTSCP; if (nested) { if (rdtscp_enabled) vmx->nested.msrs.secondary_ctls_high |= - SECONDARY_EXEC_RDTSCP; + SECONDARY_EXEC_ENABLE_RDTSCP; else vmx->nested.msrs.secondary_ctls_high &= - ~SECONDARY_EXEC_RDTSCP; + ~SECONDARY_EXEC_ENABLE_RDTSCP; } } @@ -7323,7 +7323,7 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, * Because it is marked as EmulateOnUD, we need to intercept it here. */ case x86_intercept_rdtscp: - if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { + if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_RDTSCP)) { exception->vector = UD_VECTOR; exception->error_code_valid = false; return X86EMUL_PROPAGATE_FAULT; diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 16fa21ebb99c..54d624dd6c10 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -48,7 +48,7 @@ #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 #define SECONDARY_EXEC_ENABLE_EPT 0x00000002 #define SECONDARY_EXEC_DESC 0x00000004 -#define SECONDARY_EXEC_RDTSCP 0x00000008 +#define SECONDARY_EXEC_ENABLE_RDTSCP 0x00000008 #define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 -- cgit v1.3.1 From d468706e313ca3ec85b0e6e71a960ee0bbadd9f3 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 25 Sep 2020 16:34:22 +0200 Subject: KVM: selftests: Add test for user space MSR handling Now that we have the ability to handle MSRs from user space and also to select which ones we do want to prevent in-kernel KVM code from handling, let's add a selftest to show case and verify the API. Signed-off-by: Alexander Graf Message-Id: <20200925143422.21718-9-graf@amazon.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/x86_64/user_msr_test.c | 248 +++++++++++++++++++++ 3 files changed, 250 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/user_msr_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 452787152748..307ceaadbbb9 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -11,6 +11,7 @@ /x86_64/set_sregs_test /x86_64/smm_test /x86_64/state_test +/x86_64/user_msr_test /x86_64/vmx_preemption_timer_test /x86_64/svm_vmcall_test /x86_64/sync_regs_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 4a166588d99f..80d5c348354c 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -55,6 +55,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test TEST_GEN_PROGS_x86_64 += x86_64/debug_regs +TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test TEST_GEN_PROGS_x86_64 += clear_dirty_log_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test diff --git a/tools/testing/selftests/kvm/x86_64/user_msr_test.c b/tools/testing/selftests/kvm/x86_64/user_msr_test.c new file mode 100644 index 000000000000..cbe1b08890ff --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/user_msr_test.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * tests for KVM_CAP_X86_USER_SPACE_MSR and KVM_X86_SET_MSR_FILTER + * + * Copyright (C) 2020, Amazon Inc. + * + * This is a functional test to verify that we can deflect MSR events + * into user space. + */ +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include +#include +#include +#include +#include + +#include "test_util.h" + +#include "kvm_util.h" +#include "processor.h" + +#define VCPU_ID 5 + +static u32 msr_reads, msr_writes; + +static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_deadbeef[1] = { 0x1 }; + +static void deny_msr(uint8_t *bitmap, u32 msr) +{ + u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1); + + bitmap[idx / 8] &= ~(1 << (idx % 8)); +} + +static void prepare_bitmaps(void) +{ + memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000)); + memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write)); + memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000)); + memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000)); + memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read)); + + deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL); + deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK); + deny_msr(bitmap_c0000000_read, MSR_GS_BASE); +} + +struct kvm_msr_filter filter = { + .flags = KVM_MSR_FILTER_DEFAULT_DENY, + .ranges = { + { + .flags = KVM_MSR_FILTER_READ, + .base = 0x00000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_00000000, + }, { + .flags = KVM_MSR_FILTER_WRITE, + .base = 0x00000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_00000000_write, + }, { + .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE, + .base = 0x40000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_40000000, + }, { + .flags = KVM_MSR_FILTER_READ, + .base = 0xc0000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_c0000000_read, + }, { + .flags = KVM_MSR_FILTER_WRITE, + .base = 0xc0000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_c0000000, + }, { + .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ, + .base = 0xdeadbeef, + .nmsrs = 1, + .bitmap = bitmap_deadbeef, + }, + }, +}; + +struct kvm_msr_filter no_filter = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, +}; + +static void guest_msr_calls(bool trapped) +{ + /* This goes into the in-kernel emulation */ + wrmsr(MSR_SYSCALL_MASK, 0); + + if (trapped) { + /* This goes into user space emulation */ + GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK); + GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE); + } else { + GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK); + GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE); + } + + /* If trapped == true, this goes into user space emulation */ + wrmsr(MSR_IA32_POWER_CTL, 0x1234); + + /* This goes into the in-kernel emulation */ + rdmsr(MSR_IA32_POWER_CTL); + + /* Invalid MSR, should always be handled by user space exit */ + GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef); + wrmsr(0xdeadbeef, 0x1234); +} + +static void guest_code(void) +{ + guest_msr_calls(true); + + /* + * Disable msr filtering, so that the kernel + * handles everything in the next round + */ + GUEST_SYNC(0); + + guest_msr_calls(false); + + GUEST_DONE(); +} + +static int handle_ucall(struct kvm_vm *vm) +{ + struct ucall uc; + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_FAIL("Guest assertion not met"); + break; + case UCALL_SYNC: + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &no_filter); + break; + case UCALL_DONE: + return 1; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + return 0; +} + +static void handle_rdmsr(struct kvm_run *run) +{ + run->msr.data = run->msr.index; + msr_reads++; + + if (run->msr.index == MSR_SYSCALL_MASK || + run->msr.index == MSR_GS_BASE) { + TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER, + "MSR read trap w/o access fault"); + } + + if (run->msr.index == 0xdeadbeef) { + TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN, + "MSR deadbeef read trap w/o inval fault"); + } +} + +static void handle_wrmsr(struct kvm_run *run) +{ + /* ignore */ + msr_writes++; + + if (run->msr.index == MSR_IA32_POWER_CTL) { + TEST_ASSERT(run->msr.data == 0x1234, + "MSR data for MSR_IA32_POWER_CTL incorrect"); + TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER, + "MSR_IA32_POWER_CTL trap w/o access fault"); + } + + if (run->msr.index == 0xdeadbeef) { + TEST_ASSERT(run->msr.data == 0x1234, + "MSR data for deadbeef incorrect"); + TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN, + "deadbeef trap w/o inval fault"); + } +} + +int main(int argc, char *argv[]) +{ + struct kvm_enable_cap cap = { + .cap = KVM_CAP_X86_USER_SPACE_MSR, + .args[0] = KVM_MSR_EXIT_REASON_INVAL | + KVM_MSR_EXIT_REASON_UNKNOWN | + KVM_MSR_EXIT_REASON_FILTER, + }; + struct kvm_vm *vm; + struct kvm_run *run; + int rc; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + run = vcpu_state(vm, VCPU_ID); + + rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); + TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); + vm_enable_cap(vm, &cap); + + rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); + TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); + + prepare_bitmaps(); + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter); + + while (1) { + rc = _vcpu_run(vm, VCPU_ID); + + TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc); + + switch (run->exit_reason) { + case KVM_EXIT_X86_RDMSR: + handle_rdmsr(run); + break; + case KVM_EXIT_X86_WRMSR: + handle_wrmsr(run); + break; + case KVM_EXIT_IO: + if (handle_ucall(vm)) + goto done; + break; + } + + } + +done: + TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space"); + TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space"); + + kvm_vm_free(vm); + + return 0; +} -- cgit v1.3.1 From 0c899c25d754ae386940f0e1b86b31d3921480b6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 24 Sep 2020 14:45:27 +0200 Subject: KVM: x86: do not attempt TSC synchronization on guest writes KVM special-cases writes to MSR_IA32_TSC so that all CPUs have the same base for the TSC. This logic is complicated, and we do not want it to have any effect once the VM is started. In particular, if any guest started to synchronize its TSCs with writes to MSR_IA32_TSC rather than MSR_IA32_TSC_ADJUST, the additional effect of kvm_write_tsc code would be uncharted territory. Therefore, this patch makes writes to MSR_IA32_TSC behave essentially the same as writes to MSR_IA32_TSC_ADJUST when they come from the guest. A new selftest (which passes both before and after the patch) checks the current semantics of writes to MSR_IA32_TSC and MSR_IA32_TSC_ADJUST originating from both the host and the guest. Upcoming work to remove the special side effects of host-initiated writes to MSR_IA32_TSC and MSR_IA32_TSC_ADJUST will be able to build onto this test, adjusting the host side to use the new APIs and achieve the same effect. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 30 ++-- tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c | 168 +++++++++++++++++++++ 3 files changed, 179 insertions(+), 20 deletions(-) create mode 100644 tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c (limited to 'tools') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 411f6103532b..c4015a43cc8a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2107,12 +2107,6 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) #endif } -static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset) -{ - u64 curr_offset = vcpu->arch.l1_tsc_offset; - vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset; -} - /* * Multiply tsc by a fixed point number represented by ratio. * @@ -2174,14 +2168,13 @@ static inline bool kvm_check_tsc_unstable(void) return check_tsc_unstable(); } -void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) +static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data) { struct kvm *kvm = vcpu->kvm; u64 offset, ns, elapsed; unsigned long flags; bool matched; bool already_matched; - u64 data = msr->data; bool synchronizing = false; raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); @@ -2190,7 +2183,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) elapsed = ns - kvm->arch.last_tsc_nsec; if (vcpu->arch.virtual_tsc_khz) { - if (data == 0 && msr->host_initiated) { + if (data == 0) { /* * detection of vcpu initialization -- need to sync * with other vCPUs. This particularly helps to keep @@ -2260,9 +2253,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec; vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write; - if (!msr->host_initiated && guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) - update_ia32_tsc_adjust_msr(vcpu, offset); - kvm_vcpu_write_tsc_offset(vcpu, offset); raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); @@ -2277,8 +2267,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) spin_unlock(&kvm->arch.pvclock_gtod_sync_lock); } -EXPORT_SYMBOL_GPL(kvm_write_tsc); - static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) { @@ -3073,7 +3061,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vcpu->arch.msr_ia32_power_ctl = data; break; case MSR_IA32_TSC: - kvm_write_tsc(vcpu, msr_info); + if (msr_info->host_initiated) { + kvm_synchronize_tsc(vcpu, data); + } else { + u64 adj = kvm_compute_tsc_offset(vcpu, data) - vcpu->arch.l1_tsc_offset; + adjust_tsc_offset_guest(vcpu, adj); + vcpu->arch.ia32_tsc_adjust_msr += adj; + } break; case MSR_IA32_XSS: if (!msr_info->host_initiated && @@ -9839,7 +9833,6 @@ fail_mmu_destroy: void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) { - struct msr_data msr; struct kvm *kvm = vcpu->kvm; kvm_hv_vcpu_postcreate(vcpu); @@ -9847,10 +9840,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) if (mutex_lock_killable(&vcpu->mutex)) return; vcpu_load(vcpu); - msr.data = 0x0; - msr.index = MSR_IA32_TSC; - msr.host_initiated = true; - kvm_write_tsc(vcpu, &msr); + kvm_synchronize_tsc(vcpu, 0); vcpu_put(vcpu); /* poll control enabled by default */ diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 80d5c348354c..7ebe71fbca53 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -55,6 +55,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test TEST_GEN_PROGS_x86_64 += x86_64/debug_regs +TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test TEST_GEN_PROGS_x86_64 += clear_dirty_log_test TEST_GEN_PROGS_x86_64 += demand_paging_test diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c new file mode 100644 index 000000000000..f8e761149daa --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Tests for MSR_IA32_TSC and MSR_IA32_TSC_ADJUST. + * + * Copyright (C) 2020, Red Hat, Inc. + */ +#include +#include +#include "kvm_util.h" +#include "processor.h" + +#define VCPU_ID 0 + +#define UNITY (1ull << 30) +#define HOST_ADJUST (UNITY * 64) +#define GUEST_STEP (UNITY * 4) +#define ROUND(x) ((x + UNITY / 2) & -UNITY) +#define rounded_rdmsr(x) ROUND(rdmsr(x)) +#define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vm, 0, x)) + +#define GUEST_ASSERT_EQ(a, b) do { \ + __typeof(a) _a = (a); \ + __typeof(b) _b = (b); \ + if (_a != _b) \ + ucall(UCALL_ABORT, 4, \ + "Failed guest assert: " \ + #a " == " #b, __LINE__, _a, _b); \ + } while(0) + +static void guest_code(void) +{ + u64 val = 0; + + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* Guest: writes to MSR_IA32_TSC affect both MSRs. */ + val = 1ull * GUEST_STEP; + wrmsr(MSR_IA32_TSC, val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */ + GUEST_SYNC(2); + val = 2ull * GUEST_STEP; + wrmsr(MSR_IA32_TSC_ADJUST, val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* Host: setting the TSC offset. */ + GUEST_SYNC(3); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* + * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the + * host-side offset and affect both MSRs. + */ + GUEST_SYNC(4); + val = 3ull * GUEST_STEP; + wrmsr(MSR_IA32_TSC_ADJUST, val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* + * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side + * offset is now visible in MSR_IA32_TSC_ADJUST. + */ + GUEST_SYNC(5); + val = 4ull * GUEST_STEP; + wrmsr(MSR_IA32_TSC, val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST); + + GUEST_DONE(); +} + +static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage) +{ + struct ucall uc; + + vcpu_args_set(vm, vcpuid, 1, vcpuid); + + vcpu_ioctl(vm, vcpuid, KVM_RUN, NULL); + + switch (get_ucall(vm, vcpuid, &uc)) { + case UCALL_SYNC: + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx", + stage + 1, (ulong)uc.args[1]); + return; + case UCALL_DONE: + return; + case UCALL_ABORT: + TEST_ASSERT(false, "%s at %s:%ld\n" \ + "\tvalues: %#lx, %#lx", (const char *)uc.args[0], + __FILE__, uc.args[1], uc.args[2], uc.args[3]); + default: + TEST_ASSERT(false, "Unexpected exit: %s", + exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason)); + } +} + +int main(void) +{ + struct kvm_vm *vm; + uint64_t val; + + vm = vm_create_default(VCPU_ID, 0, guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + val = 0; + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* Guest: writes to MSR_IA32_TSC affect both MSRs. */ + run_vcpu(vm, VCPU_ID, 1); + val = 1ull * GUEST_STEP; + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */ + run_vcpu(vm, VCPU_ID, 2); + val = 2ull * GUEST_STEP; + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* + * Host: writes to MSR_IA32_TSC set the host-side offset + * and therefore do not change MSR_IA32_TSC_ADJUST. + */ + vcpu_set_msr(vm, 0, MSR_IA32_TSC, HOST_ADJUST + val); + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + run_vcpu(vm, VCPU_ID, 3); + + /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC. */ + vcpu_set_msr(vm, 0, MSR_IA32_TSC_ADJUST, UNITY * 123456); + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + ASSERT_EQ(vcpu_get_msr(vm, 0, MSR_IA32_TSC_ADJUST), UNITY * 123456); + + /* Restore previous value. */ + vcpu_set_msr(vm, 0, MSR_IA32_TSC_ADJUST, val); + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* + * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the + * host-side offset and affect both MSRs. + */ + run_vcpu(vm, VCPU_ID, 4); + val = 3ull * GUEST_STEP; + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* + * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side + * offset is now visible in MSR_IA32_TSC_ADJUST. + */ + run_vcpu(vm, VCPU_ID, 5); + val = 4ull * GUEST_STEP; + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST); + + kvm_vm_free(vm); + + return 0; +} -- cgit v1.3.1 From d1c5a0e86a4e39c9f1bfbfb5ced2ef14103cbf45 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 24 Sep 2020 21:44:52 +0900 Subject: perf stat: Add --for-each-cgroup option The --for-each-cgroup option is a syntax sugar to monitor large number of cgroups easily. Current command line requires to list all the events and cgroups even if users want to monitor same events for each cgroup. This patch addresses that usage by copying given events for each cgroup on user's behalf. For instance, if they want to monitor 6 events for 200 cgroups each they should write 1200 event names (with -e) AND 1200 cgroup names (with -G) on the command line. But with this change, they can just specify 6 events and 200 cgroups with a new option. A simpler example below: It wants to measure 3 events for 2 cgroups ('A' and 'B'). The result is that total 6 events are counted like below. $ perf stat -a -e cpu-clock,cycles,instructions --for-each-cgroup A,B sleep 1 Performance counter stats for 'system wide': 988.18 msec cpu-clock A # 0.987 CPUs utilized 3,153,761,702 cycles A # 3.200 GHz (100.00%) 8,067,769,847 instructions A # 2.57 insn per cycle (100.00%) 982.71 msec cpu-clock B # 0.982 CPUs utilized 3,136,093,298 cycles B # 3.182 GHz (99.99%) 8,109,619,327 instructions B # 2.58 insn per cycle (99.99%) 1.001228054 seconds time elapsed Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200924124455.336326-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 5 +++ tools/perf/builtin-stat.c | 27 +++++++++++- tools/perf/util/cgroup.c | 79 ++++++++++++++++++++++++++++++++++ tools/perf/util/cgroup.h | 1 + tools/perf/util/stat.h | 1 + 5 files changed, 112 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 5bf3d7ae4660..9f9f29025e49 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -166,6 +166,11 @@ use '-e e1 -e e2 -G foo,foo' or just use '-e e1 -e e2 -G foo'. If wanting to monitor, say, 'cycles' for a cgroup and also for system wide, this command line can be used: 'perf stat -e cycles -G cgroup_name -a -e cycles'. +--for-each-cgroup name:: +Expand event list for each cgroup in "name" (allow multiple cgroups separated +by comma). This has same effect that repeating -e option and -G option for +each event x name. This option cannot be used with -G/--cgroup option. + -o file:: --output file:: Print the output into the designated file. diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index cbb2d977eec7..0c9e21a29795 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1060,6 +1060,17 @@ static int parse_control_option(const struct option *opt, return evlist__parse_control(str, &config->ctl_fd, &config->ctl_fd_ack, &config->ctl_fd_close); } +static int parse_stat_cgroups(const struct option *opt, + const char *str, int unset) +{ + if (stat_config.cgroup_list) { + pr_err("--cgroup and --for-each-cgroup cannot be used together\n"); + return -1; + } + + return parse_cgroups(opt, str, unset); +} + static struct option stat_options[] = { OPT_BOOLEAN('T', "transaction", &transaction_run, "hardware transaction statistics"), @@ -1103,7 +1114,9 @@ static struct option stat_options[] = { OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator", "print counts with custom separator"), OPT_CALLBACK('G', "cgroup", &evsel_list, "name", - "monitor event in cgroup name only", parse_cgroups), + "monitor event in cgroup name only", parse_stat_cgroups), + OPT_STRING(0, "for-each-cgroup", &stat_config.cgroup_list, "name", + "expand events for each cgroup"), OPT_STRING('o', "output", &output_name, "file", "output file name"), OPT_BOOLEAN(0, "append", &append_file, "append to the output file"), OPT_INTEGER(0, "log-fd", &output_fd, @@ -2213,6 +2226,18 @@ int cmd_stat(int argc, const char **argv) if (add_default_attributes()) goto out; + if (stat_config.cgroup_list) { + if (nr_cgroups > 0) { + pr_err("--cgroup and --for-each-cgroup cannot be used together\n"); + parse_options_usage(stat_usage, stat_options, "G", 1); + parse_options_usage(NULL, stat_options, "for-each-cgroup", 0); + goto out; + } + + if (evlist__expand_cgroup(evsel_list, stat_config.cgroup_list) < 0) + goto out; + } + target__validate(&target); if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide)) diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 050dea9f1e88..8b6a4fa49082 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -193,6 +193,85 @@ int parse_cgroups(const struct option *opt, const char *str, return 0; } +int evlist__expand_cgroup(struct evlist *evlist, const char *str) +{ + struct evlist *orig_list, *tmp_list; + struct evsel *pos, *evsel, *leader; + struct cgroup *cgrp = NULL; + const char *p, *e, *eos = str + strlen(str); + int ret = -1; + + if (evlist->core.nr_entries == 0) { + fprintf(stderr, "must define events before cgroups\n"); + return -EINVAL; + } + + orig_list = evlist__new(); + tmp_list = evlist__new(); + if (orig_list == NULL || tmp_list == NULL) { + fprintf(stderr, "memory allocation failed\n"); + return -ENOMEM; + } + + /* save original events and init evlist */ + perf_evlist__splice_list_tail(orig_list, &evlist->core.entries); + evlist->core.nr_entries = 0; + + for (;;) { + p = strchr(str, ','); + e = p ? p : eos; + + /* allow empty cgroups, i.e., skip */ + if (e - str) { + /* termination added */ + char *name = strndup(str, e - str); + if (!name) + goto out_err; + + cgrp = cgroup__new(name); + free(name); + if (cgrp == NULL) + goto out_err; + } else { + cgrp = NULL; + } + + leader = NULL; + evlist__for_each_entry(orig_list, pos) { + evsel = evsel__clone(pos); + if (evsel == NULL) + goto out_err; + + cgroup__put(evsel->cgrp); + evsel->cgrp = cgroup__get(cgrp); + + if (evsel__is_group_leader(pos)) + leader = evsel; + evsel->leader = leader; + + evlist__add(tmp_list, evsel); + } + /* cgroup__new() has a refcount, release it here */ + cgroup__put(cgrp); + nr_cgroups++; + + perf_evlist__splice_list_tail(evlist, &tmp_list->core.entries); + tmp_list->core.nr_entries = 0; + + if (!p) { + ret = 0; + break; + } + str = p+1; + } + +out_err: + evlist__delete(orig_list); + evlist__delete(tmp_list); + + return ret; +} + static struct cgroup *__cgroup__findnew(struct rb_root *root, uint64_t id, bool create, const char *path) { diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h index e98d5975fe55..32893018296f 100644 --- a/tools/perf/util/cgroup.h +++ b/tools/perf/util/cgroup.h @@ -24,6 +24,7 @@ void cgroup__put(struct cgroup *cgroup); struct evlist; struct cgroup *evlist__findnew_cgroup(struct evlist *evlist, const char *name); +int evlist__expand_cgroup(struct evlist *evlist, const char *cgroups); void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup); diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index f36c8c92efa0..487010c624be 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -145,6 +145,7 @@ struct perf_stat_config { int ctl_fd; int ctl_fd_ack; bool ctl_fd_close; + const char *cgroup_list; }; void perf_stat__set_big_num(int set); -- cgit v1.3.1 From b214ba8c4275d66029ad34ad222326dca43e7e26 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 24 Sep 2020 21:44:53 +0900 Subject: perf tools: Copy metric events properly when expand cgroups The metricgroup__copy_metric_events() is to handle metrics events when expanding event for cgroups. As the metric events keep pointers to evsel, it should be refreshed when events are cloned during the operation. The perf_stat__collect_metric_expr() is also called in case an event has a metric directly. During the copy, it references evsel by index as the evlist now has cloned evsels for the given cgroup. Also kernel test robot found an issue in the python module import so add empty implementations of those two functions to fix it. Reported-by: kernel test robot Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: John Garry Cc: Kajol Jain Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200924124455.336326-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 3 +- tools/perf/util/cgroup.c | 23 +++++++++++- tools/perf/util/cgroup.h | 4 +- tools/perf/util/evlist.c | 11 ++++++ tools/perf/util/evlist.h | 1 + tools/perf/util/metricgroup.c | 85 +++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/metricgroup.h | 6 +++ tools/perf/util/python.c | 19 ++++++++++ 8 files changed, 149 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 0c9e21a29795..2751699a8969 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2234,7 +2234,8 @@ int cmd_stat(int argc, const char **argv) goto out; } - if (evlist__expand_cgroup(evsel_list, stat_config.cgroup_list) < 0) + if (evlist__expand_cgroup(evsel_list, stat_config.cgroup_list, + &stat_config.metric_events) < 0) goto out; } diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 8b6a4fa49082..6e64c908fa71 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -3,6 +3,9 @@ #include "evsel.h" #include "cgroup.h" #include "evlist.h" +#include "rblist.h" +#include "metricgroup.h" +#include "stat.h" #include #include #include @@ -193,10 +196,12 @@ int parse_cgroups(const struct option *opt, const char *str, return 0; } -int evlist__expand_cgroup(struct evlist *evlist, const char *str) +int evlist__expand_cgroup(struct evlist *evlist, const char *str, + struct rblist *metric_events) { struct evlist *orig_list, *tmp_list; struct evsel *pos, *evsel, *leader; + struct rblist orig_metric_events; struct cgroup *cgrp = NULL; const char *p, *e, *eos = str + strlen(str); int ret = -1; @@ -217,6 +222,13 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str) perf_evlist__splice_list_tail(orig_list, &evlist->core.entries); evlist->core.nr_entries = 0; + if (metric_events) { + orig_metric_events = *metric_events; + rblist__init(metric_events); + } else { + rblist__init(&orig_metric_events); + } + for (;;) { p = strchr(str, ','); e = p ? p : eos; @@ -255,6 +267,14 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str) cgroup__put(cgrp); nr_cgroups++; + if (metric_events) { + perf_stat__collect_metric_expr(tmp_list); + if (metricgroup__copy_metric_events(tmp_list, cgrp, + metric_events, + &orig_metric_events) < 0) + break; + } + perf_evlist__splice_list_tail(evlist, &tmp_list->core.entries); tmp_list->core.nr_entries = 0; @@ -268,6 +288,7 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str) out_err: evlist__delete(orig_list); evlist__delete(tmp_list); + rblist__exit(&orig_metric_events); return ret; } diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h index 32893018296f..eea6df8ee373 100644 --- a/tools/perf/util/cgroup.h +++ b/tools/perf/util/cgroup.h @@ -22,9 +22,11 @@ struct cgroup *cgroup__get(struct cgroup *cgroup); void cgroup__put(struct cgroup *cgroup); struct evlist; +struct rblist; struct cgroup *evlist__findnew_cgroup(struct evlist *evlist, const char *name); -int evlist__expand_cgroup(struct evlist *evlist, const char *cgroups); +int evlist__expand_cgroup(struct evlist *evlist, const char *cgroups, + struct rblist *metric_events); void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e971daf946d0..8bdf3d2c907c 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1969,3 +1969,14 @@ int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd) return err; } + +struct evsel *evlist__find_evsel(struct evlist *evlist, int idx) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->idx == idx) + return evsel; + } + return NULL; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index bc38a53f6a1a..e1a450322bc5 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -386,4 +386,5 @@ int evlist__ctlfd_ack(struct evlist *evlist); #define EVLIST_ENABLED_MSG "Events enabled\n" #define EVLIST_DISABLED_MSG "Events disabled\n" +struct evsel *evlist__find_evsel(struct evlist *evlist, int idx); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 941702cb6a79..55ea514ac0ce 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -24,6 +24,7 @@ #include #include "util.h" #include +#include "cgroup.h" struct metric_event *metricgroup__lookup(struct rblist *metric_events, struct evsel *evsel, @@ -1120,3 +1121,87 @@ bool metricgroup__has_metric(const char *metric) } return false; } + +int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, + struct rblist *new_metric_events, + struct rblist *old_metric_events) +{ + unsigned i; + + for (i = 0; i < rblist__nr_entries(old_metric_events); i++) { + struct rb_node *nd; + struct metric_event *old_me, *new_me; + struct metric_expr *old_expr, *new_expr; + struct evsel *evsel; + size_t alloc_size; + int idx, nr; + + nd = rblist__entry(old_metric_events, i); + old_me = container_of(nd, struct metric_event, nd); + + evsel = evlist__find_evsel(evlist, old_me->evsel->idx); + if (!evsel) + return -EINVAL; + new_me = metricgroup__lookup(new_metric_events, evsel, true); + if (!new_me) + return -ENOMEM; + + pr_debug("copying metric event for cgroup '%s': %s (idx=%d)\n", + cgrp ? cgrp->name : "root", evsel->name, evsel->idx); + + list_for_each_entry(old_expr, &old_me->head, nd) { + new_expr = malloc(sizeof(*new_expr)); + if (!new_expr) + return -ENOMEM; + + new_expr->metric_expr = old_expr->metric_expr; + new_expr->metric_name = old_expr->metric_name; + new_expr->metric_unit = old_expr->metric_unit; + new_expr->runtime = old_expr->runtime; + + if (old_expr->metric_refs) { + /* calculate number of metric_events */ + for (nr = 0; old_expr->metric_refs[nr].metric_name; nr++) + continue; + alloc_size = sizeof(*new_expr->metric_refs); + new_expr->metric_refs = calloc(nr + 1, alloc_size); + if (!new_expr->metric_refs) { + free(new_expr); + return -ENOMEM; + } + + memcpy(new_expr->metric_refs, old_expr->metric_refs, + nr * alloc_size); + } else { + new_expr->metric_refs = NULL; + } + + /* calculate number of metric_events */ + for (nr = 0; old_expr->metric_events[nr]; nr++) + continue; + alloc_size = sizeof(*new_expr->metric_events); + new_expr->metric_events = calloc(nr + 1, alloc_size); + if (!new_expr->metric_events) { + free(new_expr->metric_refs); + free(new_expr); + return -ENOMEM; + } + + /* copy evsel in the same position */ + for (idx = 0; idx < nr; idx++) { + evsel = old_expr->metric_events[idx]; + evsel = evlist__find_evsel(evlist, evsel->idx); + if (evsel == NULL) { + free(new_expr->metric_events); + free(new_expr->metric_refs); + free(new_expr); + return -EINVAL; + } + new_expr->metric_events[idx] = evsel; + } + + list_add(&new_expr->nd, &new_me->head); + } + } + return 0; +} diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 491a5d78252d..ed1b9392e624 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -7,11 +7,13 @@ #include #include "pmu-events/pmu-events.h" +struct evlist; struct evsel; struct evlist; struct option; struct rblist; struct pmu_events_map; +struct cgroup; struct metric_event { struct rb_node nd; @@ -55,4 +57,8 @@ void metricgroup__print(bool metrics, bool groups, char *filter, bool metricgroup__has_metric(const char *metric); int arch_get_runtimeparam(struct pmu_event *pe __maybe_unused); void metricgroup__rblist_exit(struct rblist *metric_events); + +int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, + struct rblist *new_metric_events, + struct rblist *old_metric_events); #endif diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 74f85948d101..ae8edde7c50e 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -15,6 +15,8 @@ #include "thread_map.h" #include "trace-event.h" #include "mmap.h" +#include "stat.h" +#include "metricgroup.h" #include "util/env.h" #include #include "util.h" @@ -60,6 +62,23 @@ int parse_callchain_record(const char *arg __maybe_unused, */ struct perf_env perf_env; +/* + * Add this one here not to drag util/stat-shadow.c + */ +void perf_stat__collect_metric_expr(struct evlist *evsel_list) +{ +} + +/* + * Add this one here not to drag util/metricgroup.c + */ +int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, + struct rblist *new_metric_events, + struct rblist *old_metric_events) +{ + return 0; +} + /* * Support debug printing even though util/debug.c is not linked. That means * implementing 'verbose' and 'eprintf'. -- cgit v1.3.1 From 89fb1ca2abb7fba6587a5768cfee9f89daf9be6f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 24 Sep 2020 21:44:54 +0900 Subject: perf tools: Allow creation of cgroup without open This is a preparation for a test case of expanding events for multiple cgroups. Instead of using real system cgroup, the test will use fake cgroups so it needs a way to have them without a open file descriptor. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200924124455.336326-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 +- tools/perf/util/cgroup.c | 19 ++++++++++++------- tools/perf/util/cgroup.h | 2 +- 3 files changed, 14 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2751699a8969..b01af171d94f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2235,7 +2235,7 @@ int cmd_stat(int argc, const char **argv) } if (evlist__expand_cgroup(evsel_list, stat_config.cgroup_list, - &stat_config.metric_events) < 0) + &stat_config.metric_events, true) < 0) goto out; } diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 6e64c908fa71..b81324a13a2b 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -51,7 +51,7 @@ static struct cgroup *evlist__find_cgroup(struct evlist *evlist, const char *str return NULL; } -static struct cgroup *cgroup__new(const char *name) +static struct cgroup *cgroup__new(const char *name, bool do_open) { struct cgroup *cgroup = zalloc(sizeof(*cgroup)); @@ -61,9 +61,14 @@ static struct cgroup *cgroup__new(const char *name) cgroup->name = strdup(name); if (!cgroup->name) goto out_err; - cgroup->fd = open_cgroup(name); - if (cgroup->fd == -1) - goto out_free_name; + + if (do_open) { + cgroup->fd = open_cgroup(name); + if (cgroup->fd == -1) + goto out_free_name; + } else { + cgroup->fd = -1; + } } return cgroup; @@ -79,7 +84,7 @@ struct cgroup *evlist__findnew_cgroup(struct evlist *evlist, const char *name) { struct cgroup *cgroup = evlist__find_cgroup(evlist, name); - return cgroup ?: cgroup__new(name); + return cgroup ?: cgroup__new(name, true); } static int add_cgroup(struct evlist *evlist, const char *str) @@ -197,7 +202,7 @@ int parse_cgroups(const struct option *opt, const char *str, } int evlist__expand_cgroup(struct evlist *evlist, const char *str, - struct rblist *metric_events) + struct rblist *metric_events, bool open_cgroup) { struct evlist *orig_list, *tmp_list; struct evsel *pos, *evsel, *leader; @@ -240,7 +245,7 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, if (!name) goto out_err; - cgrp = cgroup__new(name); + cgrp = cgroup__new(name, open_cgroup); free(name); if (cgrp == NULL) goto out_err; diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h index eea6df8ee373..162906f3412a 100644 --- a/tools/perf/util/cgroup.h +++ b/tools/perf/util/cgroup.h @@ -26,7 +26,7 @@ struct rblist; struct cgroup *evlist__findnew_cgroup(struct evlist *evlist, const char *name); int evlist__expand_cgroup(struct evlist *evlist, const char *cgroups, - struct rblist *metric_events); + struct rblist *metric_events, bool open_cgroup); void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup); -- cgit v1.3.1 From 40b74c30ffb97c668e9745a098101fecaaec0ea2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 24 Sep 2020 21:44:55 +0900 Subject: perf test: Add expand cgroup event test It'll expand given events for cgroups A, B and C. $ perf test -v expansion 69: Event expansion for cgroups : --- start --- test child forked, pid 983140 metric expr 1 / IPC for CPI metric expr instructions / cycles for IPC found event instructions found event cycles adding {instructions,cycles}:W copying metric event for cgroup 'A': instructions (idx=0) copying metric event for cgroup 'B': instructions (idx=0) copying metric event for cgroup 'C': instructions (idx=0) test child finished with 0 ---- end ---- Event expansion for cgroups: Ok Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: John Garry Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200924124455.336326-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 4 + tools/perf/tests/expand-cgroup.c | 241 +++++++++++++++++++++++++++++++++++++++ tools/perf/tests/tests.h | 1 + 4 files changed, 247 insertions(+) create mode 100644 tools/perf/tests/expand-cgroup.c (limited to 'tools') diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 69bea7996f18..4d15bf6041fb 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -61,6 +61,7 @@ perf-y += demangle-java-test.o perf-y += pfm.o perf-y += parse-metric.o perf-y += pe-file-parsing.o +perf-y += expand-cgroup.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 651b8ea3354a..132bdb3e6c31 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -345,6 +345,10 @@ static struct test generic_tests[] = { .desc = "PE file support", .func = test__pe_file_parsing, }, + { + .desc = "Event expansion for cgroups", + .func = test__expand_cgroup_events, + }, { .func = NULL, }, diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c new file mode 100644 index 000000000000..d5771e4d094f --- /dev/null +++ b/tools/perf/tests/expand-cgroup.c @@ -0,0 +1,241 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "tests.h" +#include "debug.h" +#include "evlist.h" +#include "cgroup.h" +#include "rblist.h" +#include "metricgroup.h" +#include "parse-events.h" +#include "pmu-events/pmu-events.h" +#include "pfm.h" +#include +#include +#include +#include + +static int test_expand_events(struct evlist *evlist, + struct rblist *metric_events) +{ + int i, ret = TEST_FAIL; + int nr_events; + bool was_group_event; + int nr_members; /* for the first evsel only */ + const char cgrp_str[] = "A,B,C"; + const char *cgrp_name[] = { "A", "B", "C" }; + int nr_cgrps = ARRAY_SIZE(cgrp_name); + char **ev_name; + struct evsel *evsel; + + TEST_ASSERT_VAL("evlist is empty", !perf_evlist__empty(evlist)); + + nr_events = evlist->core.nr_entries; + ev_name = calloc(nr_events, sizeof(*ev_name)); + if (ev_name == NULL) { + pr_debug("memory allocation failure\n"); + return TEST_FAIL; + } + i = 0; + evlist__for_each_entry(evlist, evsel) { + ev_name[i] = strdup(evsel->name); + if (ev_name[i] == NULL) { + pr_debug("memory allocation failure\n"); + goto out; + } + i++; + } + /* remember grouping info */ + was_group_event = evsel__is_group_event(evlist__first(evlist)); + nr_members = evlist__first(evlist)->core.nr_members; + + ret = evlist__expand_cgroup(evlist, cgrp_str, metric_events, false); + if (ret < 0) { + pr_debug("failed to expand events for cgroups\n"); + goto out; + } + + ret = TEST_FAIL; + if (evlist->core.nr_entries != nr_events * nr_cgrps) { + pr_debug("event count doesn't match\n"); + goto out; + } + + i = 0; + evlist__for_each_entry(evlist, evsel) { + if (strcmp(evsel->name, ev_name[i % nr_events])) { + pr_debug("event name doesn't match:\n"); + pr_debug(" evsel[%d]: %s\n expected: %s\n", + i, evsel->name, ev_name[i % nr_events]); + goto out; + } + if (strcmp(evsel->cgrp->name, cgrp_name[i / nr_events])) { + pr_debug("cgroup name doesn't match:\n"); + pr_debug(" evsel[%d]: %s\n expected: %s\n", + i, evsel->cgrp->name, cgrp_name[i / nr_events]); + goto out; + } + + if ((i % nr_events) == 0) { + if (evsel__is_group_event(evsel) != was_group_event) { + pr_debug("event group doesn't match: got %s, expect %s\n", + evsel__is_group_event(evsel) ? "true" : "false", + was_group_event ? "true" : "false"); + goto out; + } + if (evsel->core.nr_members != nr_members) { + pr_debug("event group member doesn't match: %d vs %d\n", + evsel->core.nr_members, nr_members); + goto out; + } + } + i++; + } + ret = TEST_OK; + +out: for (i = 0; i < nr_events; i++) + free(ev_name[i]); + free(ev_name); + return ret; +} + +static int expand_default_events(void) +{ + int ret; + struct evlist *evlist; + struct rblist metric_events; + + evlist = perf_evlist__new_default(); + TEST_ASSERT_VAL("failed to get evlist", evlist); + + rblist__init(&metric_events); + ret = test_expand_events(evlist, &metric_events); + evlist__delete(evlist); + return ret; +} + +static int expand_group_events(void) +{ + int ret; + struct evlist *evlist; + struct rblist metric_events; + struct parse_events_error err; + const char event_str[] = "{cycles,instructions}"; + + symbol_conf.event_group = true; + + evlist = evlist__new(); + TEST_ASSERT_VAL("failed to get evlist", evlist); + + ret = parse_events(evlist, event_str, &err); + if (ret < 0) { + pr_debug("failed to parse event '%s', err %d, str '%s'\n", + event_str, ret, err.str); + parse_events_print_error(&err, event_str); + goto out; + } + + rblist__init(&metric_events); + ret = test_expand_events(evlist, &metric_events); +out: + evlist__delete(evlist); + return ret; +} + +static int expand_libpfm_events(void) +{ + int ret; + struct evlist *evlist; + struct rblist metric_events; + const char event_str[] = "UNHALTED_CORE_CYCLES"; + struct option opt = { + .value = &evlist, + }; + + symbol_conf.event_group = true; + + evlist = evlist__new(); + TEST_ASSERT_VAL("failed to get evlist", evlist); + + ret = parse_libpfm_events_option(&opt, event_str, 0); + if (ret < 0) { + pr_debug("failed to parse libpfm event '%s', err %d\n", + event_str, ret); + goto out; + } + if (perf_evlist__empty(evlist)) { + pr_debug("libpfm was not enabled\n"); + goto out; + } + + rblist__init(&metric_events); + ret = test_expand_events(evlist, &metric_events); +out: + evlist__delete(evlist); + return ret; +} + +static int expand_metric_events(void) +{ + int ret; + struct evlist *evlist; + struct rblist metric_events; + const char metric_str[] = "CPI"; + + struct pmu_event pme_test[] = { + { + .metric_expr = "instructions / cycles", + .metric_name = "IPC", + }, + { + .metric_expr = "1 / IPC", + .metric_name = "CPI", + }, + { + .metric_expr = NULL, + .metric_name = NULL, + }, + }; + struct pmu_events_map ev_map = { + .cpuid = "test", + .version = "1", + .type = "core", + .table = pme_test, + }; + + evlist = evlist__new(); + TEST_ASSERT_VAL("failed to get evlist", evlist); + + rblist__init(&metric_events); + ret = metricgroup__parse_groups_test(evlist, &ev_map, metric_str, + false, false, &metric_events); + if (ret < 0) { + pr_debug("failed to parse '%s' metric\n", metric_str); + goto out; + } + + ret = test_expand_events(evlist, &metric_events); + +out: + metricgroup__rblist_exit(&metric_events); + evlist__delete(evlist); + return ret; +} + +int test__expand_cgroup_events(struct test *test __maybe_unused, + int subtest __maybe_unused) +{ + int ret; + + ret = expand_default_events(); + TEST_ASSERT_EQUAL("failed to expand default events", ret, 0); + + ret = expand_group_events(); + TEST_ASSERT_EQUAL("failed to expand event group", ret, 0); + + ret = expand_libpfm_events(); + TEST_ASSERT_EQUAL("failed to expand event group", ret, 0); + + ret = expand_metric_events(); + TEST_ASSERT_EQUAL("failed to expand metric events", ret, 0); + + return ret; +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index ef0f33c6ba23..c85a2c08e407 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -123,6 +123,7 @@ const char *test__pfm_subtest_get_desc(int subtest); int test__pfm_subtest_get_nr(void); int test__parse_metric(struct test *test, int subtest); int test__pe_file_parsing(struct test *test, int subtest); +int test__expand_cgroup_events(struct test *test, int subtest); bool test__bp_signal_is_supported(void); bool test__bp_account_is_supported(void); -- cgit v1.3.1 From aa98d8482c8397d4087248e1f50d50078b34326d Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Sep 2020 17:39:03 -0700 Subject: perf parse-events: Reduce casts around bp_addr perf_event_attr bp_addr is a u64. parse-events.y parses it as a u64, but casts it to a void* and then parse-events.c casts it back to a u64. Rather than all the casts, change the type of the address to be a u64. This removes an issue noted in: https://lore.kernel.org/lkml/20200903184359.GC3495158@kernel.org/ Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jin Yao Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200925003903.561568-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 4 ++-- tools/perf/util/parse-events.h | 2 +- tools/perf/util/parse-events.y | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 667cbca1547a..f82ef1e840b2 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -940,12 +940,12 @@ do { \ } int parse_events_add_breakpoint(struct list_head *list, int *idx, - void *ptr, char *type, u64 len) + u64 addr, char *type, u64 len) { struct perf_event_attr attr; memset(&attr, 0, sizeof(attr)); - attr.bp_addr = (unsigned long) ptr; + attr.bp_addr = addr; if (parse_breakpoint_type(type, &attr)) return -EINVAL; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 00cde7d2e30c..e80c9b74f2f2 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -190,7 +190,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, struct parse_events_error *error, struct list_head *head_config); int parse_events_add_breakpoint(struct list_head *list, int *idx, - void *ptr, char *type, u64 len); + u64 addr, char *type, u64 len); int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, struct list_head *head_config, diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 645bf4f1859f..d5b6aff82f21 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -511,7 +511,7 @@ PE_PREFIX_MEM PE_VALUE '/' PE_VALUE ':' PE_MODIFIER_BP sep_dc list = alloc_list(); ABORT_ON(!list); err = parse_events_add_breakpoint(list, &parse_state->idx, - (void *)(uintptr_t) $2, $6, $4); + $2, $6, $4); free($6); if (err) { free(list); @@ -528,7 +528,7 @@ PE_PREFIX_MEM PE_VALUE '/' PE_VALUE sep_dc list = alloc_list(); ABORT_ON(!list); if (parse_events_add_breakpoint(list, &parse_state->idx, - (void *)(uintptr_t) $2, NULL, $4)) { + $2, NULL, $4)) { free(list); YYABORT; } @@ -544,7 +544,7 @@ PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc list = alloc_list(); ABORT_ON(!list); err = parse_events_add_breakpoint(list, &parse_state->idx, - (void *)(uintptr_t) $2, $4, 0); + $2, $4, 0); free($4); if (err) { free(list); @@ -561,7 +561,7 @@ PE_PREFIX_MEM PE_VALUE sep_dc list = alloc_list(); ABORT_ON(!list); if (parse_events_add_breakpoint(list, &parse_state->idx, - (void *)(uintptr_t) $2, NULL, 0)) { + $2, NULL, 0)) { free(list); YYABORT; } -- cgit v1.3.1 From a55b7bb1c14662490689e094964ecc5387bb2b55 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 23 Sep 2020 14:06:55 -0700 Subject: perf test: Fix msan uninitialized use. Ensure 'st' is initialized before an error branch is taken. Fixes test "67: Parse and process metrics" with LLVM msan: ==6757==WARNING: MemorySanitizer: use-of-uninitialized-value #0 0x5570edae947d in rblist__exit tools/perf/util/rblist.c:114:2 #1 0x5570edb1c6e8 in runtime_stat__exit tools/perf/util/stat-shadow.c:141:2 #2 0x5570ed92cfae in __compute_metric tools/perf/tests/parse-metric.c:187:2 #3 0x5570ed92cb74 in compute_metric tools/perf/tests/parse-metric.c:196:9 #4 0x5570ed92c6d8 in test_recursion_fail tools/perf/tests/parse-metric.c:318:2 #5 0x5570ed92b8c8 in test__parse_metric tools/perf/tests/parse-metric.c:356:2 #6 0x5570ed8de8c1 in run_test tools/perf/tests/builtin-test.c:410:9 #7 0x5570ed8ddadf in test_and_print tools/perf/tests/builtin-test.c:440:9 #8 0x5570ed8dca04 in __cmd_test tools/perf/tests/builtin-test.c:661:4 #9 0x5570ed8dbc07 in cmd_test tools/perf/tests/builtin-test.c:807:9 #10 0x5570ed7326cc in run_builtin tools/perf/perf.c:313:11 #11 0x5570ed731639 in handle_internal_command tools/perf/perf.c:365:8 #12 0x5570ed7323cd in run_argv tools/perf/perf.c:409:2 #13 0x5570ed731076 in main tools/perf/perf.c:539:3 Fixes: commit f5a56570a3f2 ("perf test: Fix memory leaks in parse-metric test") Signed-off-by: Ian Rogers Reviewed-by: Nick Desaulniers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: clang-built-linux@googlegroups.com Link: http://lore.kernel.org/lkml/20200923210655.4143682-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-metric.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index aea4f970fccc..7c1bde01cb50 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -157,6 +157,7 @@ static int __compute_metric(const char *name, struct value *vals, } perf_evlist__set_maps(&evlist->core, cpus, NULL); + runtime_stat__init(&st); /* Parse the metric into metric_events list. */ err = metricgroup__parse_groups_test(evlist, &map, name, @@ -170,7 +171,6 @@ static int __compute_metric(const char *name, struct value *vals, goto out; /* Load the runtime stats with given numbers for events. */ - runtime_stat__init(&st); load_runtime_stat(&st, evlist, vals); /* And execute the metric */ -- cgit v1.3.1 From 53db3e53e22d77eadd079076086c32b2764578da Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 25 Sep 2020 17:56:43 -0700 Subject: selftests: net: add a test for shared UDP tunnel info tables Add a test run of checks validating the shared UDP tunnel port tables function as we expect. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- .../drivers/net/netdevsim/udp_tunnel_nic.sh | 109 +++++++++++++++++++++ 1 file changed, 109 insertions(+) mode change 100644 => 100755 tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh old mode 100644 new mode 100755 index ba1d53b9f815..9225133784af --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -775,6 +775,115 @@ for port in 0 1; do exp1=( 0 0 0 0 ) done +cleanup_nsim + +# shared port tables +pfx="table sharing" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +echo 0 > $NSIM_DEV_DFS/udp_ports_open_only +echo 1 > $NSIM_DEV_DFS/udp_ports_sleep +echo 1 > $NSIM_DEV_DFS/udp_ports_shared + +old_netdevs=$(ls /sys/class/net) +echo 1 > $NSIM_DEV_SYS/new_port +NSIM_NETDEV=`get_netdev_name old_netdevs` +old_netdevs=$(ls /sys/class/net) +echo 2 > $NSIM_DEV_SYS/new_port +NSIM_NETDEV2=`get_netdev_name old_netdevs` + +msg="VxLAN v4 devices" +exp0=( `mke 4789 1` 0 0 0 ) +exp1=( 0 0 0 0 ) +new_vxlan vxlan0 4789 $NSIM_NETDEV +new_vxlan vxlan1 4789 $NSIM_NETDEV2 + +msg="VxLAN v4 devices go down" +exp0=( 0 0 0 0 ) +ifconfig vxlan1 down +ifconfig vxlan0 down +check_tables + +for ifc in vxlan0 vxlan1; do + ifconfig $ifc up +done + +msg="VxLAN v6 device" +exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) +new_vxlan vxlanC 4790 $NSIM_NETDEV 6 + +msg="Geneve device" +exp1=( `mke 6081 2` 0 0 0 ) +new_geneve gnv0 6081 + +msg="NIC device goes down" +ifconfig $NSIM_NETDEV down +check_tables + +msg="NIC device goes up again" +ifconfig $NSIM_NETDEV up +check_tables + +for i in `seq 2`; do + msg="turn feature off - 1, rep $i" + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off + check_tables + + msg="turn feature off - 2, rep $i" + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload off + check_tables + + msg="turn feature on - 1, rep $i" + exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) + exp1=( `mke 6081 2` 0 0 0 ) + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on + check_tables + + msg="turn feature on - 2, rep $i" + ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload on + check_tables +done + +msg="tunnels destroyed 1" +cleanup_tuns +exp0=( 0 0 0 0 ) +exp1=( 0 0 0 0 ) +check_tables + +overflow_table0 "overflow NIC table" + +msg="re-add a port" + +echo 2 > $NSIM_DEV_SYS/del_port +echo 2 > $NSIM_DEV_SYS/new_port +check_tables + +msg="replace VxLAN in overflow table" +exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` ) +del_dev vxlan1 + +msg="vacate VxLAN in overflow table" +exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` ) +del_dev vxlan2 + +echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset +check_tables + +msg="tunnels destroyed 2" +cleanup_tuns +exp0=( 0 0 0 0 ) +exp1=( 0 0 0 0 ) +check_tables + +echo 1 > $NSIM_DEV_SYS/del_port +echo 2 > $NSIM_DEV_SYS/del_port + +cleanup_nsim + modprobe -r netdevsim if [ $num_errors -eq 0 ]; then -- cgit v1.3.1 From 8c4cf4bc3ea7d6a2724f9399ad3d8f189fc92456 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 25 Sep 2020 17:56:48 -0700 Subject: selftests: net: add a test for static UDP tunnel ports Check UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN works as expected. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- .../drivers/net/netdevsim/udp_tunnel_nic.sh | 58 ++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh index 9225133784af..1b08e042cf94 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -7,6 +7,7 @@ NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID NSIM_NETDEV= HAS_ETHTOOL= +STATIC_ENTRIES= EXIT_STATUS=0 num_cases=0 num_errors=0 @@ -193,6 +194,21 @@ function check_tables { sleep 0.02 ((retries--)) done + + if [ -n "$HAS_ETHTOOL" -a -n "${STATIC_ENTRIES[0]}" ]; then + fail=0 + for i in "${!STATIC_ENTRIES[@]}"; do + pp_expected=`pre_ethtool ${STATIC_ENTRIES[i]}` + cnt=$(ethtool --show-tunnels $NSIM_NETDEV | grep -c "$pp_expected") + if [ $cnt -ne 1 ]; then + err_cnt "ethtool static entry: $pfx - $msg" + echo " check_table: ethtool does not contain '$pp_expected'" + ethtool --show-tunnels $NSIM_NETDEV + fail=1 + fi + done + [ $fail == 0 ] && pass_cnt + fi } function print_table { @@ -884,6 +900,48 @@ echo 2 > $NSIM_DEV_SYS/del_port cleanup_nsim +# Static IANA port +pfx="static IANA vxlan" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +echo 1 > $NSIM_DEV_DFS/udp_ports_static_iana_vxlan +STATIC_ENTRIES=( `mke 4789 1` ) + +port=1 +old_netdevs=$(ls /sys/class/net) +echo $port > $NSIM_DEV_SYS/new_port +NSIM_NETDEV=`get_netdev_name old_netdevs` + +msg="check empty" +exp0=( 0 0 0 0 ) +exp1=( 0 0 0 0 ) +check_tables + +msg="add on static port" +new_vxlan vxlan0 4789 $NSIM_NETDEV +new_vxlan vxlan1 4789 $NSIM_NETDEV + +msg="add on different port" +exp0=( `mke 4790 1` 0 0 0 ) +new_vxlan vxlan2 4790 $NSIM_NETDEV + +cleanup_tuns + +msg="tunnels destroyed" +exp0=( 0 0 0 0 ) +exp1=( 0 0 0 0 ) +check_tables + +msg="different type" +new_geneve gnv0 4789 + +cleanup_tuns +cleanup_nsim + +# END + modprobe -r netdevsim if [ $num_errors -eq 0 ]; then -- cgit v1.3.1 From 1b4d60ec162f82ea29a2e7a907b5c6cc9f926321 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 25 Sep 2020 13:54:29 -0700 Subject: bpf: Enable BPF_PROG_TEST_RUN for raw_tracepoint Add .test_run for raw_tracepoint. Also, introduce a new feature that runs the target program on a specific CPU. This is achieved by a new flag in bpf_attr.test, BPF_F_TEST_RUN_ON_CPU. When this flag is set, the program is triggered on cpu with id bpf_attr.test.cpu. This feature is needed for BPF programs that handle perf_event and other percpu resources, as the program can access these resource locally. Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200925205432.1777-2-songliubraving@fb.com --- include/linux/bpf.h | 3 ++ include/uapi/linux/bpf.h | 7 ++++ kernel/bpf/syscall.c | 2 +- kernel/trace/bpf_trace.c | 1 + net/bpf/test_run.c | 91 ++++++++++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 7 ++++ 6 files changed, 110 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 79902325bef8..db6dcdee7933 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1396,6 +1396,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2d6519a2ed77..82522f05c021 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -424,6 +424,11 @@ enum { */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) +/* Flags for BPF_PROG_TEST_RUN */ + +/* If set, run the test on the cpu specified by bpf_attr.test.cpu */ +#define BPF_F_TEST_RUN_ON_CPU (1U << 0) + /* type for BPF_ENABLE_STATS */ enum bpf_stats_type { /* enabled run_time_ns and run_cnt */ @@ -566,6 +571,8 @@ union bpf_attr { */ __aligned_u64 ctx_in; __aligned_u64 ctx_out; + __u32 flags; + __u32 cpu; } test; struct { /* anonymous struct used by BPF_*_GET_*_ID */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2740df19f55e..3bc2ed2e171b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2979,7 +2979,7 @@ static int bpf_prog_query(const union bpf_attr *attr, } } -#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out +#define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu static int bpf_prog_test_run(const union bpf_attr *attr, union bpf_attr __user *uattr) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 36508f46a8db..2834866d379a 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1678,6 +1678,7 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = { }; const struct bpf_prog_ops raw_tracepoint_prog_ops = { + .test_run = bpf_prog_test_run_raw_tp, }; const struct bpf_verifier_ops tracing_verifier_ops = { diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index a66f211726e7..fde5db93507c 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -11,6 +11,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -204,6 +205,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, int b = 2, err = -EFAULT; u32 retval = 0; + if (kattr->test.flags || kattr->test.cpu) + return -EINVAL; + switch (prog->expected_attach_type) { case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: @@ -236,6 +240,87 @@ out: return err; } +struct bpf_raw_tp_test_run_info { + struct bpf_prog *prog; + void *ctx; + u32 retval; +}; + +static void +__bpf_prog_test_run_raw_tp(void *data) +{ + struct bpf_raw_tp_test_run_info *info = data; + + rcu_read_lock(); + migrate_disable(); + info->retval = BPF_PROG_RUN(info->prog, info->ctx); + migrate_enable(); + rcu_read_unlock(); +} + +int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in); + __u32 ctx_size_in = kattr->test.ctx_size_in; + struct bpf_raw_tp_test_run_info info; + int cpu = kattr->test.cpu, err = 0; + + /* doesn't support data_in/out, ctx_out, duration, or repeat */ + if (kattr->test.data_in || kattr->test.data_out || + kattr->test.ctx_out || kattr->test.duration || + kattr->test.repeat) + return -EINVAL; + + if (ctx_size_in < prog->aux->max_ctx_offset) + return -EINVAL; + + if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 && cpu != 0) + return -EINVAL; + + if (ctx_size_in) { + info.ctx = kzalloc(ctx_size_in, GFP_USER); + if (!info.ctx) + return -ENOMEM; + if (copy_from_user(info.ctx, ctx_in, ctx_size_in)) { + err = -EFAULT; + goto out; + } + } else { + info.ctx = NULL; + } + + info.prog = prog; + + if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 || + cpu == smp_processor_id()) { + __bpf_prog_test_run_raw_tp(&info); + } else { + /* smp_call_function_single() also checks cpu_online() + * after csd_lock(). However, since cpu is from user + * space, let's do an extra quick check to filter out + * invalid value before smp_call_function_single(). + */ + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { + err = -ENXIO; + goto out; + } + + err = smp_call_function_single(cpu, __bpf_prog_test_run_raw_tp, + &info, 1); + if (err) + goto out; + } + + if (copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32))) + err = -EFAULT; + +out: + kfree(info.ctx); + return err; +} + static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size) { void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in); @@ -410,6 +495,9 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, void *data; int ret; + if (kattr->test.flags || kattr->test.cpu) + return -EINVAL; + data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN, SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); if (IS_ERR(data)) @@ -607,6 +695,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR) return -EINVAL; + if (kattr->test.flags || kattr->test.cpu) + return -EINVAL; + if (size < ETH_HLEN) return -EINVAL; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 2d6519a2ed77..82522f05c021 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -424,6 +424,11 @@ enum { */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) +/* Flags for BPF_PROG_TEST_RUN */ + +/* If set, run the test on the cpu specified by bpf_attr.test.cpu */ +#define BPF_F_TEST_RUN_ON_CPU (1U << 0) + /* type for BPF_ENABLE_STATS */ enum bpf_stats_type { /* enabled run_time_ns and run_cnt */ @@ -566,6 +571,8 @@ union bpf_attr { */ __aligned_u64 ctx_in; __aligned_u64 ctx_out; + __u32 flags; + __u32 cpu; } test; struct { /* anonymous struct used by BPF_*_GET_*_ID */ -- cgit v1.3.1 From 88f7fe7233244101fa5b7786e2e298bf27fe1375 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 25 Sep 2020 13:54:30 -0700 Subject: libbpf: Support test run of raw tracepoint programs Add bpf_prog_test_run_opts() with support of new fields in bpf_attr.test, namely, flags and cpu. Also extend _opts operations to support outputs via opts. Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200925205432.1777-3-songliubraving@fb.com --- tools/lib/bpf/bpf.c | 31 +++++++++++++++++++++++++++++++ tools/lib/bpf/bpf.h | 26 ++++++++++++++++++++++++++ tools/lib/bpf/libbpf.map | 1 + tools/lib/bpf/libbpf_internal.h | 5 +++++ 4 files changed, 63 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 2baa1308737c..c5a4d8444bf6 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -712,6 +712,37 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr) return ret; } +int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts) +{ + union bpf_attr attr; + int ret; + + if (!OPTS_VALID(opts, bpf_test_run_opts)) + return -EINVAL; + + memset(&attr, 0, sizeof(attr)); + attr.test.prog_fd = prog_fd; + attr.test.cpu = OPTS_GET(opts, cpu, 0); + attr.test.flags = OPTS_GET(opts, flags, 0); + attr.test.repeat = OPTS_GET(opts, repeat, 0); + attr.test.duration = OPTS_GET(opts, duration, 0); + attr.test.ctx_size_in = OPTS_GET(opts, ctx_size_in, 0); + attr.test.ctx_size_out = OPTS_GET(opts, ctx_size_out, 0); + attr.test.data_size_in = OPTS_GET(opts, data_size_in, 0); + attr.test.data_size_out = OPTS_GET(opts, data_size_out, 0); + attr.test.ctx_in = ptr_to_u64(OPTS_GET(opts, ctx_in, NULL)); + attr.test.ctx_out = ptr_to_u64(OPTS_GET(opts, ctx_out, NULL)); + attr.test.data_in = ptr_to_u64(OPTS_GET(opts, data_in, NULL)); + attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL)); + + ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); + OPTS_SET(opts, data_size_out, attr.test.data_size_out); + OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out); + OPTS_SET(opts, duration, attr.test.duration); + OPTS_SET(opts, retval, attr.test.retval); + return ret; +} + static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd) { union bpf_attr attr; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 8c1ac4b42f90..4f3568e55527 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -251,6 +251,32 @@ struct bpf_prog_bind_opts { LIBBPF_API int bpf_prog_bind_map(int prog_fd, int map_fd, const struct bpf_prog_bind_opts *opts); + +struct bpf_test_run_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + const void *data_in; /* optional */ + void *data_out; /* optional */ + __u32 data_size_in; + __u32 data_size_out; /* in: max length of data_out + * out: length of data_out + */ + const void *ctx_in; /* optional */ + void *ctx_out; /* optional */ + __u32 ctx_size_in; + __u32 ctx_size_out; /* in: max length of ctx_out + * out: length of cxt_out + */ + __u32 retval; /* out: return code of the BPF program */ + int repeat; + __u32 duration; /* out: average per repetition in ns */ + __u32 flags; + __u32 cpu; +}; +#define bpf_test_run_opts__last_field cpu + +LIBBPF_API int bpf_prog_test_run_opts(int prog_fd, + struct bpf_test_run_opts *opts); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 5f054dadf082..0623e7a99b1e 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -303,6 +303,7 @@ LIBBPF_0.1.0 { LIBBPF_0.2.0 { global: bpf_prog_bind_map; + bpf_prog_test_run_opts; bpf_program__section_name; perf_buffer__buffer_cnt; perf_buffer__buffer_fd; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 4d1c366fca2c..d3f7a6a6bc98 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -136,6 +136,11 @@ static inline bool libbpf_validate_opts(const char *opts, ((opts) && opts->sz >= offsetofend(typeof(*(opts)), field)) #define OPTS_GET(opts, field, fallback_value) \ (OPTS_HAS(opts, field) ? (opts)->field : fallback_value) +#define OPTS_SET(opts, field, value) \ + do { \ + if (OPTS_HAS(opts, field)) \ + (opts)->field = value; \ + } while (0) int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); -- cgit v1.3.1 From 09d8ad16885ee2ea3b9a4d4735ae053f425a1faf Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 25 Sep 2020 13:54:31 -0700 Subject: selftests/bpf: Add raw_tp_test_run This test runs test_run for raw_tracepoint program. The test covers ctx input, retval output, and running on correct cpu. Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200925205432.1777-4-songliubraving@fb.com --- .../selftests/bpf/prog_tests/raw_tp_test_run.c | 96 ++++++++++++++++++++++ .../selftests/bpf/progs/test_raw_tp_test_run.c | 24 ++++++ 2 files changed, 120 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c create mode 100644 tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c new file mode 100644 index 000000000000..c5fb191874ac --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2019 Facebook */ +#include +#include +#include "bpf/libbpf_internal.h" +#include "test_raw_tp_test_run.skel.h" + +static int duration; + +void test_raw_tp_test_run(void) +{ + struct bpf_prog_test_run_attr test_attr = {}; + int comm_fd = -1, err, nr_online, i, prog_fd; + __u64 args[2] = {0x1234ULL, 0x5678ULL}; + int expected_retval = 0x1234 + 0x5678; + struct test_raw_tp_test_run *skel; + char buf[] = "new_name"; + bool *online = NULL; + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .ctx_in = args, + .ctx_size_in = sizeof(args), + .flags = BPF_F_TEST_RUN_ON_CPU, + ); + + err = parse_cpu_mask_file("/sys/devices/system/cpu/online", &online, + &nr_online); + if (CHECK(err, "parse_cpu_mask_file", "err %d\n", err)) + return; + + skel = test_raw_tp_test_run__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + goto cleanup; + + err = test_raw_tp_test_run__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + comm_fd = open("/proc/self/comm", O_WRONLY|O_TRUNC); + if (CHECK(comm_fd < 0, "open /proc/self/comm", "err %d\n", errno)) + goto cleanup; + + err = write(comm_fd, buf, sizeof(buf)); + CHECK(err < 0, "task rename", "err %d", errno); + + CHECK(skel->bss->count == 0, "check_count", "didn't increase\n"); + CHECK(skel->data->on_cpu != 0xffffffff, "check_on_cpu", "got wrong value\n"); + + prog_fd = bpf_program__fd(skel->progs.rename); + test_attr.prog_fd = prog_fd; + test_attr.ctx_in = args; + test_attr.ctx_size_in = sizeof(__u64); + + err = bpf_prog_test_run_xattr(&test_attr); + CHECK(err == 0, "test_run", "should fail for too small ctx\n"); + + test_attr.ctx_size_in = sizeof(args); + err = bpf_prog_test_run_xattr(&test_attr); + CHECK(err < 0, "test_run", "err %d\n", errno); + CHECK(test_attr.retval != expected_retval, "check_retval", + "expect 0x%x, got 0x%x\n", expected_retval, test_attr.retval); + + for (i = 0; i < nr_online; i++) { + if (!online[i]) + continue; + + opts.cpu = i; + opts.retval = 0; + err = bpf_prog_test_run_opts(prog_fd, &opts); + CHECK(err < 0, "test_run_opts", "err %d\n", errno); + CHECK(skel->data->on_cpu != i, "check_on_cpu", + "expect %d got %d\n", i, skel->data->on_cpu); + CHECK(opts.retval != expected_retval, + "check_retval", "expect 0x%x, got 0x%x\n", + expected_retval, opts.retval); + } + + /* invalid cpu ID should fail with ENXIO */ + opts.cpu = 0xffffffff; + err = bpf_prog_test_run_opts(prog_fd, &opts); + CHECK(err != -1 || errno != ENXIO, + "test_run_opts_fail", + "should failed with ENXIO\n"); + + /* non-zero cpu w/o BPF_F_TEST_RUN_ON_CPU should fail with EINVAL */ + opts.cpu = 1; + opts.flags = 0; + err = bpf_prog_test_run_opts(prog_fd, &opts); + CHECK(err != -1 || errno != EINVAL, + "test_run_opts_fail", + "should failed with EINVAL\n"); + +cleanup: + close(comm_fd); + test_raw_tp_test_run__destroy(skel); + free(online); +} diff --git a/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c b/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c new file mode 100644 index 000000000000..1521853597d7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" +#include +#include + +__u32 count = 0; +__u32 on_cpu = 0xffffffff; + +SEC("raw_tp/task_rename") +int BPF_PROG(rename, struct task_struct *task, char *comm) +{ + + count++; + if ((__u64) task == 0x1234ULL && (__u64) comm == 0x5678ULL) { + on_cpu = bpf_get_smp_processor_id(); + return (int)task + (int)comm; + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.3.1 From 26c3270ddb4955be358c888766ad1a6105dd7469 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 28 Sep 2020 10:08:03 +0100 Subject: selftests: bpf: Add helper to compare socket cookies We compare socket cookies to ensure that insertion into a sockmap worked. Pull this out into a helper function for use in other tests. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200928090805.23343-3-lmb@cloudflare.com --- .../selftests/bpf/prog_tests/sockmap_basic.c | 50 ++++++++++++++++------ 1 file changed, 36 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 4b7a527e7e82..3596d3f3039f 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -50,6 +50,37 @@ error: return -1; } +static void compare_cookies(struct bpf_map *src, struct bpf_map *dst) +{ + __u32 i, max_entries = bpf_map__max_entries(src); + int err, duration = 0, src_fd, dst_fd; + + src_fd = bpf_map__fd(src); + dst_fd = bpf_map__fd(dst); + + for (i = 0; i < max_entries; i++) { + __u64 src_cookie, dst_cookie; + + err = bpf_map_lookup_elem(src_fd, &i, &src_cookie); + if (err && errno == ENOENT) { + err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie); + CHECK(!err, "map_lookup_elem(dst)", "element %u not deleted\n", i); + CHECK(err && errno != ENOENT, "map_lookup_elem(dst)", "%s\n", + strerror(errno)); + continue; + } + if (CHECK(err, "lookup_elem(src)", "%s\n", strerror(errno))) + continue; + + err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie); + if (CHECK(err, "lookup_elem(dst)", "%s\n", strerror(errno))) + continue; + + CHECK(dst_cookie != src_cookie, "cookie mismatch", + "%llu != %llu (pos %u)\n", dst_cookie, src_cookie, i); + } +} + /* Create a map, populate it with one socket, and free the map. */ static void test_sockmap_create_update_free(enum bpf_map_type map_type) { @@ -109,9 +140,9 @@ out: static void test_sockmap_update(enum bpf_map_type map_type) { struct bpf_prog_test_run_attr tattr; - int err, prog, src, dst, duration = 0; + int err, prog, src, duration = 0; struct test_sockmap_update *skel; - __u64 src_cookie, dst_cookie; + struct bpf_map *dst_map; const __u32 zero = 0; char dummy[14] = {0}; __s64 sk; @@ -127,18 +158,14 @@ static void test_sockmap_update(enum bpf_map_type map_type) prog = bpf_program__fd(skel->progs.copy_sock_map); src = bpf_map__fd(skel->maps.src); if (map_type == BPF_MAP_TYPE_SOCKMAP) - dst = bpf_map__fd(skel->maps.dst_sock_map); + dst_map = skel->maps.dst_sock_map; else - dst = bpf_map__fd(skel->maps.dst_sock_hash); + dst_map = skel->maps.dst_sock_hash; err = bpf_map_update_elem(src, &zero, &sk, BPF_NOEXIST); if (CHECK(err, "update_elem(src)", "errno=%u\n", errno)) goto out; - err = bpf_map_lookup_elem(src, &zero, &src_cookie); - if (CHECK(err, "lookup_elem(src, cookie)", "errno=%u\n", errno)) - goto out; - tattr = (struct bpf_prog_test_run_attr){ .prog_fd = prog, .repeat = 1, @@ -151,12 +178,7 @@ static void test_sockmap_update(enum bpf_map_type map_type) "errno=%u retval=%u\n", errno, tattr.retval)) goto out; - err = bpf_map_lookup_elem(dst, &zero, &dst_cookie); - if (CHECK(err, "lookup_elem(dst, cookie)", "errno=%u\n", errno)) - goto out; - - CHECK(dst_cookie != src_cookie, "cookie mismatch", "%llu != %llu\n", - dst_cookie, src_cookie); + compare_cookies(skel->maps.src, dst_map); out: test_sockmap_update__destroy(skel); -- cgit v1.3.1 From 27870317337a6c24af503304620de8064c8d2e4a Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 28 Sep 2020 10:08:04 +0100 Subject: selftests: bpf: Remove shared header from sockmap iter test The shared header to define SOCKMAP_MAX_ENTRIES is a bit overkill. Dynamically allocate the sock_fd array based on bpf_map__max_entries instead. Suggested-by: Yonghong Song Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200928090805.23343-4-lmb@cloudflare.com --- .../selftests/bpf/prog_tests/sockmap_basic.c | 36 +++++++++++----------- .../testing/selftests/bpf/progs/bpf_iter_sockmap.c | 5 ++- .../testing/selftests/bpf/progs/bpf_iter_sockmap.h | 3 -- 3 files changed, 20 insertions(+), 24 deletions(-) delete mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_sockmap.h (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 3596d3f3039f..316c4e271b36 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -8,8 +8,6 @@ #include "test_sockmap_invalid_update.skel.h" #include "bpf_iter_sockmap.skel.h" -#include "progs/bpf_iter_sockmap.h" - #define TCP_REPAIR 19 /* TCP sock is under repair right now */ #define TCP_REPAIR_ON 1 @@ -201,9 +199,9 @@ static void test_sockmap_iter(enum bpf_map_type map_type) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); int err, len, src_fd, iter_fd, duration = 0; union bpf_iter_link_info linfo = {0}; - __s64 sock_fd[SOCKMAP_MAX_ENTRIES]; - __u32 i, num_sockets, max_elems; + __u32 i, num_sockets, num_elems; struct bpf_iter_sockmap *skel; + __s64 *sock_fd = NULL; struct bpf_link *link; struct bpf_map *src; char buf[64]; @@ -212,22 +210,23 @@ static void test_sockmap_iter(enum bpf_map_type map_type) if (CHECK(!skel, "bpf_iter_sockmap__open_and_load", "skeleton open_and_load failed\n")) return; - for (i = 0; i < ARRAY_SIZE(sock_fd); i++) - sock_fd[i] = -1; - - /* Make sure we have at least one "empty" entry to test iteration of - * an empty slot. - */ - num_sockets = ARRAY_SIZE(sock_fd) - 1; - if (map_type == BPF_MAP_TYPE_SOCKMAP) { src = skel->maps.sockmap; - max_elems = bpf_map__max_entries(src); + num_elems = bpf_map__max_entries(src); + num_sockets = num_elems - 1; } else { src = skel->maps.sockhash; - max_elems = num_sockets; + num_elems = bpf_map__max_entries(src) - 1; + num_sockets = num_elems; } + sock_fd = calloc(num_sockets, sizeof(*sock_fd)); + if (CHECK(!sock_fd, "calloc(sock_fd)", "failed to allocate\n")) + goto out; + + for (i = 0; i < num_sockets; i++) + sock_fd[i] = -1; + src_fd = bpf_map__fd(src); for (i = 0; i < num_sockets; i++) { @@ -258,8 +257,8 @@ static void test_sockmap_iter(enum bpf_map_type map_type) goto close_iter; /* test results */ - if (CHECK(skel->bss->elems != max_elems, "elems", "got %u expected %u\n", - skel->bss->elems, max_elems)) + if (CHECK(skel->bss->elems != num_elems, "elems", "got %u expected %u\n", + skel->bss->elems, num_elems)) goto close_iter; if (CHECK(skel->bss->socks != num_sockets, "socks", "got %u expected %u\n", @@ -271,10 +270,11 @@ close_iter: free_link: bpf_link__destroy(link); out: - for (i = 0; i < num_sockets; i++) { + for (i = 0; sock_fd && i < num_sockets; i++) if (sock_fd[i] >= 0) close(sock_fd[i]); - } + if (sock_fd) + free(sock_fd); bpf_iter_sockmap__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c index 0e27f73dd803..1af7555f6057 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c @@ -2,7 +2,6 @@ /* Copyright (c) 2020 Cloudflare */ #include "bpf_iter.h" #include "bpf_tracing_net.h" -#include "bpf_iter_sockmap.h" #include #include #include @@ -11,14 +10,14 @@ char _license[] SEC("license") = "GPL"; struct { __uint(type, BPF_MAP_TYPE_SOCKMAP); - __uint(max_entries, SOCKMAP_MAX_ENTRIES); + __uint(max_entries, 64); __type(key, __u32); __type(value, __u64); } sockmap SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_SOCKHASH); - __uint(max_entries, SOCKMAP_MAX_ENTRIES); + __uint(max_entries, 64); __type(key, __u32); __type(value, __u64); } sockhash SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.h b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.h deleted file mode 100644 index 35a675d13c0f..000000000000 --- a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.h +++ /dev/null @@ -1,3 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#define SOCKMAP_MAX_ENTRIES (64) -- cgit v1.3.1 From 5b87adc3ceee3c3131e24e9bc9ef92dd41db089f Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 28 Sep 2020 10:08:05 +0100 Subject: selftest: bpf: Test copying a sockmap and sockhash Since we can now call map_update_elem(sockmap) from bpf_iter context it's possible to copy a sockmap or sockhash in the kernel. Add a selftest which exercises this. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200928090805.23343-5-lmb@cloudflare.com --- .../selftests/bpf/prog_tests/sockmap_basic.c | 14 ++++++----- .../testing/selftests/bpf/progs/bpf_iter_sockmap.c | 27 ++++++++++++++++++---- 2 files changed, 30 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 316c4e271b36..4c4224e3e10a 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -194,7 +194,7 @@ static void test_sockmap_invalid_update(void) test_sockmap_invalid_update__destroy(skel); } -static void test_sockmap_iter(enum bpf_map_type map_type) +static void test_sockmap_copy(enum bpf_map_type map_type) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); int err, len, src_fd, iter_fd, duration = 0; @@ -242,7 +242,7 @@ static void test_sockmap_iter(enum bpf_map_type map_type) linfo.map.map_fd = src_fd; opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); - link = bpf_program__attach_iter(skel->progs.count_elems, &opts); + link = bpf_program__attach_iter(skel->progs.copy, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -265,6 +265,8 @@ static void test_sockmap_iter(enum bpf_map_type map_type) skel->bss->socks, num_sockets)) goto close_iter; + compare_cookies(src, skel->maps.dst); + close_iter: close(iter_fd); free_link: @@ -294,8 +296,8 @@ void test_sockmap_basic(void) test_sockmap_update(BPF_MAP_TYPE_SOCKHASH); if (test__start_subtest("sockmap update in unsafe context")) test_sockmap_invalid_update(); - if (test__start_subtest("sockmap iter")) - test_sockmap_iter(BPF_MAP_TYPE_SOCKMAP); - if (test__start_subtest("sockhash iter")) - test_sockmap_iter(BPF_MAP_TYPE_SOCKHASH); + if (test__start_subtest("sockmap copy")) + test_sockmap_copy(BPF_MAP_TYPE_SOCKMAP); + if (test__start_subtest("sockhash copy")) + test_sockmap_copy(BPF_MAP_TYPE_SOCKHASH); } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c index 1af7555f6057..f3af0e30cead 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c @@ -22,21 +22,38 @@ struct { __type(value, __u64); } sockhash SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, 64); + __type(key, __u32); + __type(value, __u64); +} dst SEC(".maps"); + __u32 elems = 0; __u32 socks = 0; SEC("iter/sockmap") -int count_elems(struct bpf_iter__sockmap *ctx) +int copy(struct bpf_iter__sockmap *ctx) { struct sock *sk = ctx->sk; __u32 tmp, *key = ctx->key; int ret; - if (key) - elems++; + if (!key) + return 0; + + elems++; + + /* We need a temporary buffer on the stack, since the verifier doesn't + * let us use the pointer from the context as an argument to the helper. + */ + tmp = *key; - if (sk) + if (sk) { socks++; + return bpf_map_update_elem(&dst, &tmp, sk, 0) != 0; + } - return 0; + ret = bpf_map_delete_elem(&dst, &tmp); + return ret && ret != -ENOENT; } -- cgit v1.3.1 From b000def2e052fc8ddea31a18019f6ebe044defb3 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Fri, 25 Sep 2020 23:25:11 +0200 Subject: selftests: Remove fmod_ret from test_overhead MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test_overhead prog_test included an fmod_ret program that attached to __set_task_comm() in the kernel. However, this function was never listed as allowed for return modification, so this only worked because of the verifier skipping tests when a trampoline already existed for the attach point. Now that the verifier checks have been fixed, remove fmod_ret from the test so it works again. Fixes: 4eaf0b5c5e04 ("selftest/bpf: Fmod_ret prog and implement test_overhead as part of bench") Acked-by: Andrii Nakryiko Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bench.c | 3 --- tools/testing/selftests/bpf/benchs/bench_rename.c | 17 ----------------- tools/testing/selftests/bpf/prog_tests/test_overhead.c | 14 +------------- tools/testing/selftests/bpf/progs/test_overhead.c | 6 ------ 4 files changed, 1 insertion(+), 39 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 1a427685a8a8..332ed2f7b402 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -311,7 +311,6 @@ extern const struct bench bench_rename_kretprobe; extern const struct bench bench_rename_rawtp; extern const struct bench bench_rename_fentry; extern const struct bench bench_rename_fexit; -extern const struct bench bench_rename_fmodret; extern const struct bench bench_trig_base; extern const struct bench bench_trig_tp; extern const struct bench bench_trig_rawtp; @@ -333,7 +332,6 @@ static const struct bench *benchs[] = { &bench_rename_rawtp, &bench_rename_fentry, &bench_rename_fexit, - &bench_rename_fmodret, &bench_trig_base, &bench_trig_tp, &bench_trig_rawtp, @@ -464,4 +462,3 @@ int main(int argc, char **argv) return 0; } - diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c index e74cff40f4fe..a967674098ad 100644 --- a/tools/testing/selftests/bpf/benchs/bench_rename.c +++ b/tools/testing/selftests/bpf/benchs/bench_rename.c @@ -106,12 +106,6 @@ static void setup_fexit() attach_bpf(ctx.skel->progs.prog5); } -static void setup_fmodret() -{ - setup_ctx(); - attach_bpf(ctx.skel->progs.prog6); -} - static void *consumer(void *input) { return NULL; @@ -182,14 +176,3 @@ const struct bench bench_rename_fexit = { .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, }; - -const struct bench bench_rename_fmodret = { - .name = "rename-fmodret", - .validate = validate, - .setup = setup_fmodret, - .producer_thread = producer, - .consumer_thread = consumer, - .measure = measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c index 2702df2b2343..9966685866fd 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c +++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c @@ -61,10 +61,9 @@ void test_test_overhead(void) const char *raw_tp_name = "raw_tp/task_rename"; const char *fentry_name = "fentry/__set_task_comm"; const char *fexit_name = "fexit/__set_task_comm"; - const char *fmodret_name = "fmod_ret/__set_task_comm"; const char *kprobe_func = "__set_task_comm"; struct bpf_program *kprobe_prog, *kretprobe_prog, *raw_tp_prog; - struct bpf_program *fentry_prog, *fexit_prog, *fmodret_prog; + struct bpf_program *fentry_prog, *fexit_prog; struct bpf_object *obj; struct bpf_link *link; int err, duration = 0; @@ -97,11 +96,6 @@ void test_test_overhead(void) if (CHECK(!fexit_prog, "find_probe", "prog '%s' not found\n", fexit_name)) goto cleanup; - fmodret_prog = bpf_object__find_program_by_title(obj, fmodret_name); - if (CHECK(!fmodret_prog, "find_probe", - "prog '%s' not found\n", fmodret_name)) - goto cleanup; - err = bpf_object__load(obj); if (CHECK(err, "obj_load", "err %d\n", err)) goto cleanup; @@ -148,12 +142,6 @@ void test_test_overhead(void) test_run("fexit"); bpf_link__destroy(link); - /* attach fmod_ret */ - link = bpf_program__attach_trace(fmodret_prog); - if (CHECK(IS_ERR(link), "attach fmod_ret", "err %ld\n", PTR_ERR(link))) - goto cleanup; - test_run("fmod_ret"); - bpf_link__destroy(link); cleanup: prctl(PR_SET_NAME, comm, 0L, 0L, 0L); bpf_object__close(obj); diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c index 42403d088abc..abb7344b531f 100644 --- a/tools/testing/selftests/bpf/progs/test_overhead.c +++ b/tools/testing/selftests/bpf/progs/test_overhead.c @@ -39,10 +39,4 @@ int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec) return 0; } -SEC("fmod_ret/__set_task_comm") -int BPF_PROG(prog6, struct task_struct *tsk, const char *buf, bool exec) -{ - return !tsk; -} - char _license[] SEC("license") = "GPL"; -- cgit v1.3.1 From 740e69c3c511aa207155ba993a854b5bee79cdc2 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 25 Sep 2020 18:13:49 -0700 Subject: libbpf: Refactor internals of BTF type index Refactor implementation of internal BTF type index to not use direct pointers. Instead it uses offset relative to the start of types data section. This allows for types data to be reallocatable, enabling implementation of modifiable BTF. As now getting type by ID has an extra indirection step, convert all internal type lookups to a new helper btf_type_id(), that returns non-const pointer to a type by its ID. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200926011357.2366158-2-andriin@fb.com --- tools/lib/bpf/btf.c | 139 ++++++++++++++++++++++++++++------------------------ 1 file changed, 75 insertions(+), 64 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index a3d259e614b0..7c9957893ef2 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -31,11 +31,12 @@ struct btf { struct btf_header *hdr; void *data; }; - struct btf_type **types; + __u32 *type_offs; + __u32 type_offs_cap; const char *strings; void *nohdr_data; + void *types_data; __u32 nr_types; - __u32 types_size; __u32 data_size; int fd; int ptr_sz; @@ -46,30 +47,30 @@ static inline __u64 ptr_to_u64(const void *ptr) return (__u64) (unsigned long) ptr; } -static int btf_add_type(struct btf *btf, struct btf_type *t) +static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off) { - if (btf->types_size - btf->nr_types < 2) { - struct btf_type **new_types; + /* nr_types is 1-based, so N types means we need N+1-sized array */ + if (btf->nr_types + 2 > btf->type_offs_cap) { + __u32 *new_offs; __u32 expand_by, new_size; - if (btf->types_size == BTF_MAX_NR_TYPES) + if (btf->type_offs_cap == BTF_MAX_NR_TYPES) return -E2BIG; - expand_by = max(btf->types_size >> 2, 16U); - new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by); + expand_by = max(btf->type_offs_cap / 4, 16U); + new_size = min(BTF_MAX_NR_TYPES, btf->type_offs_cap + expand_by); - new_types = libbpf_reallocarray(btf->types, new_size, sizeof(*new_types)); - if (!new_types) + new_offs = libbpf_reallocarray(btf->type_offs, new_size, sizeof(*new_offs)); + if (!new_offs) return -ENOMEM; - if (btf->nr_types == 0) - new_types[0] = &btf_void; + new_offs[0] = UINT_MAX; /* VOID is specially handled */ - btf->types = new_types; - btf->types_size = new_size; + btf->type_offs = new_offs; + btf->type_offs_cap = new_size; } - btf->types[++(btf->nr_types)] = t; + btf->type_offs[btf->nr_types + 1] = type_off; return 0; } @@ -147,7 +148,7 @@ static int btf_parse_str_sec(struct btf *btf) return 0; } -static int btf_type_size(struct btf_type *t) +static int btf_type_size(const struct btf_type *t) { int base_size = sizeof(struct btf_type); __u16 vlen = btf_vlen(t); @@ -185,22 +186,25 @@ static int btf_type_size(struct btf_type *t) static int btf_parse_type_sec(struct btf *btf) { struct btf_header *hdr = btf->hdr; - void *nohdr_data = btf->nohdr_data; - void *next_type = nohdr_data + hdr->type_off; - void *end_type = nohdr_data + hdr->str_off; + void *next_type = btf->nohdr_data + hdr->type_off; + void *end_type = next_type + hdr->type_len; + + btf->types_data = next_type; while (next_type < end_type) { - struct btf_type *t = next_type; int type_size; int err; - type_size = btf_type_size(t); + err = btf_add_type_idx_entry(btf, next_type - btf->types_data); + if (err) + return err; + + type_size = btf_type_size(next_type); if (type_size < 0) return type_size; + next_type += type_size; - err = btf_add_type(btf, t); - if (err) - return err; + btf->nr_types++; } return 0; @@ -211,12 +215,20 @@ __u32 btf__get_nr_types(const struct btf *btf) return btf->nr_types; } +/* internal helper returning non-const pointer to a type */ +static struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id) +{ + if (type_id == 0) + return &btf_void; + + return btf->types_data + btf->type_offs[type_id]; +} + const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id) { if (type_id > btf->nr_types) return NULL; - - return btf->types[type_id]; + return btf_type_by_id((struct btf *)btf, type_id); } static int determine_ptr_size(const struct btf *btf) @@ -414,7 +426,7 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name) return 0; for (i = 1; i <= btf->nr_types; i++) { - const struct btf_type *t = btf->types[i]; + const struct btf_type *t = btf__type_by_id(btf, i); const char *name = btf__name_by_offset(btf, t->name_off); if (name && !strcmp(type_name, name)) @@ -433,7 +445,7 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, return 0; for (i = 1; i <= btf->nr_types; i++) { - const struct btf_type *t = btf->types[i]; + const struct btf_type *t = btf__type_by_id(btf, i); const char *name; if (btf_kind(t) != kind) @@ -455,7 +467,7 @@ void btf__free(struct btf *btf) close(btf->fd); free(btf->data); - free(btf->types); + free(btf->type_offs); free(btf); } @@ -789,7 +801,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf) __u32 i; for (i = 1; i <= btf->nr_types; i++) { - struct btf_type *t = btf->types[i]; + struct btf_type *t = btf_type_by_id(btf, i); /* Loader needs to fix up some of the things compiler * couldn't get its hands on while emitting BTF. This @@ -1655,7 +1667,7 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, /* special BTF "void" type is made canonical immediately */ d->map[0] = 0; for (i = 1; i <= btf->nr_types; i++) { - struct btf_type *t = d->btf->types[i]; + struct btf_type *t = btf_type_by_id(d->btf, i); /* VAR and DATASEC are never deduped and are self-canonical */ if (btf_is_var(t) || btf_is_datasec(t)) @@ -1694,7 +1706,7 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx) struct btf_type *t; for (i = 1; i <= d->btf->nr_types; i++) { - t = d->btf->types[i]; + t = btf_type_by_id(d->btf, i); r = fn(&t->name_off, ctx); if (r) return r; @@ -2229,7 +2241,7 @@ static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2) */ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) { - struct btf_type *t = d->btf->types[type_id]; + struct btf_type *t = btf_type_by_id(d->btf, type_id); struct hashmap_entry *hash_entry; struct btf_type *cand; /* if we don't find equivalent type, then we are canonical */ @@ -2256,7 +2268,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) h = btf_hash_int(t); for_each_dedup_cand(d, hash_entry, h) { cand_id = (__u32)(long)hash_entry->value; - cand = d->btf->types[cand_id]; + cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_int(t, cand)) { new_id = cand_id; break; @@ -2268,7 +2280,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) h = btf_hash_enum(t); for_each_dedup_cand(d, hash_entry, h) { cand_id = (__u32)(long)hash_entry->value; - cand = d->btf->types[cand_id]; + cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_enum(t, cand)) { new_id = cand_id; break; @@ -2291,7 +2303,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) h = btf_hash_common(t); for_each_dedup_cand(d, hash_entry, h) { cand_id = (__u32)(long)hash_entry->value; - cand = d->btf->types[cand_id]; + cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_common(t, cand)) { new_id = cand_id; break; @@ -2350,13 +2362,13 @@ static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id) { __u32 orig_type_id = type_id; - if (!btf_is_fwd(d->btf->types[type_id])) + if (!btf_is_fwd(btf__type_by_id(d->btf, type_id))) return type_id; while (is_type_mapped(d, type_id) && d->map[type_id] != type_id) type_id = d->map[type_id]; - if (!btf_is_fwd(d->btf->types[type_id])) + if (!btf_is_fwd(btf__type_by_id(d->btf, type_id))) return type_id; return orig_type_id; @@ -2484,8 +2496,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, if (btf_dedup_hypot_map_add(d, canon_id, cand_id)) return -ENOMEM; - cand_type = d->btf->types[cand_id]; - canon_type = d->btf->types[canon_id]; + cand_type = btf_type_by_id(d->btf, cand_id); + canon_type = btf_type_by_id(d->btf, canon_id); cand_kind = btf_kind(cand_type); canon_kind = btf_kind(canon_type); @@ -2636,8 +2648,8 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d) targ_type_id = d->hypot_map[cand_type_id]; t_id = resolve_type_id(d, targ_type_id); c_id = resolve_type_id(d, cand_type_id); - t_kind = btf_kind(d->btf->types[t_id]); - c_kind = btf_kind(d->btf->types[c_id]); + t_kind = btf_kind(btf__type_by_id(d->btf, t_id)); + c_kind = btf_kind(btf__type_by_id(d->btf, c_id)); /* * Resolve FWD into STRUCT/UNION. * It's ok to resolve FWD into STRUCT/UNION that's not yet @@ -2705,7 +2717,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) if (d->map[type_id] <= BTF_MAX_NR_TYPES) return 0; - t = d->btf->types[type_id]; + t = btf_type_by_id(d->btf, type_id); kind = btf_kind(t); if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION) @@ -2726,7 +2738,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) * creating a loop (FWD -> STRUCT and STRUCT -> FWD), because * FWD and compatible STRUCT/UNION are considered equivalent. */ - cand_type = d->btf->types[cand_id]; + cand_type = btf_type_by_id(d->btf, cand_id); if (!btf_shallow_equal_struct(t, cand_type)) continue; @@ -2798,7 +2810,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) if (d->map[type_id] <= BTF_MAX_NR_TYPES) return resolve_type_id(d, type_id); - t = d->btf->types[type_id]; + t = btf_type_by_id(d->btf, type_id); d->map[type_id] = BTF_IN_PROGRESS_ID; switch (btf_kind(t)) { @@ -2816,7 +2828,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) h = btf_hash_common(t); for_each_dedup_cand(d, hash_entry, h) { cand_id = (__u32)(long)hash_entry->value; - cand = d->btf->types[cand_id]; + cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_common(t, cand)) { new_id = cand_id; break; @@ -2840,7 +2852,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) h = btf_hash_array(t); for_each_dedup_cand(d, hash_entry, h) { cand_id = (__u32)(long)hash_entry->value; - cand = d->btf->types[cand_id]; + cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_array(t, cand)) { new_id = cand_id; break; @@ -2872,7 +2884,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) h = btf_hash_fnproto(t); for_each_dedup_cand(d, hash_entry, h) { cand_id = (__u32)(long)hash_entry->value; - cand = d->btf->types[cand_id]; + cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_fnproto(t, cand)) { new_id = cand_id; break; @@ -2920,9 +2932,9 @@ static int btf_dedup_ref_types(struct btf_dedup *d) */ static int btf_dedup_compact_types(struct btf_dedup *d) { - struct btf_type **new_types; + __u32 *new_offs; __u32 next_type_id = 1; - char *types_start, *p; + void *p; int i, len; /* we are going to reuse hypot_map to store compaction remapping */ @@ -2930,41 +2942,40 @@ static int btf_dedup_compact_types(struct btf_dedup *d) for (i = 1; i <= d->btf->nr_types; i++) d->hypot_map[i] = BTF_UNPROCESSED_ID; - types_start = d->btf->nohdr_data + d->btf->hdr->type_off; - p = types_start; + p = d->btf->types_data; for (i = 1; i <= d->btf->nr_types; i++) { if (d->map[i] != i) continue; - len = btf_type_size(d->btf->types[i]); + len = btf_type_size(btf__type_by_id(d->btf, i)); if (len < 0) return len; - memmove(p, d->btf->types[i], len); + memmove(p, btf__type_by_id(d->btf, i), len); d->hypot_map[i] = next_type_id; - d->btf->types[next_type_id] = (struct btf_type *)p; + d->btf->type_offs[next_type_id] = p - d->btf->types_data; p += len; next_type_id++; } /* shrink struct btf's internal types index and update btf_header */ d->btf->nr_types = next_type_id - 1; - d->btf->types_size = d->btf->nr_types; - d->btf->hdr->type_len = p - types_start; - new_types = libbpf_reallocarray(d->btf->types, (1 + d->btf->nr_types), - sizeof(struct btf_type *)); - if (!new_types) + d->btf->type_offs_cap = d->btf->nr_types + 1; + d->btf->hdr->type_len = p - d->btf->types_data; + new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap, + sizeof(*new_offs)); + if (!new_offs) return -ENOMEM; - d->btf->types = new_types; + d->btf->type_offs = new_offs; /* make sure string section follows type information without gaps */ - d->btf->hdr->str_off = p - (char *)d->btf->nohdr_data; + d->btf->hdr->str_off = p - d->btf->nohdr_data; memmove(p, d->btf->strings, d->btf->hdr->str_len); d->btf->strings = p; p += d->btf->hdr->str_len; - d->btf->data_size = p - (char *)d->btf->data; + d->btf->data_size = p - d->btf->data; return 0; } @@ -2997,7 +3008,7 @@ static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id) */ static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id) { - struct btf_type *t = d->btf->types[type_id]; + struct btf_type *t = btf_type_by_id(d->btf, type_id); int i, r; switch (btf_kind(t)) { -- cgit v1.3.1 From b86042478fa083d87f1b67047e788d70b8c81eef Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 25 Sep 2020 18:13:50 -0700 Subject: libbpf: Remove assumption of single contiguous memory for BTF data Refactor internals of struct btf to remove assumptions that BTF header, type data, and string data are layed out contiguously in a memory in a single memory allocation. Now we have three separate pointers pointing to the start of each respective are: header, types, strings. In the next patches, these pointers will be re-assigned to point to independently allocated memory areas, if BTF needs to be modified. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200926011357.2366158-3-andriin@fb.com --- tools/lib/bpf/bpf.c | 2 +- tools/lib/bpf/bpf.h | 2 +- tools/lib/bpf/btf.c | 99 +++++++++++++++++++++++++++++++---------------------- 3 files changed, 60 insertions(+), 43 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index c5a4d8444bf6..70575a37aa14 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -846,7 +846,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); } -int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, +int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, bool do_log) { union bpf_attr attr = {}; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 4f3568e55527..16806810a53c 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -234,7 +234,7 @@ LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd); -LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, +LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, bool do_log); LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 7c9957893ef2..d180a677a3fb 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -27,18 +27,37 @@ static struct btf_type btf_void; struct btf { - union { - struct btf_header *hdr; - void *data; - }; + void *raw_data; + __u32 raw_size; + + /* + * When BTF is loaded from ELF or raw memory it is stored + * in contiguous memory block, pointed to by raw_data pointer, and + * hdr, types_data, and strs_data point inside that memory region to + * respective parts of BTF representation: + * + * +--------------------------------+ + * | Header | Types | Strings | + * +--------------------------------+ + * ^ ^ ^ + * | | | + * hdr | | + * types_data-+ | + * strs_data------------+ + */ + struct btf_header *hdr; + void *types_data; + void *strs_data; + + /* type ID to `struct btf_type *` lookup index */ __u32 *type_offs; __u32 type_offs_cap; - const char *strings; - void *nohdr_data; - void *types_data; __u32 nr_types; - __u32 data_size; + + /* BTF object FD, if loaded into kernel */ int fd; + + /* Pointer size (in bytes) for a target architecture of this BTF */ int ptr_sz; }; @@ -80,7 +99,7 @@ static int btf_parse_hdr(struct btf *btf) const struct btf_header *hdr = btf->hdr; __u32 meta_left; - if (btf->data_size < sizeof(struct btf_header)) { + if (btf->raw_size < sizeof(struct btf_header)) { pr_debug("BTF header not found\n"); return -EINVAL; } @@ -100,7 +119,7 @@ static int btf_parse_hdr(struct btf *btf) return -ENOTSUP; } - meta_left = btf->data_size - sizeof(*hdr); + meta_left = btf->raw_size - sizeof(*hdr); if (!meta_left) { pr_debug("BTF has no data\n"); return -EINVAL; @@ -126,15 +145,13 @@ static int btf_parse_hdr(struct btf *btf) return -EINVAL; } - btf->nohdr_data = btf->hdr + 1; - return 0; } static int btf_parse_str_sec(struct btf *btf) { const struct btf_header *hdr = btf->hdr; - const char *start = btf->nohdr_data + hdr->str_off; + const char *start = btf->strs_data; const char *end = start + btf->hdr->str_len; if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || @@ -143,8 +160,6 @@ static int btf_parse_str_sec(struct btf *btf) return -EINVAL; } - btf->strings = start; - return 0; } @@ -186,11 +201,9 @@ static int btf_type_size(const struct btf_type *t) static int btf_parse_type_sec(struct btf *btf) { struct btf_header *hdr = btf->hdr; - void *next_type = btf->nohdr_data + hdr->type_off; + void *next_type = btf->types_data; void *end_type = next_type + hdr->type_len; - btf->types_data = next_type; - while (next_type < end_type) { int type_size; int err; @@ -466,7 +479,7 @@ void btf__free(struct btf *btf) if (btf->fd >= 0) close(btf->fd); - free(btf->data); + free(btf->raw_data); free(btf->type_offs); free(btf); } @@ -482,24 +495,24 @@ struct btf *btf__new(const void *data, __u32 size) btf->fd = -1; - btf->data = malloc(size); - if (!btf->data) { + btf->raw_data = malloc(size); + if (!btf->raw_data) { err = -ENOMEM; goto done; } + memcpy(btf->raw_data, data, size); + btf->raw_size = size; - memcpy(btf->data, data, size); - btf->data_size = size; - + btf->hdr = btf->raw_data; err = btf_parse_hdr(btf); if (err) goto done; - err = btf_parse_str_sec(btf); - if (err) - goto done; + btf->strs_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->str_off; + btf->types_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->type_off; - err = btf_parse_type_sec(btf); + err = btf_parse_str_sec(btf); + err = err ?: btf_parse_type_sec(btf); done: if (err) { @@ -820,8 +833,9 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf) int btf__load(struct btf *btf) { - __u32 log_buf_size = 0; + __u32 log_buf_size = 0, raw_size; char *log_buf = NULL; + const void *raw_data; int err = 0; if (btf->fd >= 0) @@ -836,8 +850,13 @@ retry_load: *log_buf = 0; } - btf->fd = bpf_load_btf(btf->data, btf->data_size, - log_buf, log_buf_size, false); + raw_data = btf__get_raw_data(btf, &raw_size); + if (!raw_data) { + err = -ENOMEM; + goto done; + } + + btf->fd = bpf_load_btf(raw_data, raw_size, log_buf, log_buf_size, false); if (btf->fd < 0) { if (!log_buf || errno == ENOSPC) { log_buf_size = max((__u32)BPF_LOG_BUF_SIZE, @@ -870,14 +889,14 @@ void btf__set_fd(struct btf *btf, int fd) const void *btf__get_raw_data(const struct btf *btf, __u32 *size) { - *size = btf->data_size; - return btf->data; + *size = btf->raw_size; + return btf->raw_data; } const char *btf__name_by_offset(const struct btf *btf, __u32 offset) { if (offset < btf->hdr->str_len) - return &btf->strings[offset]; + return btf->strs_data + offset; else return NULL; } @@ -1860,8 +1879,7 @@ static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx) */ static int btf_dedup_strings(struct btf_dedup *d) { - const struct btf_header *hdr = d->btf->hdr; - char *start = (char *)d->btf->nohdr_data + hdr->str_off; + char *start = d->btf->strs_data; char *end = start + d->btf->hdr->str_len; char *p = start, *tmp_strs = NULL; struct btf_str_ptrs strs = { @@ -2970,12 +2988,11 @@ static int btf_dedup_compact_types(struct btf_dedup *d) d->btf->type_offs = new_offs; /* make sure string section follows type information without gaps */ - d->btf->hdr->str_off = p - d->btf->nohdr_data; - memmove(p, d->btf->strings, d->btf->hdr->str_len); - d->btf->strings = p; - p += d->btf->hdr->str_len; + d->btf->hdr->str_off = p - d->btf->types_data; + memmove(p, d->btf->strs_data, d->btf->hdr->str_len); + d->btf->strs_data = p; - d->btf->data_size = p - d->btf->data; + d->btf->raw_size = d->btf->hdr->hdr_len + d->btf->hdr->type_len + d->btf->hdr->str_len; return 0; } -- cgit v1.3.1 From 192f5a1fe6894dca58d14dc883e6c7030e7267f7 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 25 Sep 2020 18:13:51 -0700 Subject: libbpf: Generalize common logic for managing dynamically-sized arrays Managing dynamically-sized array is a common, but not trivial functionality, which significant amount of logic and code to implement properly. So instead of re-implementing it all the time, extract it into a helper function ans reuse. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200926011357.2366158-4-andriin@fb.com --- tools/lib/bpf/btf.c | 77 ++++++++++++++++++++++++++++++----------- tools/lib/bpf/libbpf_internal.h | 3 ++ 2 files changed, 59 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index d180a677a3fb..f5255e2bd222 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -51,7 +51,7 @@ struct btf { /* type ID to `struct btf_type *` lookup index */ __u32 *type_offs; - __u32 type_offs_cap; + size_t type_offs_cap; __u32 nr_types; /* BTF object FD, if loaded into kernel */ @@ -66,31 +66,60 @@ static inline __u64 ptr_to_u64(const void *ptr) return (__u64) (unsigned long) ptr; } -static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off) +/* Ensure given dynamically allocated memory region pointed to by *data* with + * capacity of *cap_cnt* elements each taking *elem_sz* bytes has enough + * memory to accomodate *add_cnt* new elements, assuming *cur_cnt* elements + * are already used. At most *max_cnt* elements can be ever allocated. + * If necessary, memory is reallocated and all existing data is copied over, + * new pointer to the memory region is stored at *data, new memory region + * capacity (in number of elements) is stored in *cap. + * On success, memory pointer to the beginning of unused memory is returned. + * On error, NULL is returned. + */ +void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz, + size_t cur_cnt, size_t max_cnt, size_t add_cnt) { - /* nr_types is 1-based, so N types means we need N+1-sized array */ - if (btf->nr_types + 2 > btf->type_offs_cap) { - __u32 *new_offs; - __u32 expand_by, new_size; + size_t new_cnt; + void *new_data; - if (btf->type_offs_cap == BTF_MAX_NR_TYPES) - return -E2BIG; + if (cur_cnt + add_cnt <= *cap_cnt) + return *data + cur_cnt * elem_sz; - expand_by = max(btf->type_offs_cap / 4, 16U); - new_size = min(BTF_MAX_NR_TYPES, btf->type_offs_cap + expand_by); + /* requested more than the set limit */ + if (cur_cnt + add_cnt > max_cnt) + return NULL; - new_offs = libbpf_reallocarray(btf->type_offs, new_size, sizeof(*new_offs)); - if (!new_offs) - return -ENOMEM; + new_cnt = *cap_cnt; + new_cnt += new_cnt / 4; /* expand by 25% */ + if (new_cnt < 16) /* but at least 16 elements */ + new_cnt = 16; + if (new_cnt > max_cnt) /* but not exceeding a set limit */ + new_cnt = max_cnt; + if (new_cnt < cur_cnt + add_cnt) /* also ensure we have enough memory */ + new_cnt = cur_cnt + add_cnt; + + new_data = libbpf_reallocarray(*data, new_cnt, elem_sz); + if (!new_data) + return NULL; - new_offs[0] = UINT_MAX; /* VOID is specially handled */ + /* zero out newly allocated portion of memory */ + memset(new_data + (*cap_cnt) * elem_sz, 0, (new_cnt - *cap_cnt) * elem_sz); - btf->type_offs = new_offs; - btf->type_offs_cap = new_size; - } + *data = new_data; + *cap_cnt = new_cnt; + return new_data + cur_cnt * elem_sz; +} - btf->type_offs[btf->nr_types + 1] = type_off; +static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off) +{ + __u32 *p; + + p = btf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32), + btf->nr_types + 1, BTF_MAX_NR_TYPES, 1); + if (!p) + return -ENOMEM; + *p = type_off; return 0; } @@ -203,11 +232,17 @@ static int btf_parse_type_sec(struct btf *btf) struct btf_header *hdr = btf->hdr; void *next_type = btf->types_data; void *end_type = next_type + hdr->type_len; + int err, type_size; - while (next_type < end_type) { - int type_size; - int err; + /* VOID (type_id == 0) is specially handled by btf__get_type_by_id(), + * so ensure we can never properly use its offset from index by + * setting it to a large value + */ + err = btf_add_type_idx_entry(btf, UINT_MAX); + if (err) + return err; + while (next_type < end_type) { err = btf_add_type_idx_entry(btf, next_type - btf->types_data); if (err) return err; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index d3f7a6a6bc98..eed5b624a784 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -105,6 +105,9 @@ static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size) return realloc(ptr, total); } +void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz, + size_t cur_cnt, size_t max_cnt, size_t add_cnt); + static inline bool libbpf_validate_opts(const char *opts, size_t opts_sz, size_t user_sz, const char *type_name) -- cgit v1.3.1 From 7d9c71e10baa3496d95226aa3bba668f7533ec70 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 25 Sep 2020 18:13:52 -0700 Subject: libbpf: Extract generic string hashing function for reuse Calculating a hash of zero-terminated string is a common need when using hashmap, so extract it for reuse. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200926011357.2366158-5-andriin@fb.com --- tools/lib/bpf/btf_dump.c | 9 +-------- tools/lib/bpf/hashmap.h | 12 ++++++++++++ 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 6c079b3c8679..91310e528a3a 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -90,14 +90,7 @@ struct btf_dump { static size_t str_hash_fn(const void *key, void *ctx) { - const char *s = key; - size_t h = 0; - - while (*s) { - h = h * 31 + *s; - s++; - } - return h; + return str_hash(key); } static bool str_equal_fn(const void *a, const void *b, void *ctx) diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h index e0af36b0e5d8..d9b385fe808c 100644 --- a/tools/lib/bpf/hashmap.h +++ b/tools/lib/bpf/hashmap.h @@ -25,6 +25,18 @@ static inline size_t hash_bits(size_t h, int bits) #endif } +/* generic C-string hashing function */ +static inline size_t str_hash(const char *s) +{ + size_t h = 0; + + while (*s) { + h = h * 31 + *s; + s++; + } + return h; +} + typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx); typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx); -- cgit v1.3.1 From 919d2b1dbb074d438027135ba644411931179a59 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 25 Sep 2020 18:13:53 -0700 Subject: libbpf: Allow modification of BTF and add btf__add_str API Allow internal BTF representation to switch from default read-only mode, in which raw BTF data is a single non-modifiable block of memory with BTF header, types, and strings layed out sequentially and contiguously in memory, into a writable representation with types and strings data split out into separate memory regions, that can be dynamically expanded. Such writable internal representation is transparent to users of libbpf APIs, but allows to append new types and strings at the end of BTF, which is a typical use case when generating BTF programmatically. All the basic guarantees of BTF types and strings layout is preserved, i.e., user can get `struct btf_type *` pointer and read it directly. Such btf_type pointers might be invalidated if BTF is modified, so some care is required in such mixed read/write scenarios. Switch from read-only to writable configuration happens automatically the first time when user attempts to modify BTF by either adding a new type or new string. It is still possible to get raw BTF data, which is a single piece of memory that can be persisted in ELF section or into a file as raw BTF. Such raw data memory is also still owned by BTF and will be freed either when BTF object is freed or if another modification to BTF happens, as any modification invalidates BTF raw representation. This patch adds the first two BTF manipulation APIs: btf__add_str(), which allows to add arbitrary strings to BTF string section, and btf__find_str() which allows to find existing string offset, but not add it if it's missing. All the added strings are automatically deduplicated. This is achieved by maintaining an additional string lookup index for all unique strings. Such index is built when BTF is switched to modifiable mode. If at that time BTF strings section contained duplicate strings, they are not de-duplicated. This is done specifically to not modify the existing content of BTF (types, their string offsets, etc), which can cause confusion and is especially important property if there is struct btf_ext associated with struct btf. By following this "imperfect deduplication" process, btf_ext is kept consitent and correct. If deduplication of strings is necessary, it can be forced by doing BTF deduplication, at which point all the strings will be eagerly deduplicated and all string offsets both in struct btf and struct btf_ext will be updated. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200926011357.2366158-6-andriin@fb.com --- tools/lib/bpf/btf.c | 260 +++++++++++++++++++++++++++++++++++++++++++++-- tools/lib/bpf/btf.h | 4 + tools/lib/bpf/libbpf.map | 2 + 3 files changed, 258 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index f5255e2bd222..040f3b8ee39f 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -44,16 +44,46 @@ struct btf { * hdr | | * types_data-+ | * strs_data------------+ + * + * If BTF data is later modified, e.g., due to types added or + * removed, BTF deduplication performed, etc, this contiguous + * representation is broken up into three independently allocated + * memory regions to be able to modify them independently. + * raw_data is nulled out at that point, but can be later allocated + * and cached again if user calls btf__get_raw_data(), at which point + * raw_data will contain a contiguous copy of header, types, and + * strings: + * + * +----------+ +---------+ +-----------+ + * | Header | | Types | | Strings | + * +----------+ +---------+ +-----------+ + * ^ ^ ^ + * | | | + * hdr | | + * types_data----+ | + * strs_data------------------+ + * + * +----------+---------+-----------+ + * | Header | Types | Strings | + * raw_data----->+----------+---------+-----------+ */ struct btf_header *hdr; + void *types_data; - void *strs_data; + size_t types_data_cap; /* used size stored in hdr->type_len */ /* type ID to `struct btf_type *` lookup index */ __u32 *type_offs; size_t type_offs_cap; __u32 nr_types; + void *strs_data; + size_t strs_data_cap; /* used size stored in hdr->str_len */ + + /* lookup index for each unique string in strings section */ + struct hashmap *strs_hash; + /* whether strings are already deduplicated */ + bool strs_deduped; /* BTF object FD, if loaded into kernel */ int fd; @@ -506,6 +536,11 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, return -ENOENT; } +static bool btf_is_modifiable(const struct btf *btf) +{ + return (void *)btf->hdr != btf->raw_data; +} + void btf__free(struct btf *btf) { if (IS_ERR_OR_NULL(btf)) @@ -514,6 +549,17 @@ void btf__free(struct btf *btf) if (btf->fd >= 0) close(btf->fd); + if (btf_is_modifiable(btf)) { + /* if BTF was modified after loading, it will have a split + * in-memory representation for header, types, and strings + * sections, so we need to free all of them individually. It + * might still have a cached contiguous raw data present, + * which will be unconditionally freed below. + */ + free(btf->hdr); + free(btf->types_data); + free(btf->strs_data); + } free(btf->raw_data); free(btf->type_offs); free(btf); @@ -922,8 +968,29 @@ void btf__set_fd(struct btf *btf, int fd) btf->fd = fd; } -const void *btf__get_raw_data(const struct btf *btf, __u32 *size) +const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size) { + struct btf *btf = (struct btf *)btf_ro; + + if (!btf->raw_data) { + struct btf_header *hdr = btf->hdr; + void *data; + + btf->raw_size = hdr->hdr_len + hdr->type_len + hdr->str_len; + btf->raw_data = calloc(1, btf->raw_size); + if (!btf->raw_data) + return NULL; + data = btf->raw_data; + + memcpy(data, hdr, hdr->hdr_len); + data += hdr->hdr_len; + + memcpy(data, btf->types_data, hdr->type_len); + data += hdr->type_len; + + memcpy(data, btf->strs_data, hdr->str_len); + data += hdr->str_len; + } *size = btf->raw_size; return btf->raw_data; } @@ -1071,6 +1138,181 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, return 0; } +static size_t strs_hash_fn(const void *key, void *ctx) +{ + struct btf *btf = ctx; + const char *str = btf->strs_data + (long)key; + + return str_hash(str); +} + +static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx) +{ + struct btf *btf = ctx; + const char *str1 = btf->strs_data + (long)key1; + const char *str2 = btf->strs_data + (long)key2; + + return strcmp(str1, str2) == 0; +} + +/* Ensure BTF is ready to be modified (by splitting into a three memory + * regions for header, types, and strings). Also invalidate cached + * raw_data, if any. + */ +static int btf_ensure_modifiable(struct btf *btf) +{ + void *hdr, *types, *strs, *strs_end, *s; + struct hashmap *hash = NULL; + long off; + int err; + + if (btf_is_modifiable(btf)) { + /* any BTF modification invalidates raw_data */ + if (btf->raw_data) { + free(btf->raw_data); + btf->raw_data = NULL; + } + return 0; + } + + /* split raw data into three memory regions */ + hdr = malloc(btf->hdr->hdr_len); + types = malloc(btf->hdr->type_len); + strs = malloc(btf->hdr->str_len); + if (!hdr || !types || !strs) + goto err_out; + + memcpy(hdr, btf->hdr, btf->hdr->hdr_len); + memcpy(types, btf->types_data, btf->hdr->type_len); + memcpy(strs, btf->strs_data, btf->hdr->str_len); + + /* build lookup index for all strings */ + hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, btf); + if (IS_ERR(hash)) { + err = PTR_ERR(hash); + hash = NULL; + goto err_out; + } + + strs_end = strs + btf->hdr->str_len; + for (off = 0, s = strs; s < strs_end; off += strlen(s) + 1, s = strs + off) { + /* hashmap__add() returns EEXIST if string with the same + * content already is in the hash map + */ + err = hashmap__add(hash, (void *)off, (void *)off); + if (err == -EEXIST) + continue; /* duplicate */ + if (err) + goto err_out; + } + + /* only when everything was successful, update internal state */ + btf->hdr = hdr; + btf->types_data = types; + btf->types_data_cap = btf->hdr->type_len; + btf->strs_data = strs; + btf->strs_data_cap = btf->hdr->str_len; + btf->strs_hash = hash; + /* if BTF was created from scratch, all strings are guaranteed to be + * unique and deduplicated + */ + btf->strs_deduped = btf->hdr->str_len <= 1; + + /* invalidate raw_data representation */ + free(btf->raw_data); + btf->raw_data = NULL; + + return 0; + +err_out: + hashmap__free(hash); + free(hdr); + free(types); + free(strs); + return -ENOMEM; +} + +static void *btf_add_str_mem(struct btf *btf, size_t add_sz) +{ + return btf_add_mem(&btf->strs_data, &btf->strs_data_cap, 1, + btf->hdr->str_len, BTF_MAX_STR_OFFSET, add_sz); +} + +/* Find an offset in BTF string section that corresponds to a given string *s*. + * Returns: + * - >0 offset into string section, if string is found; + * - -ENOENT, if string is not in the string section; + * - <0, on any other error. + */ +int btf__find_str(struct btf *btf, const char *s) +{ + long old_off, new_off, len; + void *p; + + /* BTF needs to be in a modifiable state to build string lookup index */ + if (btf_ensure_modifiable(btf)) + return -ENOMEM; + + /* see btf__add_str() for why we do this */ + len = strlen(s) + 1; + p = btf_add_str_mem(btf, len); + if (!p) + return -ENOMEM; + + new_off = btf->hdr->str_len; + memcpy(p, s, len); + + if (hashmap__find(btf->strs_hash, (void *)new_off, (void **)&old_off)) + return old_off; + + return -ENOENT; +} + +/* Add a string s to the BTF string section. + * Returns: + * - > 0 offset into string section, on success; + * - < 0, on error. + */ +int btf__add_str(struct btf *btf, const char *s) +{ + long old_off, new_off, len; + void *p; + int err; + + if (btf_ensure_modifiable(btf)) + return -ENOMEM; + + /* Hashmap keys are always offsets within btf->strs_data, so to even + * look up some string from the "outside", we need to first append it + * at the end, so that it can be addressed with an offset. Luckily, + * until btf->hdr->str_len is incremented, that string is just a piece + * of garbage for the rest of BTF code, so no harm, no foul. On the + * other hand, if the string is unique, it's already appended and + * ready to be used, only a simple btf->hdr->str_len increment away. + */ + len = strlen(s) + 1; + p = btf_add_str_mem(btf, len); + if (!p) + return -ENOMEM; + + new_off = btf->hdr->str_len; + memcpy(p, s, len); + + /* Now attempt to add the string, but only if the string with the same + * contents doesn't exist already (HASHMAP_ADD strategy). If such + * string exists, we'll get its offset in old_off (that's old_key). + */ + err = hashmap__insert(btf->strs_hash, (void *)new_off, (void *)new_off, + HASHMAP_ADD, (const void **)&old_off, NULL); + if (err == -EEXIST) + return old_off; /* duplicated string, return existing offset */ + if (err) + return err; + + btf->hdr->str_len += len; /* new unique string, adjust data length */ + return new_off; +} + struct btf_ext_sec_setup_param { __u32 off; __u32 len; @@ -1537,6 +1779,9 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, return -EINVAL; } + if (btf_ensure_modifiable(btf)) + return -ENOMEM; + err = btf_dedup_strings(d); if (err < 0) { pr_debug("btf_dedup_strings failed:%d\n", err); @@ -1926,6 +2171,9 @@ static int btf_dedup_strings(struct btf_dedup *d) int i, j, err = 0, grp_idx; bool grp_used; + if (d->btf->strs_deduped) + return 0; + /* build index of all strings */ while (p < end) { if (strs.cnt + 1 > strs.cap) { @@ -2018,6 +2266,7 @@ static int btf_dedup_strings(struct btf_dedup *d) goto done; d->btf->hdr->str_len = end - start; + d->btf->strs_deduped = true; done: free(tmp_strs); @@ -3021,12 +3270,7 @@ static int btf_dedup_compact_types(struct btf_dedup *d) if (!new_offs) return -ENOMEM; d->btf->type_offs = new_offs; - - /* make sure string section follows type information without gaps */ - d->btf->hdr->str_off = p - d->btf->types_data; - memmove(p, d->btf->strs_data, d->btf->hdr->str_len); - d->btf->strs_data = p; - + d->btf->hdr->str_off = d->btf->hdr->type_len; d->btf->raw_size = d->btf->hdr->hdr_len + d->btf->hdr->type_len + d->btf->hdr->str_len; return 0; } diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 2a55320d87d0..d88b6447c222 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -5,6 +5,7 @@ #define __LIBBPF_BTF_H #include +#include #include #include @@ -72,6 +73,9 @@ LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); LIBBPF_API struct btf *libbpf_find_kernel_btf(void); +LIBBPF_API int btf__find_str(struct btf *btf, const char *s); +LIBBPF_API int btf__add_str(struct btf *btf, const char *s); + struct btf_dedup_opts { unsigned int dedup_table_size; bool dont_resolve_fwds; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 0623e7a99b1e..f1518c51d2e5 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -305,6 +305,8 @@ LIBBPF_0.2.0 { bpf_prog_bind_map; bpf_prog_test_run_opts; bpf_program__section_name; + btf__add_str; + btf__find_str; perf_buffer__buffer_cnt; perf_buffer__buffer_fd; perf_buffer__epoll_fd; -- cgit v1.3.1 From a871b04310248024e022e104eba3ec81d144cc64 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 25 Sep 2020 18:13:54 -0700 Subject: libbpf: Add btf__new_empty() to create an empty BTF object Add an ability to create an empty BTF object from scratch. This is going to be used by pahole for BTF encoding. And also by selftest for convenient creation of BTF objects. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200926011357.2366158-7-andriin@fb.com --- tools/lib/bpf/btf.c | 30 ++++++++++++++++++++++++++++++ tools/lib/bpf/btf.h | 1 + tools/lib/bpf/libbpf.map | 1 + 3 files changed, 32 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 040f3b8ee39f..54861a5ba9d9 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -565,6 +565,36 @@ void btf__free(struct btf *btf) free(btf); } +struct btf *btf__new_empty(void) +{ + struct btf *btf; + + btf = calloc(1, sizeof(*btf)); + if (!btf) + return ERR_PTR(-ENOMEM); + btf->fd = -1; + btf->ptr_sz = sizeof(void *); + + /* +1 for empty string at offset 0 */ + btf->raw_size = sizeof(struct btf_header) + 1; + btf->raw_data = calloc(1, btf->raw_size); + if (!btf->raw_data) { + free(btf); + return ERR_PTR(-ENOMEM); + } + + btf->hdr = btf->raw_data; + btf->hdr->hdr_len = sizeof(struct btf_header); + btf->hdr->magic = BTF_MAGIC; + btf->hdr->version = BTF_VERSION; + + btf->types_data = btf->raw_data + btf->hdr->hdr_len; + btf->strs_data = btf->raw_data + btf->hdr->hdr_len; + btf->hdr->str_len = 1; /* empty string at offset 0 */ + + return btf; +} + struct btf *btf__new(const void *data, __u32 size) { struct btf *btf; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index d88b6447c222..b387d0b61f1c 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -27,6 +27,7 @@ struct bpf_object; LIBBPF_API void btf__free(struct btf *btf); LIBBPF_API struct btf *btf__new(const void *data, __u32 size); +LIBBPF_API struct btf *btf__new_empty(void); LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext); LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext); LIBBPF_API struct btf *btf__parse_raw(const char *path); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index f1518c51d2e5..d02d8f51e34f 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -307,6 +307,7 @@ LIBBPF_0.2.0 { bpf_program__section_name; btf__add_str; btf__find_str; + btf__new_empty; perf_buffer__buffer_cnt; perf_buffer__buffer_fd; perf_buffer__epoll_fd; -- cgit v1.3.1 From c4d0bfb45068d853a478b9067a95969b1886a30f Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Mon, 28 Sep 2020 12:31:05 +0100 Subject: bpf: Add bpf_snprintf_btf helper A helper is added to support tracing kernel type information in BPF using the BPF Type Format (BTF). Its signature is long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags); struct btf_ptr * specifies - a pointer to the data to be traced - the BTF id of the type of data pointed to - a flags field is provided for future use; these flags are not to be confused with the BTF_F_* flags below that control how the btf_ptr is displayed; the flags member of the struct btf_ptr may be used to disambiguate types in kernel versus module BTF, etc; the main distinction is the flags relate to the type and information needed in identifying it; not how it is displayed. For example a BPF program with a struct sk_buff *skb could do the following: static struct btf_ptr b = { }; b.ptr = skb; b.type_id = __builtin_btf_type_id(struct sk_buff, 1); bpf_snprintf_btf(str, sizeof(str), &b, sizeof(b), 0, 0); Default output looks like this: (struct sk_buff){ .transport_header = (__u16)65535, .mac_header = (__u16)65535, .end = (sk_buff_data_t)192, .head = (unsigned char *)0x000000007524fd8b, .data = (unsigned char *)0x000000007524fd8b, .truesize = (unsigned int)768, .users = (refcount_t){ .refs = (atomic_t){ .counter = (int)1, }, }, } Flags modifying display are as follows: - BTF_F_COMPACT: no formatting around type information - BTF_F_NONAME: no struct/union member names/types - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values; equivalent to %px. - BTF_F_ZERO: show zero-valued struct/union members; they are not displayed by default Signed-off-by: Alan Maguire Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1601292670-1616-4-git-send-email-alan.maguire@oracle.com --- include/linux/bpf.h | 1 + include/linux/btf.h | 9 +++--- include/uapi/linux/bpf.h | 67 ++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/core.c | 1 + kernel/bpf/helpers.c | 4 +++ kernel/trace/bpf_trace.c | 65 ++++++++++++++++++++++++++++++++++++++++ scripts/bpf_helpers_doc.py | 2 ++ tools/include/uapi/linux/bpf.h | 67 ++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 212 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e620a4b1290f..768b533ba48e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1822,6 +1822,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; extern const struct bpf_func_proto bpf_copy_from_user_proto; +extern const struct bpf_func_proto bpf_snprintf_btf_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/linux/btf.h b/include/linux/btf.h index d0f5d3c9ec3d..3e5cdc2ba963 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -6,6 +6,7 @@ #include #include +#include #define BTF_TYPE_EMIT(type) ((void)(type *)0) @@ -59,10 +60,10 @@ const struct btf_type *btf_type_id_size(const struct btf *btf, * - BTF_SHOW_UNSAFE: skip use of bpf_probe_read() to safely read * data before displaying it. */ -#define BTF_SHOW_COMPACT (1ULL << 0) -#define BTF_SHOW_NONAME (1ULL << 1) -#define BTF_SHOW_PTR_RAW (1ULL << 2) -#define BTF_SHOW_ZERO (1ULL << 3) +#define BTF_SHOW_COMPACT BTF_F_COMPACT +#define BTF_SHOW_NONAME BTF_F_NONAME +#define BTF_SHOW_PTR_RAW BTF_F_PTR_RAW +#define BTF_SHOW_ZERO BTF_F_ZERO #define BTF_SHOW_UNSAFE (1ULL << 4) void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 82522f05c021..cca9eb1b13e5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3594,6 +3594,42 @@ union bpf_attr { * the data in *dst*. This is a wrapper of **copy_from_user**\ (). * Return * 0 on success, or a negative error in case of failure. + * + * long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags) + * Description + * Use BTF to store a string representation of *ptr*->ptr in *str*, + * using *ptr*->type_id. This value should specify the type + * that *ptr*->ptr points to. LLVM __builtin_btf_type_id(type, 1) + * can be used to look up vmlinux BTF type ids. Traversing the + * data structure using BTF, the type information and values are + * stored in the first *str_size* - 1 bytes of *str*. Safe copy of + * the pointer data is carried out to avoid kernel crashes during + * operation. Smaller types can use string space on the stack; + * larger programs can use map data to store the string + * representation. + * + * The string can be subsequently shared with userspace via + * bpf_perf_event_output() or ring buffer interfaces. + * bpf_trace_printk() is to be avoided as it places too small + * a limit on string size to be useful. + * + * *flags* is a combination of + * + * **BTF_F_COMPACT** + * no formatting around type information + * **BTF_F_NONAME** + * no struct/union member names/types + * **BTF_F_PTR_RAW** + * show raw (unobfuscated) pointer values; + * equivalent to printk specifier %px. + * **BTF_F_ZERO** + * show zero-valued struct/union members; they + * are not displayed by default + * + * Return + * The number of bytes that were written (or would have been + * written if output had to be truncated due to string size), + * or a negative error in cases of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3745,6 +3781,7 @@ union bpf_attr { FN(inode_storage_delete), \ FN(d_path), \ FN(copy_from_user), \ + FN(snprintf_btf), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4853,4 +4890,34 @@ struct bpf_sk_lookup { __u32 local_port; /* Host byte order */ }; +/* + * struct btf_ptr is used for typed pointer representation; the + * type id is used to render the pointer data as the appropriate type + * via the bpf_snprintf_btf() helper described above. A flags field - + * potentially to specify additional details about the BTF pointer + * (rather than its mode of display) - is included for future use. + * Display flags - BTF_F_* - are passed to bpf_snprintf_btf separately. + */ +struct btf_ptr { + void *ptr; + __u32 type_id; + __u32 flags; /* BTF ptr flags; unused at present. */ +}; + +/* + * Flags to control bpf_snprintf_btf() behaviour. + * - BTF_F_COMPACT: no formatting around type information + * - BTF_F_NONAME: no struct/union member names/types + * - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values; + * equivalent to %px. + * - BTF_F_ZERO: show zero-valued struct/union members; they + * are not displayed by default + */ +enum { + BTF_F_COMPACT = (1ULL << 0), + BTF_F_NONAME = (1ULL << 1), + BTF_F_PTR_RAW = (1ULL << 2), + BTF_F_ZERO = (1ULL << 3), +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index c4811b139caa..403fb2341a86 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2216,6 +2216,7 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak; const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto __weak; const struct bpf_func_proto bpf_get_local_storage_proto __weak; const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak; +const struct bpf_func_proto bpf_snprintf_btf_proto __weak; const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) { diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 5cc7425ee476..e825441781ab 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -683,6 +683,10 @@ bpf_base_func_proto(enum bpf_func_id func_id) if (!perfmon_capable()) return NULL; return bpf_get_trace_printk_proto(); + case BPF_FUNC_snprintf_btf: + if (!perfmon_capable()) + return NULL; + return &bpf_snprintf_btf_proto; case BPF_FUNC_jiffies64: return &bpf_jiffies64_proto; default: diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 2834866d379a..140e1be9dab6 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -16,6 +17,9 @@ #include #include +#include +#include + #include #include "trace_probe.h" @@ -1147,6 +1151,65 @@ static const struct bpf_func_proto bpf_d_path_proto = { .allowed = bpf_d_path_allowed, }; +#define BTF_F_ALL (BTF_F_COMPACT | BTF_F_NONAME | \ + BTF_F_PTR_RAW | BTF_F_ZERO) + +static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size, + u64 flags, const struct btf **btf, + s32 *btf_id) +{ + const struct btf_type *t; + + if (unlikely(flags & ~(BTF_F_ALL))) + return -EINVAL; + + if (btf_ptr_size != sizeof(struct btf_ptr)) + return -EINVAL; + + *btf = bpf_get_btf_vmlinux(); + + if (IS_ERR_OR_NULL(*btf)) + return PTR_ERR(*btf); + + if (ptr->type_id > 0) + *btf_id = ptr->type_id; + else + return -EINVAL; + + if (*btf_id > 0) + t = btf_type_by_id(*btf, *btf_id); + if (*btf_id <= 0 || !t) + return -ENOENT; + + return 0; +} + +BPF_CALL_5(bpf_snprintf_btf, char *, str, u32, str_size, struct btf_ptr *, ptr, + u32, btf_ptr_size, u64, flags) +{ + const struct btf *btf; + s32 btf_id; + int ret; + + ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id); + if (ret) + return ret; + + return btf_type_snprintf_show(btf, btf_id, ptr->ptr, str, str_size, + flags); +} + +const struct bpf_func_proto bpf_snprintf_btf_proto = { + .func = bpf_snprintf_btf, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_PTR_TO_MEM, + .arg4_type = ARG_CONST_SIZE, + .arg5_type = ARG_ANYTHING, +}; + const struct bpf_func_proto * bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -1233,6 +1296,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_task_stack_proto; case BPF_FUNC_copy_from_user: return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL; + case BPF_FUNC_snprintf_btf: + return &bpf_snprintf_btf_proto; default: return NULL; } diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 08388173973f..7d86fdd190be 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -433,6 +433,7 @@ class PrinterHelpers(Printer): 'struct sk_msg_md', 'struct xdp_md', 'struct path', + 'struct btf_ptr', ] known_types = { '...', @@ -474,6 +475,7 @@ class PrinterHelpers(Printer): 'struct udp6_sock', 'struct task_struct', 'struct path', + 'struct btf_ptr', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 82522f05c021..cca9eb1b13e5 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3594,6 +3594,42 @@ union bpf_attr { * the data in *dst*. This is a wrapper of **copy_from_user**\ (). * Return * 0 on success, or a negative error in case of failure. + * + * long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags) + * Description + * Use BTF to store a string representation of *ptr*->ptr in *str*, + * using *ptr*->type_id. This value should specify the type + * that *ptr*->ptr points to. LLVM __builtin_btf_type_id(type, 1) + * can be used to look up vmlinux BTF type ids. Traversing the + * data structure using BTF, the type information and values are + * stored in the first *str_size* - 1 bytes of *str*. Safe copy of + * the pointer data is carried out to avoid kernel crashes during + * operation. Smaller types can use string space on the stack; + * larger programs can use map data to store the string + * representation. + * + * The string can be subsequently shared with userspace via + * bpf_perf_event_output() or ring buffer interfaces. + * bpf_trace_printk() is to be avoided as it places too small + * a limit on string size to be useful. + * + * *flags* is a combination of + * + * **BTF_F_COMPACT** + * no formatting around type information + * **BTF_F_NONAME** + * no struct/union member names/types + * **BTF_F_PTR_RAW** + * show raw (unobfuscated) pointer values; + * equivalent to printk specifier %px. + * **BTF_F_ZERO** + * show zero-valued struct/union members; they + * are not displayed by default + * + * Return + * The number of bytes that were written (or would have been + * written if output had to be truncated due to string size), + * or a negative error in cases of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3745,6 +3781,7 @@ union bpf_attr { FN(inode_storage_delete), \ FN(d_path), \ FN(copy_from_user), \ + FN(snprintf_btf), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4853,4 +4890,34 @@ struct bpf_sk_lookup { __u32 local_port; /* Host byte order */ }; +/* + * struct btf_ptr is used for typed pointer representation; the + * type id is used to render the pointer data as the appropriate type + * via the bpf_snprintf_btf() helper described above. A flags field - + * potentially to specify additional details about the BTF pointer + * (rather than its mode of display) - is included for future use. + * Display flags - BTF_F_* - are passed to bpf_snprintf_btf separately. + */ +struct btf_ptr { + void *ptr; + __u32 type_id; + __u32 flags; /* BTF ptr flags; unused at present. */ +}; + +/* + * Flags to control bpf_snprintf_btf() behaviour. + * - BTF_F_COMPACT: no formatting around type information + * - BTF_F_NONAME: no struct/union member names/types + * - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values; + * equivalent to %px. + * - BTF_F_ZERO: show zero-valued struct/union members; they + * are not displayed by default + */ +enum { + BTF_F_COMPACT = (1ULL << 0), + BTF_F_NONAME = (1ULL << 1), + BTF_F_PTR_RAW = (1ULL << 2), + BTF_F_ZERO = (1ULL << 3), +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.3.1 From 076a95f5aff2ce7299d1966dee06de2723554c24 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Mon, 28 Sep 2020 12:31:06 +0100 Subject: selftests/bpf: Add bpf_snprintf_btf helper tests Tests verifying snprintf()ing of various data structures, flags combinations using a tp_btf program. Tests are skipped if __builtin_btf_type_id is not available to retrieve BTF type ids. Signed-off-by: Alan Maguire Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1601292670-1616-5-git-send-email-alan.maguire@oracle.com --- .../selftests/bpf/prog_tests/snprintf_btf.c | 60 +++++ .../selftests/bpf/progs/netif_receive_skb.c | 249 +++++++++++++++++++++ 2 files changed, 309 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/snprintf_btf.c create mode 100644 tools/testing/selftests/bpf/progs/netif_receive_skb.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c new file mode 100644 index 000000000000..3a8ecf833adc --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include "netif_receive_skb.skel.h" + +/* Demonstrate that bpf_snprintf_btf succeeds and that various data types + * are formatted correctly. + */ +void test_snprintf_btf(void) +{ + struct netif_receive_skb *skel; + struct netif_receive_skb__bss *bss; + int err, duration = 0; + + skel = netif_receive_skb__open(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + err = netif_receive_skb__load(skel); + if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err)) + goto cleanup; + + bss = skel->bss; + + err = netif_receive_skb__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* generate receive event */ + system("ping -c 1 127.0.0.1 > /dev/null"); + + if (bss->skip) { + printf("%s:SKIP:no __builtin_btf_type_id\n", __func__); + test__skip(); + goto cleanup; + } + + /* + * Make sure netif_receive_skb program was triggered + * and it set expected return values from bpf_trace_printk()s + * and all tests ran. + */ + if (CHECK(bss->ret <= 0, + "bpf_snprintf_btf: got return value", + "ret <= 0 %ld test %d\n", bss->ret, bss->ran_subtests)) + goto cleanup; + + if (CHECK(bss->ran_subtests == 0, "check if subtests ran", + "no subtests ran, did BPF program run?")) + goto cleanup; + + if (CHECK(bss->num_subtests != bss->ran_subtests, + "check all subtests ran", + "only ran %d of %d tests\n", bss->num_subtests, + bss->ran_subtests)) + goto cleanup; + +cleanup: + netif_receive_skb__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c new file mode 100644 index 000000000000..b873d805a345 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/netif_receive_skb.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Oracle and/or its affiliates. */ + +#include "vmlinux.h" +#include +#include +#include + +#include + +long ret = 0; +int num_subtests = 0; +int ran_subtests = 0; +bool skip = false; + +#define STRSIZE 2048 +#define EXPECTED_STRSIZE 256 + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, char[STRSIZE]); +} strdata SEC(".maps"); + +static int __strncmp(const void *m1, const void *m2, size_t len) +{ + const unsigned char *s1 = m1; + const unsigned char *s2 = m2; + int i, delta = 0; + + for (i = 0; i < len; i++) { + delta = s1[i] - s2[i]; + if (delta || s1[i] == 0 || s2[i] == 0) + break; + } + return delta; +} + +#if __has_builtin(__builtin_btf_type_id) +#define TEST_BTF(_str, _type, _flags, _expected, ...) \ + do { \ + static const char _expectedval[EXPECTED_STRSIZE] = \ + _expected; \ + static const char _ptrtype[64] = #_type; \ + __u64 _hflags = _flags | BTF_F_COMPACT; \ + static _type _ptrdata = __VA_ARGS__; \ + static struct btf_ptr _ptr = { }; \ + int _cmp; \ + \ + ++num_subtests; \ + if (ret < 0) \ + break; \ + ++ran_subtests; \ + _ptr.ptr = &_ptrdata; \ + _ptr.type_id = bpf_core_type_id_kernel(_type); \ + if (_ptr.type_id <= 0) { \ + ret = -EINVAL; \ + break; \ + } \ + ret = bpf_snprintf_btf(_str, STRSIZE, \ + &_ptr, sizeof(_ptr), _hflags); \ + if (ret) \ + break; \ + _cmp = __strncmp(_str, _expectedval, EXPECTED_STRSIZE); \ + if (_cmp != 0) { \ + bpf_printk("(%d) got %s", _cmp, _str); \ + bpf_printk("(%d) expected %s", _cmp, \ + _expectedval); \ + ret = -EBADMSG; \ + break; \ + } \ + } while (0) +#endif + +/* Use where expected data string matches its stringified declaration */ +#define TEST_BTF_C(_str, _type, _flags, ...) \ + TEST_BTF(_str, _type, _flags, "(" #_type ")" #__VA_ARGS__, \ + __VA_ARGS__) + +/* TRACE_EVENT(netif_receive_skb, + * TP_PROTO(struct sk_buff *skb), + */ +SEC("tp_btf/netif_receive_skb") +int BPF_PROG(trace_netif_receive_skb, struct sk_buff *skb) +{ + static __u64 flags[] = { 0, BTF_F_COMPACT, BTF_F_ZERO, BTF_F_PTR_RAW, + BTF_F_NONAME, BTF_F_COMPACT | BTF_F_ZERO | + BTF_F_PTR_RAW | BTF_F_NONAME }; + static struct btf_ptr p = { }; + __u32 key = 0; + int i, __ret; + char *str; + +#if __has_builtin(__builtin_btf_type_id) + str = bpf_map_lookup_elem(&strdata, &key); + if (!str) + return 0; + + /* Ensure we can write skb string representation */ + p.type_id = bpf_core_type_id_kernel(struct sk_buff); + p.ptr = skb; + for (i = 0; i < ARRAY_SIZE(flags); i++) { + ++num_subtests; + ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0); + if (ret < 0) + bpf_printk("returned %d when writing skb", ret); + ++ran_subtests; + } + + /* Check invalid ptr value */ + p.ptr = 0; + __ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0); + if (__ret >= 0) { + bpf_printk("printing NULL should generate error, got (%d)", + __ret); + ret = -ERANGE; + } + + /* Verify type display for various types. */ + + /* simple int */ + TEST_BTF_C(str, int, 0, 1234); + TEST_BTF(str, int, BTF_F_NONAME, "1234", 1234); + /* zero value should be printed at toplevel */ + TEST_BTF(str, int, 0, "(int)0", 0); + TEST_BTF(str, int, BTF_F_NONAME, "0", 0); + TEST_BTF(str, int, BTF_F_ZERO, "(int)0", 0); + TEST_BTF(str, int, BTF_F_NONAME | BTF_F_ZERO, "0", 0); + TEST_BTF_C(str, int, 0, -4567); + TEST_BTF(str, int, BTF_F_NONAME, "-4567", -4567); + + /* simple char */ + TEST_BTF_C(str, char, 0, 100); + TEST_BTF(str, char, BTF_F_NONAME, "100", 100); + /* zero value should be printed at toplevel */ + TEST_BTF(str, char, 0, "(char)0", 0); + TEST_BTF(str, char, BTF_F_NONAME, "0", 0); + TEST_BTF(str, char, BTF_F_ZERO, "(char)0", 0); + TEST_BTF(str, char, BTF_F_NONAME | BTF_F_ZERO, "0", 0); + + /* simple typedef */ + TEST_BTF_C(str, uint64_t, 0, 100); + TEST_BTF(str, u64, BTF_F_NONAME, "1", 1); + /* zero value should be printed at toplevel */ + TEST_BTF(str, u64, 0, "(u64)0", 0); + TEST_BTF(str, u64, BTF_F_NONAME, "0", 0); + TEST_BTF(str, u64, BTF_F_ZERO, "(u64)0", 0); + TEST_BTF(str, u64, BTF_F_NONAME|BTF_F_ZERO, "0", 0); + + /* typedef struct */ + TEST_BTF_C(str, atomic_t, 0, {.counter = (int)1,}); + TEST_BTF(str, atomic_t, BTF_F_NONAME, "{1,}", {.counter = 1,}); + /* typedef with 0 value should be printed at toplevel */ + TEST_BTF(str, atomic_t, 0, "(atomic_t){}", {.counter = 0,}); + TEST_BTF(str, atomic_t, BTF_F_NONAME, "{}", {.counter = 0,}); + TEST_BTF(str, atomic_t, BTF_F_ZERO, "(atomic_t){.counter = (int)0,}", + {.counter = 0,}); + TEST_BTF(str, atomic_t, BTF_F_NONAME|BTF_F_ZERO, + "{0,}", {.counter = 0,}); + + /* enum where enum value does (and does not) exist */ + TEST_BTF_C(str, enum bpf_cmd, 0, BPF_MAP_CREATE); + TEST_BTF(str, enum bpf_cmd, 0, "(enum bpf_cmd)BPF_MAP_CREATE", 0); + TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME, "BPF_MAP_CREATE", + BPF_MAP_CREATE); + TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME|BTF_F_ZERO, + "BPF_MAP_CREATE", 0); + + TEST_BTF(str, enum bpf_cmd, BTF_F_ZERO, "(enum bpf_cmd)BPF_MAP_CREATE", + BPF_MAP_CREATE); + TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME|BTF_F_ZERO, + "BPF_MAP_CREATE", BPF_MAP_CREATE); + TEST_BTF_C(str, enum bpf_cmd, 0, 2000); + TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME, "2000", 2000); + + /* simple struct */ + TEST_BTF_C(str, struct btf_enum, 0, + {.name_off = (__u32)3,.val = (__s32)-1,}); + TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{3,-1,}", + { .name_off = 3, .val = -1,}); + TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{-1,}", + { .name_off = 0, .val = -1,}); + TEST_BTF(str, struct btf_enum, BTF_F_NONAME|BTF_F_ZERO, "{0,-1,}", + { .name_off = 0, .val = -1,}); + /* empty struct should be printed */ + TEST_BTF(str, struct btf_enum, 0, "(struct btf_enum){}", + { .name_off = 0, .val = 0,}); + TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{}", + { .name_off = 0, .val = 0,}); + TEST_BTF(str, struct btf_enum, BTF_F_ZERO, + "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}", + { .name_off = 0, .val = 0,}); + + /* struct with pointers */ + TEST_BTF(str, struct list_head, BTF_F_PTR_RAW, + "(struct list_head){.next = (struct list_head *)0x0000000000000001,}", + { .next = (struct list_head *)1 }); + /* NULL pointer should not be displayed */ + TEST_BTF(str, struct list_head, BTF_F_PTR_RAW, + "(struct list_head){}", + { .next = (struct list_head *)0 }); + + /* struct with char array */ + TEST_BTF(str, struct bpf_prog_info, 0, + "(struct bpf_prog_info){.name = (char[])['f','o','o',],}", + { .name = "foo",}); + TEST_BTF(str, struct bpf_prog_info, BTF_F_NONAME, + "{['f','o','o',],}", + {.name = "foo",}); + /* leading null char means do not display string */ + TEST_BTF(str, struct bpf_prog_info, 0, + "(struct bpf_prog_info){}", + {.name = {'\0', 'f', 'o', 'o'}}); + /* handle non-printable characters */ + TEST_BTF(str, struct bpf_prog_info, 0, + "(struct bpf_prog_info){.name = (char[])[1,2,3,],}", + { .name = {1, 2, 3, 0}}); + + /* struct with non-char array */ + TEST_BTF(str, struct __sk_buff, 0, + "(struct __sk_buff){.cb = (__u32[])[1,2,3,4,5,],}", + { .cb = {1, 2, 3, 4, 5,},}); + TEST_BTF(str, struct __sk_buff, BTF_F_NONAME, + "{[1,2,3,4,5,],}", + { .cb = { 1, 2, 3, 4, 5},}); + /* For non-char, arrays, show non-zero values only */ + TEST_BTF(str, struct __sk_buff, 0, + "(struct __sk_buff){.cb = (__u32[])[1,],}", + { .cb = { 0, 0, 1, 0, 0},}); + + /* struct with bitfields */ + TEST_BTF_C(str, struct bpf_insn, 0, + {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,}); + TEST_BTF(str, struct bpf_insn, BTF_F_NONAME, "{1,0x2,0x3,4,5,}", + {.code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4, + .imm = 5,}); +#else + skip = true; +#endif + + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.3.1 From eb58bbf2e5c7917aa30bf8818761f26bbeeb2290 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Mon, 28 Sep 2020 12:31:08 +0100 Subject: selftests/bpf: Fix overflow tests to reflect iter size increase bpf iter size increase to PAGE_SIZE << 3 means overflow tests assuming page size need to be bumped also. Signed-off-by: Alan Maguire Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1601292670-1616-7-git-send-email-alan.maguire@oracle.com --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index fe1a83b9875c..ad9de136158e 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -352,7 +352,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) struct bpf_map_info map_info = {}; struct bpf_iter_test_kern4 *skel; struct bpf_link *link; - __u32 page_size; + __u32 iter_size; char *buf; skel = bpf_iter_test_kern4__open(); @@ -374,19 +374,19 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) "map_creation failed: %s\n", strerror(errno))) goto free_map1; - /* bpf_seq_printf kernel buffer is one page, so one map + /* bpf_seq_printf kernel buffer is 8 pages, so one map * bpf_seq_write will mostly fill it, and the other map * will partially fill and then trigger overflow and need * bpf_seq_read restart. */ - page_size = sysconf(_SC_PAGE_SIZE); + iter_size = sysconf(_SC_PAGE_SIZE) << 3; if (test_e2big_overflow) { - skel->rodata->print_len = (page_size + 8) / 8; - expected_read_len = 2 * (page_size + 8); + skel->rodata->print_len = (iter_size + 8) / 8; + expected_read_len = 2 * (iter_size + 8); } else if (!ret1) { - skel->rodata->print_len = (page_size - 8) / 8; - expected_read_len = 2 * (page_size - 8); + skel->rodata->print_len = (iter_size - 8) / 8; + expected_read_len = 2 * (iter_size - 8); } else { skel->rodata->print_len = 1; expected_read_len = 2 * 8; -- cgit v1.3.1 From eb411377aed9e27835e77ee0710ee8f4649958f3 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Mon, 28 Sep 2020 12:31:09 +0100 Subject: bpf: Add bpf_seq_printf_btf helper A helper is added to allow seq file writing of kernel data structures using vmlinux BTF. Its signature is long bpf_seq_printf_btf(struct seq_file *m, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags); Flags and struct btf_ptr definitions/use are identical to the bpf_snprintf_btf helper, and the helper returns 0 on success or a negative error value. Suggested-by: Alexei Starovoitov Signed-off-by: Alan Maguire Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1601292670-1616-8-git-send-email-alan.maguire@oracle.com --- include/linux/btf.h | 2 ++ include/uapi/linux/bpf.h | 9 +++++++++ kernel/bpf/btf.c | 4 ++-- kernel/bpf/core.c | 1 + kernel/trace/bpf_trace.c | 33 +++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 9 +++++++++ 6 files changed, 56 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/include/linux/btf.h b/include/linux/btf.h index 3e5cdc2ba963..024e16ff7dcc 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -68,6 +68,8 @@ const struct btf_type *btf_type_id_size(const struct btf *btf, void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, struct seq_file *m); +int btf_type_seq_show_flags(const struct btf *btf, u32 type_id, void *obj, + struct seq_file *m, u64 flags); /* * Copy len bytes of string representation of obj of BTF type_id into buf. diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index cca9eb1b13e5..96ddb00b91dc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3630,6 +3630,14 @@ union bpf_attr { * The number of bytes that were written (or would have been * written if output had to be truncated due to string size), * or a negative error in cases of failure. + * + * long bpf_seq_printf_btf(struct seq_file *m, struct btf_ptr *ptr, u32 ptr_size, u64 flags) + * Description + * Use BTF to write to seq_write a string representation of + * *ptr*->ptr, using *ptr*->type_id as per bpf_snprintf_btf(). + * *flags* are identical to those used for bpf_snprintf_btf. + * Return + * 0 on success or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3782,6 +3790,7 @@ union bpf_attr { FN(d_path), \ FN(copy_from_user), \ FN(snprintf_btf), \ + FN(seq_printf_btf), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index dcdd7109aa29..498e5e553825 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5346,8 +5346,8 @@ static void btf_seq_show(struct btf_show *show, const char *fmt, seq_vprintf((struct seq_file *)show->target, fmt, args); } -static int btf_type_seq_show_flags(const struct btf *btf, u32 type_id, - void *obj, struct seq_file *m, u64 flags) +int btf_type_seq_show_flags(const struct btf *btf, u32 type_id, + void *obj, struct seq_file *m, u64 flags) { struct btf_show sseq; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 403fb2341a86..c4ba45fa4fe1 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2217,6 +2217,7 @@ const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto __weak; const struct bpf_func_proto bpf_get_local_storage_proto __weak; const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak; const struct bpf_func_proto bpf_snprintf_btf_proto __weak; +const struct bpf_func_proto bpf_seq_printf_btf_proto __weak; const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 140e1be9dab6..e118a83439c3 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -71,6 +71,10 @@ static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name) u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); +static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size, + u64 flags, const struct btf **btf, + s32 *btf_id); + /** * trace_call_bpf - invoke BPF program * @call: tracepoint event @@ -776,6 +780,31 @@ static const struct bpf_func_proto bpf_seq_write_proto = { .arg3_type = ARG_CONST_SIZE_OR_ZERO, }; +BPF_CALL_4(bpf_seq_printf_btf, struct seq_file *, m, struct btf_ptr *, ptr, + u32, btf_ptr_size, u64, flags) +{ + const struct btf *btf; + s32 btf_id; + int ret; + + ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id); + if (ret) + return ret; + + return btf_type_seq_show_flags(btf, btf_id, ptr->ptr, m, flags); +} + +static const struct bpf_func_proto bpf_seq_printf_btf_proto = { + .func = bpf_seq_printf_btf, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &btf_seq_file_ids[0], + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, + .arg4_type = ARG_ANYTHING, +}; + static __always_inline int get_map_perf_counter(struct bpf_map *map, u64 flags, u64 *value, u64 *enabled, u64 *running) @@ -1695,6 +1724,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return prog->expected_attach_type == BPF_TRACE_ITER ? &bpf_seq_write_proto : NULL; + case BPF_FUNC_seq_printf_btf: + return prog->expected_attach_type == BPF_TRACE_ITER ? + &bpf_seq_printf_btf_proto : + NULL; case BPF_FUNC_d_path: return &bpf_d_path_proto; default: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index cca9eb1b13e5..96ddb00b91dc 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3630,6 +3630,14 @@ union bpf_attr { * The number of bytes that were written (or would have been * written if output had to be truncated due to string size), * or a negative error in cases of failure. + * + * long bpf_seq_printf_btf(struct seq_file *m, struct btf_ptr *ptr, u32 ptr_size, u64 flags) + * Description + * Use BTF to write to seq_write a string representation of + * *ptr*->ptr, using *ptr*->type_id as per bpf_snprintf_btf(). + * *flags* are identical to those used for bpf_snprintf_btf. + * Return + * 0 on success or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3782,6 +3790,7 @@ union bpf_attr { FN(d_path), \ FN(copy_from_user), \ FN(snprintf_btf), \ + FN(seq_printf_btf), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.3.1 From b72091bd4ee4f4bfdb50468b7465cbc5a7b4442b Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Mon, 28 Sep 2020 12:31:10 +0100 Subject: selftests/bpf: Add test for bpf_seq_printf_btf helper Add a test verifying iterating over tasks and displaying BTF representation of task_struct succeeds. Suggested-by: Alexei Starovoitov Signed-off-by: Alan Maguire Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1601292670-1616-9-git-send-email-alan.maguire@oracle.com --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 74 ++++++++++++++++++++++ .../selftests/bpf/progs/bpf_iter_task_btf.c | 50 +++++++++++++++ 2 files changed, 124 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index ad9de136158e..af15630a24dd 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -7,6 +7,7 @@ #include "bpf_iter_task.skel.h" #include "bpf_iter_task_stack.skel.h" #include "bpf_iter_task_file.skel.h" +#include "bpf_iter_task_btf.skel.h" #include "bpf_iter_tcp4.skel.h" #include "bpf_iter_tcp6.skel.h" #include "bpf_iter_udp4.skel.h" @@ -167,6 +168,77 @@ done: bpf_iter_task_file__destroy(skel); } +#define TASKBUFSZ 32768 + +static char taskbuf[TASKBUFSZ]; + +static void do_btf_read(struct bpf_iter_task_btf *skel) +{ + struct bpf_program *prog = skel->progs.dump_task_struct; + struct bpf_iter_task_btf__bss *bss = skel->bss; + int iter_fd = -1, len = 0, bufleft = TASKBUFSZ; + struct bpf_link *link; + char *buf = taskbuf; + + link = bpf_program__attach_iter(prog, NULL); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + return; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + do { + len = read(iter_fd, buf, bufleft); + if (len > 0) { + buf += len; + bufleft -= len; + } + } while (len > 0); + + if (bss->skip) { + printf("%s:SKIP:no __builtin_btf_type_id\n", __func__); + test__skip(); + goto free_link; + } + + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + goto free_link; + + CHECK(strstr(taskbuf, "(struct task_struct)") == NULL, + "check for btf representation of task_struct in iter data", + "struct task_struct not found"); +free_link: + if (iter_fd > 0) + close(iter_fd); + bpf_link__destroy(link); +} + +static void test_task_btf(void) +{ + struct bpf_iter_task_btf__bss *bss; + struct bpf_iter_task_btf *skel; + + skel = bpf_iter_task_btf__open_and_load(); + if (CHECK(!skel, "bpf_iter_task_btf__open_and_load", + "skeleton open_and_load failed\n")) + return; + + bss = skel->bss; + + do_btf_read(skel); + + if (CHECK(bss->tasks == 0, "check if iterated over tasks", + "no task iteration, did BPF program run?\n")) + goto cleanup; + + CHECK(bss->seq_err != 0, "check for unexpected err", + "bpf_seq_printf_btf returned %ld", bss->seq_err); + +cleanup: + bpf_iter_task_btf__destroy(skel); +} + static void test_tcp4(void) { struct bpf_iter_tcp4 *skel; @@ -957,6 +1029,8 @@ void test_bpf_iter(void) test_task_stack(); if (test__start_subtest("task_file")) test_task_file(); + if (test__start_subtest("task_btf")) + test_task_btf(); if (test__start_subtest("tcp4")) test_tcp4(); if (test__start_subtest("tcp6")) diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c new file mode 100644 index 000000000000..a1ddc36f13ec --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Oracle and/or its affiliates. */ +#include "bpf_iter.h" +#include +#include +#include + +#include + +char _license[] SEC("license") = "GPL"; + +long tasks = 0; +long seq_err = 0; +bool skip = false; + +SEC("iter/task") +int dump_task_struct(struct bpf_iter__task *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct task_struct *task = ctx->task; + static struct btf_ptr ptr = { }; + long ret; + +#if __has_builtin(__builtin_btf_type_id) + ptr.type_id = bpf_core_type_id_kernel(struct task_struct); + ptr.ptr = task; + + if (ctx->meta->seq_num == 0) + BPF_SEQ_PRINTF(seq, "Raw BTF task\n"); + + ret = bpf_seq_printf_btf(seq, &ptr, sizeof(ptr), 0); + switch (ret) { + case 0: + tasks++; + break; + case -ERANGE: + /* NULL task or task->fs, don't count it as an error. */ + break; + case -E2BIG: + return 1; + default: + seq_err = ret; + break; + } +#else + skip = true; +#endif + + return 0; +} -- cgit v1.3.1 From 4a3b33f8579af182ebcf9c7e9304effce505500e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 28 Sep 2020 19:05:30 -0700 Subject: libbpf: Add BTF writing APIs Add APIs for appending new BTF types at the end of BTF object. Each BTF kind has either one API of the form btf__add_(). For types that have variable amount of additional items (struct/union, enum, func_proto, datasec), additional API is provided to emit each such item. E.g., for emitting a struct, one would use the following sequence of API calls: btf__add_struct(...); btf__add_field(...); ... btf__add_field(...); Each btf__add_field() will ensure that the last BTF type is of STRUCT or UNION kind and will automatically increment that type's vlen field. All the strings are provided as C strings (const char *), not a string offset. This significantly improves usability of BTF writer APIs. All such strings will be automatically appended to string section or existing string will be re-used, if such string was already added previously. Each API attempts to do all the reasonable validations, like enforcing non-empty names for entities with required names, proper value bounds, various bit offset restrictions, etc. Type ID validation is minimal because it's possible to emit a type that refers to type that will be emitted later, so libbpf has no way to enforce such cases. User must be careful to properly emit all the necessary types and specify type IDs that will be valid in the finally generated BTF. Each of btf__add_() APIs return new type ID on success or negative value on error. APIs like btf__add_field() that emit additional items return zero on success and negative value on error. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200929020533.711288-2-andriin@fb.com --- tools/lib/bpf/btf.c | 781 +++++++++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/btf.h | 38 +++ tools/lib/bpf/libbpf.map | 19 ++ 3 files changed, 838 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 54861a5ba9d9..7533088b2524 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1343,6 +1343,787 @@ int btf__add_str(struct btf *btf, const char *s) return new_off; } +static void *btf_add_type_mem(struct btf *btf, size_t add_sz) +{ + return btf_add_mem(&btf->types_data, &btf->types_data_cap, 1, + btf->hdr->type_len, UINT_MAX, add_sz); +} + +static __u32 btf_type_info(int kind, int vlen, int kflag) +{ + return (kflag << 31) | (kind << 24) | vlen; +} + +static void btf_type_inc_vlen(struct btf_type *t) +{ + t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t)); +} + +/* + * Append new BTF_KIND_INT type with: + * - *name* - non-empty, non-NULL type name; + * - *sz* - power-of-2 (1, 2, 4, ..) size of the type, in bytes; + * - encoding is a combination of BTF_INT_SIGNED, BTF_INT_CHAR, BTF_INT_BOOL. + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding) +{ + struct btf_type *t; + int sz, err, name_off; + + /* non-empty name */ + if (!name || !name[0]) + return -EINVAL; + /* byte_sz must be power of 2 */ + if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 16) + return -EINVAL; + if (encoding & ~(BTF_INT_SIGNED | BTF_INT_CHAR | BTF_INT_BOOL)) + return -EINVAL; + + /* deconstruct BTF, if necessary, and invalidate raw_data */ + if (btf_ensure_modifiable(btf)) + return -ENOMEM; + + sz = sizeof(struct btf_type) + sizeof(int); + t = btf_add_type_mem(btf, sz); + if (!t) + return -ENOMEM; + + /* if something goes wrong later, we might end up with an extra string, + * but that shouldn't be a problem, because BTF can't be constructed + * completely anyway and will most probably be just discarded + */ + name_off = btf__add_str(btf, name); + if (name_off < 0) + return name_off; + + t->name_off = name_off; + t->info = btf_type_info(BTF_KIND_INT, 0, 0); + t->size = byte_sz; + /* set INT info, we don't allow setting legacy bit offset/size */ + *(__u32 *)(t + 1) = (encoding << 24) | (byte_sz * 8); + + err = btf_add_type_idx_entry(btf, btf->hdr->type_len); + if (err) + return err; + + btf->hdr->type_len += sz; + btf->hdr->str_off += sz; + btf->nr_types++; + return btf->nr_types; +} + +/* it's completely legal to append BTF types with type IDs pointing forward to + * types that haven't been appended yet, so we only make sure that id looks + * sane, we can't guarantee that ID will always be valid + */ +static int validate_type_id(int id) +{ + if (id < 0 || id > BTF_MAX_NR_TYPES) + return -EINVAL; + return 0; +} + +/* generic append function for PTR, TYPEDEF, CONST/VOLATILE/RESTRICT */ +static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id) +{ + struct btf_type *t; + int sz, name_off = 0, err; + + if (validate_type_id(ref_type_id)) + return -EINVAL; + + if (btf_ensure_modifiable(btf)) + return -ENOMEM; + + sz = sizeof(struct btf_type); + t = btf_add_type_mem(btf, sz); + if (!t) + return -ENOMEM; + + if (name && name[0]) { + name_off = btf__add_str(btf, name); + if (name_off < 0) + return name_off; + } + + t->name_off = name_off; + t->info = btf_type_info(kind, 0, 0); + t->type = ref_type_id; + + err = btf_add_type_idx_entry(btf, btf->hdr->type_len); + if (err) + return err; + + btf->hdr->type_len += sz; + btf->hdr->str_off += sz; + btf->nr_types++; + return btf->nr_types; +} + +/* + * Append new BTF_KIND_PTR type with: + * - *ref_type_id* - referenced type ID, it might not exist yet; + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_ptr(struct btf *btf, int ref_type_id) +{ + return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id); +} + +/* + * Append new BTF_KIND_ARRAY type with: + * - *index_type_id* - type ID of the type describing array index; + * - *elem_type_id* - type ID of the type describing array element; + * - *nr_elems* - the size of the array; + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 nr_elems) +{ + struct btf_type *t; + struct btf_array *a; + int sz, err; + + if (validate_type_id(index_type_id) || validate_type_id(elem_type_id)) + return -EINVAL; + + if (btf_ensure_modifiable(btf)) + return -ENOMEM; + + sz = sizeof(struct btf_type) + sizeof(struct btf_array); + t = btf_add_type_mem(btf, sz); + if (!t) + return -ENOMEM; + + t->name_off = 0; + t->info = btf_type_info(BTF_KIND_ARRAY, 0, 0); + t->size = 0; + + a = btf_array(t); + a->type = elem_type_id; + a->index_type = index_type_id; + a->nelems = nr_elems; + + err = btf_add_type_idx_entry(btf, btf->hdr->type_len); + if (err) + return err; + + btf->hdr->type_len += sz; + btf->hdr->str_off += sz; + btf->nr_types++; + return btf->nr_types; +} + +/* generic STRUCT/UNION append function */ +static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32 bytes_sz) +{ + struct btf_type *t; + int sz, err, name_off = 0; + + if (btf_ensure_modifiable(btf)) + return -ENOMEM; + + sz = sizeof(struct btf_type); + t = btf_add_type_mem(btf, sz); + if (!t) + return -ENOMEM; + + if (name && name[0]) { + name_off = btf__add_str(btf, name); + if (name_off < 0) + return name_off; + } + + /* start out with vlen=0 and no kflag; this will be adjusted when + * adding each member + */ + t->name_off = name_off; + t->info = btf_type_info(kind, 0, 0); + t->size = bytes_sz; + + err = btf_add_type_idx_entry(btf, btf->hdr->type_len); + if (err) + return err; + + btf->hdr->type_len += sz; + btf->hdr->str_off += sz; + btf->nr_types++; + return btf->nr_types; +} + +/* + * Append new BTF_KIND_STRUCT type with: + * - *name* - name of the struct, can be NULL or empty for anonymous structs; + * - *byte_sz* - size of the struct, in bytes; + * + * Struct initially has no fields in it. Fields can be added by + * btf__add_field() right after btf__add_struct() succeeds. + * + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_struct(struct btf *btf, const char *name, __u32 byte_sz) +{ + return btf_add_composite(btf, BTF_KIND_STRUCT, name, byte_sz); +} + +/* + * Append new BTF_KIND_UNION type with: + * - *name* - name of the union, can be NULL or empty for anonymous union; + * - *byte_sz* - size of the union, in bytes; + * + * Union initially has no fields in it. Fields can be added by + * btf__add_field() right after btf__add_union() succeeds. All fields + * should have *bit_offset* of 0. + * + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_union(struct btf *btf, const char *name, __u32 byte_sz) +{ + return btf_add_composite(btf, BTF_KIND_UNION, name, byte_sz); +} + +/* + * Append new field for the current STRUCT/UNION type with: + * - *name* - name of the field, can be NULL or empty for anonymous field; + * - *type_id* - type ID for the type describing field type; + * - *bit_offset* - bit offset of the start of the field within struct/union; + * - *bit_size* - bit size of a bitfield, 0 for non-bitfield fields; + * Returns: + * - 0, on success; + * - <0, on error. + */ +int btf__add_field(struct btf *btf, const char *name, int type_id, + __u32 bit_offset, __u32 bit_size) +{ + struct btf_type *t; + struct btf_member *m; + bool is_bitfield; + int sz, name_off = 0; + + /* last type should be union/struct */ + if (btf->nr_types == 0) + return -EINVAL; + t = btf_type_by_id(btf, btf->nr_types); + if (!btf_is_composite(t)) + return -EINVAL; + + if (validate_type_id(type_id)) + return -EINVAL; + /* best-effort bit field offset/size enforcement */ + is_bitfield = bit_size || (bit_offset % 8 != 0); + if (is_bitfield && (bit_size == 0 || bit_size > 255 || bit_offset > 0xffffff)) + return -EINVAL; + + /* only offset 0 is allowed for unions */ + if (btf_is_union(t) && bit_offset) + return -EINVAL; + + /* decompose and invalidate raw data */ + if (btf_ensure_modifiable(btf)) + return -ENOMEM; + + sz = sizeof(struct btf_member); + m = btf_add_type_mem(btf, sz); + if (!m) + return -ENOMEM; + + if (name && name[0]) { + name_off = btf__add_str(btf, name); + if (name_off < 0) + return name_off; + } + + m->name_off = name_off; + m->type = type_id; + m->offset = bit_offset | (bit_size << 24); + + /* btf_add_type_mem can invalidate t pointer */ + t = btf_type_by_id(btf, btf->nr_types); + /* update parent type's vlen and kflag */ + t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, is_bitfield || btf_kflag(t)); + + btf->hdr->type_len += sz; + btf->hdr->str_off += sz; + return 0; +} + +/* + * Append new BTF_KIND_ENUM type with: + * - *name* - name of the enum, can be NULL or empty for anonymous enums; + * - *byte_sz* - size of the enum, in bytes. + * + * Enum initially has no enum values in it (and corresponds to enum forward + * declaration). Enumerator values can be added by btf__add_enum_value() + * immediately after btf__add_enum() succeeds. + * + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz) +{ + struct btf_type *t; + int sz, err, name_off = 0; + + /* byte_sz must be power of 2 */ + if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8) + return -EINVAL; + + if (btf_ensure_modifiable(btf)) + return -ENOMEM; + + sz = sizeof(struct btf_type); + t = btf_add_type_mem(btf, sz); + if (!t) + return -ENOMEM; + + if (name && name[0]) { + name_off = btf__add_str(btf, name); + if (name_off < 0) + return name_off; + } + + /* start out with vlen=0; it will be adjusted when adding enum values */ + t->name_off = name_off; + t->info = btf_type_info(BTF_KIND_ENUM, 0, 0); + t->size = byte_sz; + + err = btf_add_type_idx_entry(btf, btf->hdr->type_len); + if (err) + return err; + + btf->hdr->type_len += sz; + btf->hdr->str_off += sz; + btf->nr_types++; + return btf->nr_types; +} + +/* + * Append new enum value for the current ENUM type with: + * - *name* - name of the enumerator value, can't be NULL or empty; + * - *value* - integer value corresponding to enum value *name*; + * Returns: + * - 0, on success; + * - <0, on error. + */ +int btf__add_enum_value(struct btf *btf, const char *name, __s64 value) +{ + struct btf_type *t; + struct btf_enum *v; + int sz, name_off; + + /* last type should be BTF_KIND_ENUM */ + if (btf->nr_types == 0) + return -EINVAL; + t = btf_type_by_id(btf, btf->nr_types); + if (!btf_is_enum(t)) + return -EINVAL; + + /* non-empty name */ + if (!name || !name[0]) + return -EINVAL; + if (value < INT_MIN || value > UINT_MAX) + return -E2BIG; + + /* decompose and invalidate raw data */ + if (btf_ensure_modifiable(btf)) + return -ENOMEM; + + sz = sizeof(struct btf_enum); + v = btf_add_type_mem(btf, sz); + if (!v) + return -ENOMEM; + + name_off = btf__add_str(btf, name); + if (name_off < 0) + return name_off; + + v->name_off = name_off; + v->val = value; + + /* update parent type's vlen */ + t = btf_type_by_id(btf, btf->nr_types); + btf_type_inc_vlen(t); + + btf->hdr->type_len += sz; + btf->hdr->str_off += sz; + return 0; +} + +/* + * Append new BTF_KIND_FWD type with: + * - *name*, non-empty/non-NULL name; + * - *fwd_kind*, kind of forward declaration, one of BTF_FWD_STRUCT, + * BTF_FWD_UNION, or BTF_FWD_ENUM; + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind) +{ + if (!name || !name[0]) + return -EINVAL; + + switch (fwd_kind) { + case BTF_FWD_STRUCT: + case BTF_FWD_UNION: { + struct btf_type *t; + int id; + + id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0); + if (id <= 0) + return id; + t = btf_type_by_id(btf, id); + t->info = btf_type_info(BTF_KIND_FWD, 0, fwd_kind == BTF_FWD_UNION); + return id; + } + case BTF_FWD_ENUM: + /* enum forward in BTF currently is just an enum with no enum + * values; we also assume a standard 4-byte size for it + */ + return btf__add_enum(btf, name, sizeof(int)); + default: + return -EINVAL; + } +} + +/* + * Append new BTF_KING_TYPEDEF type with: + * - *name*, non-empty/non-NULL name; + * - *ref_type_id* - referenced type ID, it might not exist yet; + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id) +{ + if (!name || !name[0]) + return -EINVAL; + + return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id); +} + +/* + * Append new BTF_KIND_VOLATILE type with: + * - *ref_type_id* - referenced type ID, it might not exist yet; + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_volatile(struct btf *btf, int ref_type_id) +{ + return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id); +} + +/* + * Append new BTF_KIND_CONST type with: + * - *ref_type_id* - referenced type ID, it might not exist yet; + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_const(struct btf *btf, int ref_type_id) +{ + return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id); +} + +/* + * Append new BTF_KIND_RESTRICT type with: + * - *ref_type_id* - referenced type ID, it might not exist yet; + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_restrict(struct btf *btf, int ref_type_id) +{ + return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id); +} + +/* + * Append new BTF_KIND_FUNC type with: + * - *name*, non-empty/non-NULL name; + * - *proto_type_id* - FUNC_PROTO's type ID, it might not exist yet; + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_func(struct btf *btf, const char *name, + enum btf_func_linkage linkage, int proto_type_id) +{ + int id; + + if (!name || !name[0]) + return -EINVAL; + if (linkage != BTF_FUNC_STATIC && linkage != BTF_FUNC_GLOBAL && + linkage != BTF_FUNC_EXTERN) + return -EINVAL; + + id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id); + if (id > 0) { + struct btf_type *t = btf_type_by_id(btf, id); + + t->info = btf_type_info(BTF_KIND_FUNC, linkage, 0); + } + return id; +} + +/* + * Append new BTF_KIND_FUNC_PROTO with: + * - *ret_type_id* - type ID for return result of a function. + * + * Function prototype initially has no arguments, but they can be added by + * btf__add_func_param() one by one, immediately after + * btf__add_func_proto() succeeded. + * + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_func_proto(struct btf *btf, int ret_type_id) +{ + struct btf_type *t; + int sz, err; + + if (validate_type_id(ret_type_id)) + return -EINVAL; + + if (btf_ensure_modifiable(btf)) + return -ENOMEM; + + sz = sizeof(struct btf_type); + t = btf_add_type_mem(btf, sz); + if (!t) + return -ENOMEM; + + /* start out with vlen=0; this will be adjusted when adding enum + * values, if necessary + */ + t->name_off = 0; + t->info = btf_type_info(BTF_KIND_FUNC_PROTO, 0, 0); + t->type = ret_type_id; + + err = btf_add_type_idx_entry(btf, btf->hdr->type_len); + if (err) + return err; + + btf->hdr->type_len += sz; + btf->hdr->str_off += sz; + btf->nr_types++; + return btf->nr_types; +} + +/* + * Append new function parameter for current FUNC_PROTO type with: + * - *name* - parameter name, can be NULL or empty; + * - *type_id* - type ID describing the type of the parameter. + * Returns: + * - 0, on success; + * - <0, on error. + */ +int btf__add_func_param(struct btf *btf, const char *name, int type_id) +{ + struct btf_type *t; + struct btf_param *p; + int sz, name_off = 0; + + if (validate_type_id(type_id)) + return -EINVAL; + + /* last type should be BTF_KIND_FUNC_PROTO */ + if (btf->nr_types == 0) + return -EINVAL; + t = btf_type_by_id(btf, btf->nr_types); + if (!btf_is_func_proto(t)) + return -EINVAL; + + /* decompose and invalidate raw data */ + if (btf_ensure_modifiable(btf)) + return -ENOMEM; + + sz = sizeof(struct btf_param); + p = btf_add_type_mem(btf, sz); + if (!p) + return -ENOMEM; + + if (name && name[0]) { + name_off = btf__add_str(btf, name); + if (name_off < 0) + return name_off; + } + + p->name_off = name_off; + p->type = type_id; + + /* update parent type's vlen */ + t = btf_type_by_id(btf, btf->nr_types); + btf_type_inc_vlen(t); + + btf->hdr->type_len += sz; + btf->hdr->str_off += sz; + return 0; +} + +/* + * Append new BTF_KIND_VAR type with: + * - *name* - non-empty/non-NULL name; + * - *linkage* - variable linkage, one of BTF_VAR_STATIC, + * BTF_VAR_GLOBAL_ALLOCATED, or BTF_VAR_GLOBAL_EXTERN; + * - *type_id* - type ID of the type describing the type of the variable. + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id) +{ + struct btf_type *t; + struct btf_var *v; + int sz, err, name_off; + + /* non-empty name */ + if (!name || !name[0]) + return -EINVAL; + if (linkage != BTF_VAR_STATIC && linkage != BTF_VAR_GLOBAL_ALLOCATED && + linkage != BTF_VAR_GLOBAL_EXTERN) + return -EINVAL; + if (validate_type_id(type_id)) + return -EINVAL; + + /* deconstruct BTF, if necessary, and invalidate raw_data */ + if (btf_ensure_modifiable(btf)) + return -ENOMEM; + + sz = sizeof(struct btf_type) + sizeof(struct btf_var); + t = btf_add_type_mem(btf, sz); + if (!t) + return -ENOMEM; + + name_off = btf__add_str(btf, name); + if (name_off < 0) + return name_off; + + t->name_off = name_off; + t->info = btf_type_info(BTF_KIND_VAR, 0, 0); + t->type = type_id; + + v = btf_var(t); + v->linkage = linkage; + + err = btf_add_type_idx_entry(btf, btf->hdr->type_len); + if (err) + return err; + + btf->hdr->type_len += sz; + btf->hdr->str_off += sz; + btf->nr_types++; + return btf->nr_types; +} + +/* + * Append new BTF_KIND_DATASEC type with: + * - *name* - non-empty/non-NULL name; + * - *byte_sz* - data section size, in bytes. + * + * Data section is initially empty. Variables info can be added with + * btf__add_datasec_var_info() calls, after btf__add_datasec() succeeds. + * + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz) +{ + struct btf_type *t; + int sz, err, name_off; + + /* non-empty name */ + if (!name || !name[0]) + return -EINVAL; + + if (btf_ensure_modifiable(btf)) + return -ENOMEM; + + sz = sizeof(struct btf_type); + t = btf_add_type_mem(btf, sz); + if (!t) + return -ENOMEM; + + name_off = btf__add_str(btf, name); + if (name_off < 0) + return name_off; + + /* start with vlen=0, which will be update as var_secinfos are added */ + t->name_off = name_off; + t->info = btf_type_info(BTF_KIND_DATASEC, 0, 0); + t->size = byte_sz; + + err = btf_add_type_idx_entry(btf, btf->hdr->type_len); + if (err) + return err; + + btf->hdr->type_len += sz; + btf->hdr->str_off += sz; + btf->nr_types++; + return btf->nr_types; +} + +/* + * Append new data section variable information entry for current DATASEC type: + * - *var_type_id* - type ID, describing type of the variable; + * - *offset* - variable offset within data section, in bytes; + * - *byte_sz* - variable size, in bytes. + * + * Returns: + * - 0, on success; + * - <0, on error. + */ +int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __u32 byte_sz) +{ + struct btf_type *t; + struct btf_var_secinfo *v; + int sz; + + /* last type should be BTF_KIND_DATASEC */ + if (btf->nr_types == 0) + return -EINVAL; + t = btf_type_by_id(btf, btf->nr_types); + if (!btf_is_datasec(t)) + return -EINVAL; + + if (validate_type_id(var_type_id)) + return -EINVAL; + + /* decompose and invalidate raw data */ + if (btf_ensure_modifiable(btf)) + return -ENOMEM; + + sz = sizeof(struct btf_var_secinfo); + v = btf_add_type_mem(btf, sz); + if (!v) + return -ENOMEM; + + v->type = var_type_id; + v->offset = offset; + v->size = byte_sz; + + /* update parent type's vlen */ + t = btf_type_by_id(btf, btf->nr_types); + btf_type_inc_vlen(t); + + btf->hdr->type_len += sz; + btf->hdr->str_off += sz; + return 0; +} + struct btf_ext_sec_setup_param { __u32 off; __u32 len; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index b387d0b61f1c..d6629a2e8ebf 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -77,6 +77,44 @@ LIBBPF_API struct btf *libbpf_find_kernel_btf(void); LIBBPF_API int btf__find_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_str(struct btf *btf, const char *s); +LIBBPF_API int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding); +LIBBPF_API int btf__add_ptr(struct btf *btf, int ref_type_id); +LIBBPF_API int btf__add_array(struct btf *btf, + int index_type_id, int elem_type_id, __u32 nr_elems); +/* struct/union construction APIs */ +LIBBPF_API int btf__add_struct(struct btf *btf, const char *name, __u32 sz); +LIBBPF_API int btf__add_union(struct btf *btf, const char *name, __u32 sz); +LIBBPF_API int btf__add_field(struct btf *btf, const char *name, int field_type_id, + __u32 bit_offset, __u32 bit_size); + +/* enum construction APIs */ +LIBBPF_API int btf__add_enum(struct btf *btf, const char *name, __u32 bytes_sz); +LIBBPF_API int btf__add_enum_value(struct btf *btf, const char *name, __s64 value); + +enum btf_fwd_kind { + BTF_FWD_STRUCT = 0, + BTF_FWD_UNION = 1, + BTF_FWD_ENUM = 2, +}; + +LIBBPF_API int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind); +LIBBPF_API int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id); +LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id); +LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id); +LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id); + +/* func and func_proto construction APIs */ +LIBBPF_API int btf__add_func(struct btf *btf, const char *name, + enum btf_func_linkage linkage, int proto_type_id); +LIBBPF_API int btf__add_func_proto(struct btf *btf, int ret_type_id); +LIBBPF_API int btf__add_func_param(struct btf *btf, const char *name, int type_id); + +/* var & datasec construction APIs */ +LIBBPF_API int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id); +LIBBPF_API int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz); +LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id, + __u32 offset, __u32 byte_sz); + struct btf_dedup_opts { unsigned int dedup_table_size; bool dont_resolve_fwds; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index d02d8f51e34f..0216ee6fdc2b 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -305,7 +305,26 @@ LIBBPF_0.2.0 { bpf_prog_bind_map; bpf_prog_test_run_opts; bpf_program__section_name; + btf__add_array; + btf__add_const; + btf__add_enum; + btf__add_enum_value; + btf__add_datasec; + btf__add_datasec_var_info; + btf__add_field; + btf__add_func; + btf__add_func_param; + btf__add_func_proto; + btf__add_fwd; + btf__add_int; + btf__add_ptr; + btf__add_restrict; btf__add_str; + btf__add_struct; + btf__add_typedef; + btf__add_union; + btf__add_var; + btf__add_volatile; btf__find_str; btf__new_empty; perf_buffer__buffer_cnt; -- cgit v1.3.1 From f86ed050bcee3b7daa420eb31a8b6d079701caab Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 28 Sep 2020 19:05:31 -0700 Subject: libbpf: Add btf__str_by_offset() as a more generic variant of name_by_offset BTF strings are used not just for names, they can be arbitrary strings used for CO-RE relocations, line/func infos, etc. Thus "name_by_offset" terminology is too specific and might be misleading. Instead, introduce btf__str_by_offset() API which uses generic string terminology. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200929020533.711288-3-andriin@fb.com --- tools/lib/bpf/btf.c | 7 ++++++- tools/lib/bpf/btf.h | 1 + tools/lib/bpf/libbpf.map | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 7533088b2524..2d0b1e12f50e 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1025,7 +1025,7 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size) return btf->raw_data; } -const char *btf__name_by_offset(const struct btf *btf, __u32 offset) +const char *btf__str_by_offset(const struct btf *btf, __u32 offset) { if (offset < btf->hdr->str_len) return btf->strs_data + offset; @@ -1033,6 +1033,11 @@ const char *btf__name_by_offset(const struct btf *btf, __u32 offset) return NULL; } +const char *btf__name_by_offset(const struct btf *btf, __u32 offset) +{ + return btf__str_by_offset(btf, offset); +} + int btf__get_from_id(__u32 id, struct btf **btf) { struct bpf_btf_info btf_info = { 0 }; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index d6629a2e8ebf..f7dec0144c3c 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -49,6 +49,7 @@ LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API void btf__set_fd(struct btf *btf, int fd); LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); +LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, __u32 expected_key_size, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 0216ee6fdc2b..6b10ebad69c6 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -327,6 +327,7 @@ LIBBPF_0.2.0 { btf__add_volatile; btf__find_str; btf__new_empty; + btf__str_by_offset; perf_buffer__buffer_cnt; perf_buffer__buffer_fd; perf_buffer__epoll_fd; -- cgit v1.3.1 From 9141f75a3279bab4a7d761c6050d96777b646767 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 28 Sep 2020 19:05:32 -0700 Subject: selftests/bpf: Test BTF writing APIs Add selftests for BTF writer APIs. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200929020533.711288-4-andriin@fb.com --- tools/lib/bpf/btf.c | 8 +- tools/testing/selftests/bpf/prog_tests/btf_write.c | 278 +++++++++++++++++++++ 2 files changed, 282 insertions(+), 4 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/btf_write.c (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 2d0b1e12f50e..c25f49fad5a6 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -31,10 +31,10 @@ struct btf { __u32 raw_size; /* - * When BTF is loaded from ELF or raw memory it is stored - * in contiguous memory block, pointed to by raw_data pointer, and - * hdr, types_data, and strs_data point inside that memory region to - * respective parts of BTF representation: + * When BTF is loaded from an ELF or raw memory it is stored + * in a contiguous memory block. The hdr, type_data, and, strs_data + * point inside that memory region to their respective parts of BTF + * representation: * * +--------------------------------+ * | Header | Types | Strings | diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c new file mode 100644 index 000000000000..88dce2cfa79b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include +#include + +#define ASSERT_EQ(actual, expected, name) ({ \ + typeof(actual) ___act = (actual); \ + typeof(expected) ___exp = (expected); \ + bool ___ok = ___act == ___exp; \ + CHECK(!___ok, (name), \ + "unexpected %s: actual %lld != expected %lld\n", \ + (name), (long long)(___act), (long long)(___exp)); \ + ___ok; \ +}) + +#define ASSERT_STREQ(actual, expected, name) ({ \ + const char *___act = actual; \ + const char *___exp = expected; \ + bool ___ok = strcmp(___act, ___exp) == 0; \ + CHECK(!___ok, (name), \ + "unexpected %s: actual '%s' != expected '%s'\n", \ + (name), ___act, ___exp); \ + ___ok; \ +}) + +#define ASSERT_OK(res, name) ({ \ + long long ___res = (res); \ + bool ___ok = ___res == 0; \ + CHECK(!___ok, (name), "unexpected error: %lld\n", ___res); \ + ___ok; \ +}) + +#define ASSERT_ERR(res, name) ({ \ + long long ___res = (res); \ + bool ___ok = ___res < 0; \ + CHECK(!___ok, (name), "unexpected success: %lld\n", ___res); \ + ___ok; \ +}) + +static int duration = 0; + +void test_btf_write() { + const struct btf_var_secinfo *vi; + const struct btf_type *t; + const struct btf_member *m; + const struct btf_enum *v; + const struct btf_param *p; + struct btf *btf; + int id, err, str_off; + + btf = btf__new_empty(); + if (CHECK(IS_ERR(btf), "new_empty", "failed: %ld\n", PTR_ERR(btf))) + return; + + str_off = btf__find_str(btf, "int"); + ASSERT_EQ(str_off, -ENOENT, "int_str_missing_off"); + + str_off = btf__add_str(btf, "int"); + ASSERT_EQ(str_off, 1, "int_str_off"); + + str_off = btf__find_str(btf, "int"); + ASSERT_EQ(str_off, 1, "int_str_found_off"); + + /* BTF_KIND_INT */ + id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED); + ASSERT_EQ(id, 1, "int_id"); + + t = btf__type_by_id(btf, 1); + /* should re-use previously added "int" string */ + ASSERT_EQ(t->name_off, str_off, "int_name_off"); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "int", "int_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_INT, "int_kind"); + ASSERT_EQ(t->size, 4, "int_sz"); + ASSERT_EQ(btf_int_encoding(t), BTF_INT_SIGNED, "int_enc"); + ASSERT_EQ(btf_int_bits(t), 32, "int_bits"); + + /* invalid int size */ + id = btf__add_int(btf, "bad sz int", 7, 0); + ASSERT_ERR(id, "int_bad_sz"); + /* invalid encoding */ + id = btf__add_int(btf, "bad enc int", 4, 123); + ASSERT_ERR(id, "int_bad_enc"); + /* NULL name */ + id = btf__add_int(btf, NULL, 4, 0); + ASSERT_ERR(id, "int_bad_null_name"); + /* empty name */ + id = btf__add_int(btf, "", 4, 0); + ASSERT_ERR(id, "int_bad_empty_name"); + + /* PTR/CONST/VOLATILE/RESTRICT */ + id = btf__add_ptr(btf, 1); + ASSERT_EQ(id, 2, "ptr_id"); + t = btf__type_by_id(btf, 2); + ASSERT_EQ(btf_kind(t), BTF_KIND_PTR, "ptr_kind"); + ASSERT_EQ(t->type, 1, "ptr_type"); + + id = btf__add_const(btf, 5); /* points forward to restrict */ + ASSERT_EQ(id, 3, "const_id"); + t = btf__type_by_id(btf, 3); + ASSERT_EQ(btf_kind(t), BTF_KIND_CONST, "const_kind"); + ASSERT_EQ(t->type, 5, "const_type"); + + id = btf__add_volatile(btf, 3); + ASSERT_EQ(id, 4, "volatile_id"); + t = btf__type_by_id(btf, 4); + ASSERT_EQ(btf_kind(t), BTF_KIND_VOLATILE, "volatile_kind"); + ASSERT_EQ(t->type, 3, "volatile_type"); + + id = btf__add_restrict(btf, 4); + ASSERT_EQ(id, 5, "restrict_id"); + t = btf__type_by_id(btf, 5); + ASSERT_EQ(btf_kind(t), BTF_KIND_RESTRICT, "restrict_kind"); + ASSERT_EQ(t->type, 4, "restrict_type"); + + /* ARRAY */ + id = btf__add_array(btf, 1, 2, 10); /* int *[10] */ + ASSERT_EQ(id, 6, "array_id"); + t = btf__type_by_id(btf, 6); + ASSERT_EQ(btf_kind(t), BTF_KIND_ARRAY, "array_kind"); + ASSERT_EQ(btf_array(t)->index_type, 1, "array_index_type"); + ASSERT_EQ(btf_array(t)->type, 2, "array_elem_type"); + ASSERT_EQ(btf_array(t)->nelems, 10, "array_nelems"); + + /* STRUCT */ + err = btf__add_field(btf, "field", 1, 0, 0); + ASSERT_ERR(err, "no_struct_field"); + id = btf__add_struct(btf, "s1", 8); + ASSERT_EQ(id, 7, "struct_id"); + err = btf__add_field(btf, "f1", 1, 0, 0); + ASSERT_OK(err, "f1_res"); + err = btf__add_field(btf, "f2", 1, 32, 16); + ASSERT_OK(err, "f2_res"); + + t = btf__type_by_id(btf, 7); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "s1", "struct_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_STRUCT, "struct_kind"); + ASSERT_EQ(btf_vlen(t), 2, "struct_vlen"); + ASSERT_EQ(btf_kflag(t), true, "struct_kflag"); + ASSERT_EQ(t->size, 8, "struct_sz"); + m = btf_members(t) + 0; + ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f1", "f1_name"); + ASSERT_EQ(m->type, 1, "f1_type"); + ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off"); + ASSERT_EQ(btf_member_bitfield_size(t, 0), 0, "f1_bit_sz"); + m = btf_members(t) + 1; + ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f2", "f2_name"); + ASSERT_EQ(m->type, 1, "f2_type"); + ASSERT_EQ(btf_member_bit_offset(t, 1), 32, "f2_bit_off"); + ASSERT_EQ(btf_member_bitfield_size(t, 1), 16, "f2_bit_sz"); + + /* UNION */ + id = btf__add_union(btf, "u1", 8); + ASSERT_EQ(id, 8, "union_id"); + + /* invalid, non-zero offset */ + err = btf__add_field(btf, "field", 1, 1, 0); + ASSERT_ERR(err, "no_struct_field"); + + err = btf__add_field(btf, "f1", 1, 0, 16); + ASSERT_OK(err, "f1_res"); + + t = btf__type_by_id(btf, 8); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "u1", "union_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_UNION, "union_kind"); + ASSERT_EQ(btf_vlen(t), 1, "union_vlen"); + ASSERT_EQ(btf_kflag(t), true, "union_kflag"); + ASSERT_EQ(t->size, 8, "union_sz"); + m = btf_members(t) + 0; + ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f1", "f1_name"); + ASSERT_EQ(m->type, 1, "f1_type"); + ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off"); + ASSERT_EQ(btf_member_bitfield_size(t, 0), 16, "f1_bit_sz"); + + /* ENUM */ + id = btf__add_enum(btf, "e1", 4); + ASSERT_EQ(id, 9, "enum_id"); + err = btf__add_enum_value(btf, "v1", 1); + ASSERT_OK(err, "v1_res"); + err = btf__add_enum_value(btf, "v2", 2); + ASSERT_OK(err, "v2_res"); + + t = btf__type_by_id(btf, 9); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_kind"); + ASSERT_EQ(btf_vlen(t), 2, "enum_vlen"); + ASSERT_EQ(t->size, 4, "enum_sz"); + v = btf_enum(t) + 0; + ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v1", "v1_name"); + ASSERT_EQ(v->val, 1, "v1_val"); + v = btf_enum(t) + 1; + ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v2", "v2_name"); + ASSERT_EQ(v->val, 2, "v2_val"); + + /* FWDs */ + id = btf__add_fwd(btf, "struct_fwd", BTF_FWD_STRUCT); + ASSERT_EQ(id, 10, "struct_fwd_id"); + t = btf__type_by_id(btf, 10); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "struct_fwd", "fwd_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind"); + ASSERT_EQ(btf_kflag(t), 0, "fwd_kflag"); + + id = btf__add_fwd(btf, "union_fwd", BTF_FWD_UNION); + ASSERT_EQ(id, 11, "union_fwd_id"); + t = btf__type_by_id(btf, 11); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "union_fwd", "fwd_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind"); + ASSERT_EQ(btf_kflag(t), 1, "fwd_kflag"); + + id = btf__add_fwd(btf, "enum_fwd", BTF_FWD_ENUM); + ASSERT_EQ(id, 12, "enum_fwd_id"); + t = btf__type_by_id(btf, 12); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "enum_fwd", "fwd_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_fwd_kind"); + ASSERT_EQ(btf_vlen(t), 0, "enum_fwd_kind"); + ASSERT_EQ(t->size, 4, "enum_fwd_sz"); + + /* TYPEDEF */ + id = btf__add_typedef(btf, "typedef1", 1); + ASSERT_EQ(id, 13, "typedef_fwd_id"); + t = btf__type_by_id(btf, 13); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "typedef1", "typedef_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_TYPEDEF, "typedef_kind"); + ASSERT_EQ(t->type, 1, "typedef_type"); + + /* FUNC & FUNC_PROTO */ + id = btf__add_func(btf, "func1", BTF_FUNC_GLOBAL, 15); + ASSERT_EQ(id, 14, "func_id"); + t = btf__type_by_id(btf, 14); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "func1", "func_name"); + ASSERT_EQ(t->type, 15, "func_type"); + ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC, "func_kind"); + ASSERT_EQ(btf_vlen(t), BTF_FUNC_GLOBAL, "func_vlen"); + + id = btf__add_func_proto(btf, 1); + ASSERT_EQ(id, 15, "func_proto_id"); + err = btf__add_func_param(btf, "p1", 1); + ASSERT_OK(err, "p1_res"); + err = btf__add_func_param(btf, "p2", 2); + ASSERT_OK(err, "p2_res"); + + t = btf__type_by_id(btf, 15); + ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC_PROTO, "func_proto_kind"); + ASSERT_EQ(btf_vlen(t), 2, "func_proto_vlen"); + ASSERT_EQ(t->type, 1, "func_proto_ret_type"); + p = btf_params(t) + 0; + ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p1", "p1_name"); + ASSERT_EQ(p->type, 1, "p1_type"); + p = btf_params(t) + 1; + ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p2", "p2_name"); + ASSERT_EQ(p->type, 2, "p2_type"); + + /* VAR */ + id = btf__add_var(btf, "var1", BTF_VAR_GLOBAL_ALLOCATED, 1); + ASSERT_EQ(id, 16, "var_id"); + t = btf__type_by_id(btf, 16); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "var1", "var_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_VAR, "var_kind"); + ASSERT_EQ(t->type, 1, "var_type"); + ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_type"); + + /* DATASECT */ + id = btf__add_datasec(btf, "datasec1", 12); + ASSERT_EQ(id, 17, "datasec_id"); + err = btf__add_datasec_var_info(btf, 1, 4, 8); + ASSERT_OK(err, "v1_res"); + + t = btf__type_by_id(btf, 17); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "datasec1", "datasec_name"); + ASSERT_EQ(t->size, 12, "datasec_sz"); + ASSERT_EQ(btf_kind(t), BTF_KIND_DATASEC, "datasec_kind"); + ASSERT_EQ(btf_vlen(t), 1, "datasec_vlen"); + vi = btf_var_secinfos(t) + 0; + ASSERT_EQ(vi->type, 1, "v1_type"); + ASSERT_EQ(vi->offset, 4, "v1_off"); + ASSERT_EQ(vi->size, 8, "v1_sz"); + + btf__free(btf); +} -- cgit v1.3.1 From 00e8c44a147aa237867ba38b096da5e28b85a57b Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 28 Sep 2020 18:50:47 -0700 Subject: bpf, selftests: Fix cast to smaller integer type 'int' warning in raw_tp Fix warning in bpf selftests, progs/test_raw_tp_test_run.c:18:10: warning: cast to smaller integer type 'int' from 'struct task_struct *' [-Wpointer-to-int-cast] Change int type cast to long to fix. Discovered with gcc-9 and llvm-11+ where llvm was recent main branch. Fixes: 09d8ad16885ee ("selftests/bpf: Add raw_tp_test_run") Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/160134424745.11199.13841922833336698133.stgit@john-Precision-5820-Tower --- tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c b/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c index 1521853597d7..4c63cc87b9d0 100644 --- a/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c +++ b/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c @@ -15,7 +15,7 @@ int BPF_PROG(rename, struct task_struct *task, char *comm) count++; if ((__u64) task == 0x1234ULL && (__u64) comm == 0x5678ULL) { on_cpu = bpf_get_smp_processor_id(); - return (int)task + (int)comm; + return (long)task + (long)comm; } return 0; -- cgit v1.3.1 From d758d5d474ac5a9eb4e23188261c75bb1393afdf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 29 Sep 2020 08:56:38 -0300 Subject: perf tools: Separate the checking of headers only used to build beautification tables Some headers are not used in building the tools directly, but instead to generate tables that then gets source code included to do id->string and string->id lookups for things like syscall flags and commands. We were adding it directly to tools/include/ and this sometimes gets in the way of building using system headers, lets untangle this a bit. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/check-headers.sh | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 0b4d6431b072..15ecb1803fb9 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -75,6 +75,15 @@ include/uapi/asm-generic/mman-common.h include/uapi/asm-generic/unistd.h ' +# These copies are under tools/perf/trace/beauty/ as they are not used to in +# building object files only by scripts in tools/perf/trace/beauty/ to generate +# tables that then gets included in .c files for things like id->string syscall +# tables (and the reverse lookup as well: string -> id) + +BEAUTY_FILES=' +include/linux/socket.h +' + check_2 () { file1=$1 file2=$2 @@ -100,6 +109,14 @@ check () { check_2 tools/$file $file $* } +beauty_check () { + file=$1 + + shift + + check_2 tools/perf/trace/beauty/$file $file $* +} + # Check if we have the kernel headers (tools/perf/../../include), else # we're probably on a detached tarball, so no point in trying to check # differences. @@ -128,8 +145,9 @@ check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in # diff non-symmetric files check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl -# These will require a beauty_check when we get some more like that -check_2 tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h +for i in $BEAUTY_FILES; do + beauty_check $i -B +done # check duplicated library files check_2 tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h -- cgit v1.3.1 From 96c48058db150639553fc4d7b89c236bf5c9257e Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Tue, 29 Sep 2020 04:32:30 -0700 Subject: selftests/bpf: Fix unused-result warning in snprintf_btf.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Daniel reports: + system("ping -c 1 127.0.0.1 > /dev/null"); This generates the following new warning when compiling BPF selftests: [...] EXT-OBJ [test_progs] cgroup_helpers.o EXT-OBJ [test_progs] trace_helpers.o EXT-OBJ [test_progs] network_helpers.o EXT-OBJ [test_progs] testing_helpers.o TEST-OBJ [test_progs] snprintf_btf.test.o /root/bpf-next/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c: In function ‘test_snprintf_btf’: /root/bpf-next/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c:30:2: warning: ignoring return value of ‘system’, declared with attribute warn_unused_result [-Wunused-result] system("ping -c 1 127.0.0.1 > /dev/null"); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ [...] Fixes: 076a95f5aff2 ("selftests/bpf: Add bpf_snprintf_btf helper tests") Reported-by: Daniel Borkmann Signed-off-by: Alan Maguire Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1601379151-21449-2-git-send-email-alan.maguire@oracle.com --- tools/testing/selftests/bpf/prog_tests/snprintf_btf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c index 3a8ecf833adc..3c63a7003db4 100644 --- a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c +++ b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c @@ -27,7 +27,7 @@ void test_snprintf_btf(void) goto cleanup; /* generate receive event */ - system("ping -c 1 127.0.0.1 > /dev/null"); + (void) system("ping -c 1 127.0.0.1 > /dev/null"); if (bss->skip) { printf("%s:SKIP:no __builtin_btf_type_id\n", __func__); -- cgit v1.3.1 From cfe77683b8d4c22d25e7cc40319f1dfe5401c48a Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Tue, 29 Sep 2020 04:32:31 -0700 Subject: selftests/bpf: Ensure snprintf_btf/bpf_iter tests compatibility with old vmlinux.h Andrii reports that bpf selftests relying on "struct btf_ptr" and BTF_F_* values will not build as vmlinux.h for older kernels will not include "struct btf_ptr" or the BTF_F_* enum values. Undefine and redefine them to work around this. Fixes: b72091bd4ee4 ("selftests/bpf: Add test for bpf_seq_printf_btf helper") Fixes: 076a95f5aff2 ("selftests/bpf: Add bpf_snprintf_btf helper tests") Reported-by: Andrii Nakryiko Signed-off-by: Alan Maguire Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1601379151-21449-3-git-send-email-alan.maguire@oracle.com --- tools/testing/selftests/bpf/progs/bpf_iter.h | 23 ++++++++++++++++++ tools/testing/selftests/bpf/progs/btf_ptr.h | 27 ++++++++++++++++++++++ .../selftests/bpf/progs/netif_receive_skb.c | 2 +- 3 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/progs/btf_ptr.h (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h index df682af75510..6a1255465fd6 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h @@ -14,6 +14,11 @@ #define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used #define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used #define bpf_iter__sockmap bpf_iter__sockmap___not_used +#define btf_ptr btf_ptr___not_used +#define BTF_F_COMPACT BTF_F_COMPACT___not_used +#define BTF_F_NONAME BTF_F_NONAME___not_used +#define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used +#define BTF_F_ZERO BTF_F_ZERO___not_used #include "vmlinux.h" #undef bpf_iter_meta #undef bpf_iter__bpf_map @@ -28,6 +33,11 @@ #undef bpf_iter__bpf_map_elem #undef bpf_iter__bpf_sk_storage_map #undef bpf_iter__sockmap +#undef btf_ptr +#undef BTF_F_COMPACT +#undef BTF_F_NONAME +#undef BTF_F_PTR_RAW +#undef BTF_F_ZERO struct bpf_iter_meta { struct seq_file *seq; @@ -105,3 +115,16 @@ struct bpf_iter__sockmap { void *key; struct sock *sk; }; + +struct btf_ptr { + void *ptr; + __u32 type_id; + __u32 flags; +}; + +enum { + BTF_F_COMPACT = (1ULL << 0), + BTF_F_NONAME = (1ULL << 1), + BTF_F_PTR_RAW = (1ULL << 2), + BTF_F_ZERO = (1ULL << 3), +}; diff --git a/tools/testing/selftests/bpf/progs/btf_ptr.h b/tools/testing/selftests/bpf/progs/btf_ptr.h new file mode 100644 index 000000000000..c3c9797c67db --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf_ptr.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020, Oracle and/or its affiliates. */ +/* "undefine" structs in vmlinux.h, because we "override" them below */ +#define btf_ptr btf_ptr___not_used +#define BTF_F_COMPACT BTF_F_COMPACT___not_used +#define BTF_F_NONAME BTF_F_NONAME___not_used +#define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used +#define BTF_F_ZERO BTF_F_ZERO___not_used +#include "vmlinux.h" +#undef btf_ptr +#undef BTF_F_COMPACT +#undef BTF_F_NONAME +#undef BTF_F_PTR_RAW +#undef BTF_F_ZERO + +struct btf_ptr { + void *ptr; + __u32 type_id; + __u32 flags; +}; + +enum { + BTF_F_COMPACT = (1ULL << 0), + BTF_F_NONAME = (1ULL << 1), + BTF_F_PTR_RAW = (1ULL << 2), + BTF_F_ZERO = (1ULL << 3), +}; diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c index b873d805a345..6b670039ea67 100644 --- a/tools/testing/selftests/bpf/progs/netif_receive_skb.c +++ b/tools/testing/selftests/bpf/progs/netif_receive_skb.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020, Oracle and/or its affiliates. */ -#include "vmlinux.h" +#include "btf_ptr.h" #include #include #include -- cgit v1.3.1 From d2197c7ff171e7ab6272fbe0e20d95057748d82a Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 29 Sep 2020 14:30:04 +0200 Subject: selftests/bpf_iter: Don't fail test due to missing __builtin_btf_type_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new test for task iteration in bpf_iter checks (in do_btf_read()) if it should be skipped due to missing __builtin_btf_type_id. However, this 'skip' verdict is not propagated to the caller, so the parent test will still fail. Fix this by also skipping the rest of the parent test if the skip condition was reached. Fixes: b72091bd4ee4 ("selftests/bpf: Add test for bpf_seq_printf_btf helper") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Reviewed-by: Alan Maguire Link: https://lore.kernel.org/bpf/20200929123004.46694-1-toke@redhat.com --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index af15630a24dd..448885b95eed 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -172,17 +172,18 @@ done: static char taskbuf[TASKBUFSZ]; -static void do_btf_read(struct bpf_iter_task_btf *skel) +static int do_btf_read(struct bpf_iter_task_btf *skel) { struct bpf_program *prog = skel->progs.dump_task_struct; struct bpf_iter_task_btf__bss *bss = skel->bss; int iter_fd = -1, len = 0, bufleft = TASKBUFSZ; struct bpf_link *link; char *buf = taskbuf; + int ret = 0; link = bpf_program__attach_iter(prog, NULL); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) - return; + return ret; iter_fd = bpf_iter_create(bpf_link__fd(link)); if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) @@ -198,6 +199,7 @@ static void do_btf_read(struct bpf_iter_task_btf *skel) if (bss->skip) { printf("%s:SKIP:no __builtin_btf_type_id\n", __func__); + ret = 1; test__skip(); goto free_link; } @@ -212,12 +214,14 @@ free_link: if (iter_fd > 0) close(iter_fd); bpf_link__destroy(link); + return ret; } static void test_task_btf(void) { struct bpf_iter_task_btf__bss *bss; struct bpf_iter_task_btf *skel; + int ret; skel = bpf_iter_task_btf__open_and_load(); if (CHECK(!skel, "bpf_iter_task_btf__open_and_load", @@ -226,7 +230,9 @@ static void test_task_btf(void) bss = skel->bss; - do_btf_read(skel); + ret = do_btf_read(skel); + if (ret) + goto cleanup; if (CHECK(bss->tasks == 0, "check if iterated over tasks", "no task iteration, did BPF program run?\n")) -- cgit v1.3.1 From f970cbcdcdb5b3468a6c05332818d226fb60038f Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 29 Sep 2020 14:30:26 +0200 Subject: selftests: Make sure all 'skel' variables are declared static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If programs in prog_tests using skeletons declare the 'skel' variable as global but not static, that will lead to linker errors on the final link of the prog_tests binary due to duplicate symbols. Fix a few instances of this. Fixes: b18c1f0aa477 ("bpf: selftest: Adapt sock_fields test to use skel and global variables") Fixes: 9a856cae2217 ("bpf: selftest: Add test_btf_skc_cls_ingress") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200929123026.46751-1-toke@redhat.com --- tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c | 2 +- tools/testing/selftests/bpf/prog_tests/sock_fields.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c index 4ce0e8a25bc5..86ccf37e26b3 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c @@ -16,7 +16,7 @@ #include "test_progs.h" #include "test_btf_skc_cls_ingress.skel.h" -struct test_btf_skc_cls_ingress *skel; +static struct test_btf_skc_cls_ingress *skel; struct sockaddr_in6 srv_sa6; static __u32 duration; diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c index 66e83b8fc69d..af87118e748e 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -36,7 +36,7 @@ struct bpf_spinlock_cnt { static struct sockaddr_in6 srv_sa6, cli_sa6; static int sk_pkt_out_cnt10_fd; -struct test_sock_fields *skel; +static struct test_sock_fields *skel; static int sk_pkt_out_cnt_fd; static __u64 parent_cg_id; static __u64 child_cg_id; -- cgit v1.3.1 From 22ba3635163164aa86eb8a9d8645079a0bbd0a66 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 28 Sep 2020 21:30:44 -0700 Subject: selftests/bpf: Move and extend ASSERT_xxx() testing macros Move existing ASSERT_xxx() macros out of btf_write selftest into test_progs.h to use across all selftests. Also expand a set of macros for typical cases. Now there are the following macros: - ASSERT_EQ() -- check for equality of two integers; - ASSERT_STREQ() -- check for equality of two C strings; - ASSERT_OK() -- check for successful (zero) return result; - ASSERT_ERR() -- check for unsuccessful (non-zero) return result; - ASSERT_NULL() -- check for NULL pointer; - ASSERT_OK_PTR() -- check for a valid pointer; - ASSERT_ERR_PTR() -- check for NULL or negative error encoded in a pointer. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200929043046.1324350-2-andriin@fb.com --- tools/testing/selftests/bpf/prog_tests/btf_write.c | 34 ------------ tools/testing/selftests/bpf/test_progs.h | 63 ++++++++++++++++++++++ 2 files changed, 63 insertions(+), 34 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c index 88dce2cfa79b..314e1e7c36df 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_write.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c @@ -3,40 +3,6 @@ #include #include -#define ASSERT_EQ(actual, expected, name) ({ \ - typeof(actual) ___act = (actual); \ - typeof(expected) ___exp = (expected); \ - bool ___ok = ___act == ___exp; \ - CHECK(!___ok, (name), \ - "unexpected %s: actual %lld != expected %lld\n", \ - (name), (long long)(___act), (long long)(___exp)); \ - ___ok; \ -}) - -#define ASSERT_STREQ(actual, expected, name) ({ \ - const char *___act = actual; \ - const char *___exp = expected; \ - bool ___ok = strcmp(___act, ___exp) == 0; \ - CHECK(!___ok, (name), \ - "unexpected %s: actual '%s' != expected '%s'\n", \ - (name), ___act, ___exp); \ - ___ok; \ -}) - -#define ASSERT_OK(res, name) ({ \ - long long ___res = (res); \ - bool ___ok = ___res == 0; \ - CHECK(!___ok, (name), "unexpected error: %lld\n", ___res); \ - ___ok; \ -}) - -#define ASSERT_ERR(res, name) ({ \ - long long ___res = (res); \ - bool ___ok = ___res < 0; \ - CHECK(!___ok, (name), "unexpected success: %lld\n", ___res); \ - ___ok; \ -}) - static int duration = 0; void test_btf_write() { diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index dbb820dde138..238f5f61189e 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -130,6 +130,69 @@ extern int test__join_cgroup(const char *path); #define CHECK_ATTR(condition, tag, format...) \ _CHECK(condition, tag, tattr.duration, format) +#define ASSERT_EQ(actual, expected, name) ({ \ + static int duration = 0; \ + typeof(actual) ___act = (actual); \ + typeof(expected) ___exp = (expected); \ + bool ___ok = ___act == ___exp; \ + CHECK(!___ok, (name), \ + "unexpected %s: actual %lld != expected %lld\n", \ + (name), (long long)(___act), (long long)(___exp)); \ + ___ok; \ +}) + +#define ASSERT_STREQ(actual, expected, name) ({ \ + static int duration = 0; \ + const char *___act = actual; \ + const char *___exp = expected; \ + bool ___ok = strcmp(___act, ___exp) == 0; \ + CHECK(!___ok, (name), \ + "unexpected %s: actual '%s' != expected '%s'\n", \ + (name), ___act, ___exp); \ + ___ok; \ +}) + +#define ASSERT_OK(res, name) ({ \ + static int duration = 0; \ + long long ___res = (res); \ + bool ___ok = ___res == 0; \ + CHECK(!___ok, (name), "unexpected error: %lld\n", ___res); \ + ___ok; \ +}) + +#define ASSERT_ERR(res, name) ({ \ + static int duration = 0; \ + long long ___res = (res); \ + bool ___ok = ___res < 0; \ + CHECK(!___ok, (name), "unexpected success: %lld\n", ___res); \ + ___ok; \ +}) + +#define ASSERT_NULL(ptr, name) ({ \ + static int duration = 0; \ + const void *___res = (ptr); \ + bool ___ok = !___res; \ + CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \ + ___ok; \ +}) + +#define ASSERT_OK_PTR(ptr, name) ({ \ + static int duration = 0; \ + const void *___res = (ptr); \ + bool ___ok = !IS_ERR_OR_NULL(___res); \ + CHECK(!___ok, (name), \ + "unexpected error: %ld\n", PTR_ERR(___res)); \ + ___ok; \ +}) + +#define ASSERT_ERR_PTR(ptr, name) ({ \ + static int duration = 0; \ + const void *___res = (ptr); \ + bool ___ok = IS_ERR(___res) \ + CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \ + ___ok; \ +}) + static inline __u64 ptr_to_u64(const void *ptr) { return (__u64) (unsigned long) ptr; -- cgit v1.3.1 From 3289959b97cac205df006fc79c88f042bf2765ab Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 28 Sep 2020 21:30:45 -0700 Subject: libbpf: Support BTF loading and raw data output in both endianness Teach BTF to recognized wrong endianness and transparently convert it internally to host endianness. Original endianness of BTF will be preserved and used during btf__get_raw_data() to convert resulting raw data to the same endianness and a source raw_data. This means that little-endian host can parse big-endian BTF with no issues, all the type data will be presented to the client application in native endianness, but when it's time for emitting BTF to persist it in a file (e.g., after BTF deduplication), original non-native endianness will be preserved and stored. It's possible to query original endianness of BTF data with new btf__endianness() API. It's also possible to override desired output endianness with btf__set_endianness(), so that if application needs to load, say, big-endian BTF and store it as little-endian BTF, it's possible to manually override this. If btf__set_endianness() was used to change endianness, btf__endianness() will reflect overridden endianness. Given there are no known use cases for supporting cross-endianness for .BTF.ext, loading .BTF.ext in non-native endianness is not supported. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200929043046.1324350-3-andriin@fb.com --- tools/lib/bpf/btf.c | 310 +++++++++++++++++++++++++++++++++++++---------- tools/lib/bpf/btf.h | 7 ++ tools/lib/bpf/libbpf.map | 2 + 3 files changed, 255 insertions(+), 64 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index c25f49fad5a6..e1dbd766c698 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) /* Copyright (c) 2018 Facebook */ +#include #include #include #include @@ -27,8 +28,13 @@ static struct btf_type btf_void; struct btf { + /* raw BTF data in native endianness */ void *raw_data; + /* raw BTF data in non-native endianness */ + void *raw_data_swapped; __u32 raw_size; + /* whether target endianness differs from the native one */ + bool swapped_endian; /* * When BTF is loaded from an ELF or raw memory it is stored @@ -153,9 +159,19 @@ static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off) return 0; } +static void btf_bswap_hdr(struct btf_header *h) +{ + h->magic = bswap_16(h->magic); + h->hdr_len = bswap_32(h->hdr_len); + h->type_off = bswap_32(h->type_off); + h->type_len = bswap_32(h->type_len); + h->str_off = bswap_32(h->str_off); + h->str_len = bswap_32(h->str_len); +} + static int btf_parse_hdr(struct btf *btf) { - const struct btf_header *hdr = btf->hdr; + struct btf_header *hdr = btf->hdr; __u32 meta_left; if (btf->raw_size < sizeof(struct btf_header)) { @@ -163,21 +179,19 @@ static int btf_parse_hdr(struct btf *btf) return -EINVAL; } - if (hdr->magic != BTF_MAGIC) { + if (hdr->magic == bswap_16(BTF_MAGIC)) { + btf->swapped_endian = true; + if (bswap_32(hdr->hdr_len) != sizeof(struct btf_header)) { + pr_warn("Can't load BTF with non-native endianness due to unsupported header length %u\n", + bswap_32(hdr->hdr_len)); + return -ENOTSUP; + } + btf_bswap_hdr(hdr); + } else if (hdr->magic != BTF_MAGIC) { pr_debug("Invalid BTF magic:%x\n", hdr->magic); return -EINVAL; } - if (hdr->version != BTF_VERSION) { - pr_debug("Unsupported BTF version:%u\n", hdr->version); - return -ENOTSUP; - } - - if (hdr->flags) { - pr_debug("Unsupported BTF flags:%x\n", hdr->flags); - return -ENOTSUP; - } - meta_left = btf->raw_size - sizeof(*hdr); if (!meta_left) { pr_debug("BTF has no data\n"); @@ -224,7 +238,7 @@ static int btf_parse_str_sec(struct btf *btf) static int btf_type_size(const struct btf_type *t) { - int base_size = sizeof(struct btf_type); + const int base_size = sizeof(struct btf_type); __u16 vlen = btf_vlen(t); switch (btf_kind(t)) { @@ -257,12 +271,83 @@ static int btf_type_size(const struct btf_type *t) } } +static void btf_bswap_type_base(struct btf_type *t) +{ + t->name_off = bswap_32(t->name_off); + t->info = bswap_32(t->info); + t->type = bswap_32(t->type); +} + +static int btf_bswap_type_rest(struct btf_type *t) +{ + struct btf_var_secinfo *v; + struct btf_member *m; + struct btf_array *a; + struct btf_param *p; + struct btf_enum *e; + __u16 vlen = btf_vlen(t); + int i; + + switch (btf_kind(t)) { + case BTF_KIND_FWD: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + return 0; + case BTF_KIND_INT: + *(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1)); + return 0; + case BTF_KIND_ENUM: + for (i = 0, e = btf_enum(t); i < vlen; i++, e++) { + e->name_off = bswap_32(e->name_off); + e->val = bswap_32(e->val); + } + return 0; + case BTF_KIND_ARRAY: + a = btf_array(t); + a->type = bswap_32(a->type); + a->index_type = bswap_32(a->index_type); + a->nelems = bswap_32(a->nelems); + return 0; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + for (i = 0, m = btf_members(t); i < vlen; i++, m++) { + m->name_off = bswap_32(m->name_off); + m->type = bswap_32(m->type); + m->offset = bswap_32(m->offset); + } + return 0; + case BTF_KIND_FUNC_PROTO: + for (i = 0, p = btf_params(t); i < vlen; i++, p++) { + p->name_off = bswap_32(p->name_off); + p->type = bswap_32(p->type); + } + return 0; + case BTF_KIND_VAR: + btf_var(t)->linkage = bswap_32(btf_var(t)->linkage); + return 0; + case BTF_KIND_DATASEC: + for (i = 0, v = btf_var_secinfos(t); i < vlen; i++, v++) { + v->type = bswap_32(v->type); + v->offset = bswap_32(v->offset); + v->size = bswap_32(v->size); + } + return 0; + default: + pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); + return -EINVAL; + } +} + static int btf_parse_type_sec(struct btf *btf) { struct btf_header *hdr = btf->hdr; void *next_type = btf->types_data; void *end_type = next_type + hdr->type_len; - int err, type_size; + int err, i, type_size; /* VOID (type_id == 0) is specially handled by btf__get_type_by_id(), * so ensure we can never properly use its offset from index by @@ -272,19 +357,36 @@ static int btf_parse_type_sec(struct btf *btf) if (err) return err; - while (next_type < end_type) { - err = btf_add_type_idx_entry(btf, next_type - btf->types_data); - if (err) - return err; + while (next_type + sizeof(struct btf_type) <= end_type) { + i++; + + if (btf->swapped_endian) + btf_bswap_type_base(next_type); type_size = btf_type_size(next_type); if (type_size < 0) return type_size; + if (next_type + type_size > end_type) { + pr_warn("BTF type [%d] is malformed\n", i); + return -EINVAL; + } + + if (btf->swapped_endian && btf_bswap_type_rest(next_type)) + return -EINVAL; + + err = btf_add_type_idx_entry(btf, next_type - btf->types_data); + if (err) + return err; next_type += type_size; btf->nr_types++; } + if (next_type != end_type) { + pr_warn("BTF types data is malformed\n"); + return -EINVAL; + } + return 0; } @@ -373,6 +475,38 @@ int btf__set_pointer_size(struct btf *btf, size_t ptr_sz) return 0; } +static bool is_host_big_endian(void) +{ +#if __BYTE_ORDER == __LITTLE_ENDIAN + return false; +#elif __BYTE_ORDER == __BIG_ENDIAN + return true; +#else +# error "Unrecognized __BYTE_ORDER__" +#endif +} + +enum btf_endianness btf__endianness(const struct btf *btf) +{ + if (is_host_big_endian()) + return btf->swapped_endian ? BTF_LITTLE_ENDIAN : BTF_BIG_ENDIAN; + else + return btf->swapped_endian ? BTF_BIG_ENDIAN : BTF_LITTLE_ENDIAN; +} + +int btf__set_endianness(struct btf *btf, enum btf_endianness endian) +{ + if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN) + return -EINVAL; + + btf->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN); + if (!btf->swapped_endian) { + free(btf->raw_data_swapped); + btf->raw_data_swapped = NULL; + } + return 0; +} + static bool btf_type_is_void(const struct btf_type *t) { return t == &btf_void || btf_is_fwd(t); @@ -561,6 +695,7 @@ void btf__free(struct btf *btf) free(btf->strs_data); } free(btf->raw_data); + free(btf->raw_data_swapped); free(btf->type_offs); free(btf); } @@ -572,8 +707,10 @@ struct btf *btf__new_empty(void) btf = calloc(1, sizeof(*btf)); if (!btf) return ERR_PTR(-ENOMEM); + btf->fd = -1; btf->ptr_sz = sizeof(void *); + btf->swapped_endian = false; /* +1 for empty string at offset 0 */ btf->raw_size = sizeof(struct btf_header) + 1; @@ -604,8 +741,6 @@ struct btf *btf__new(const void *data, __u32 size) if (!btf) return ERR_PTR(-ENOMEM); - btf->fd = -1; - btf->raw_data = malloc(size); if (!btf->raw_data) { err = -ENOMEM; @@ -624,6 +759,10 @@ struct btf *btf__new(const void *data, __u32 size) err = btf_parse_str_sec(btf); err = err ?: btf_parse_type_sec(btf); + if (err) + goto done; + + btf->fd = -1; done: if (err) { @@ -634,17 +773,6 @@ done: return btf; } -static bool btf_check_endianness(const GElf_Ehdr *ehdr) -{ -#if __BYTE_ORDER == __LITTLE_ENDIAN - return ehdr->e_ident[EI_DATA] == ELFDATA2LSB; -#elif __BYTE_ORDER == __BIG_ENDIAN - return ehdr->e_ident[EI_DATA] == ELFDATA2MSB; -#else -# error "Unrecognized __BYTE_ORDER__" -#endif -} - struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) { Elf_Data *btf_data = NULL, *btf_ext_data = NULL; @@ -677,10 +805,6 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) pr_warn("failed to get EHDR from %s\n", path); goto done; } - if (!btf_check_endianness(&ehdr)) { - pr_warn("non-native ELF endianness is not supported\n"); - goto done; - } if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) { pr_warn("failed to get e_shstrndx from %s\n", path); goto done; @@ -792,7 +916,7 @@ struct btf *btf__parse_raw(const char *path) err = -EIO; goto err_out; } - if (magic != BTF_MAGIC) { + if (magic != BTF_MAGIC && magic != bswap_16(BTF_MAGIC)) { /* definitely not a raw BTF */ err = -EPROTO; goto err_out; @@ -942,11 +1066,13 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf) return err; } +static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); + int btf__load(struct btf *btf) { __u32 log_buf_size = 0, raw_size; char *log_buf = NULL; - const void *raw_data; + void *raw_data; int err = 0; if (btf->fd >= 0) @@ -961,11 +1087,14 @@ retry_load: *log_buf = 0; } - raw_data = btf__get_raw_data(btf, &raw_size); + raw_data = btf_get_raw_data(btf, &raw_size, false); if (!raw_data) { err = -ENOMEM; goto done; } + /* cache native raw data representation */ + btf->raw_size = raw_size; + btf->raw_data = raw_data; btf->fd = bpf_load_btf(raw_data, raw_size, log_buf, log_buf_size, false); if (btf->fd < 0) { @@ -998,31 +1127,73 @@ void btf__set_fd(struct btf *btf, int fd) btf->fd = fd; } -const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size) +static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian) { - struct btf *btf = (struct btf *)btf_ro; + struct btf_header *hdr = btf->hdr; + struct btf_type *t; + void *data, *p; + __u32 data_sz; + int i; - if (!btf->raw_data) { - struct btf_header *hdr = btf->hdr; - void *data; + data = swap_endian ? btf->raw_data_swapped : btf->raw_data; + if (data) { + *size = btf->raw_size; + return data; + } + + data_sz = hdr->hdr_len + hdr->type_len + hdr->str_len; + data = calloc(1, data_sz); + if (!data) + return NULL; + p = data; + + memcpy(p, hdr, hdr->hdr_len); + if (swap_endian) + btf_bswap_hdr(p); + p += hdr->hdr_len; + + memcpy(p, btf->types_data, hdr->type_len); + if (swap_endian) { + for (i = 1; i <= btf->nr_types; i++) { + t = p + btf->type_offs[i]; + /* btf_bswap_type_rest() relies on native t->info, so + * we swap base type info after we swapped all the + * additional information + */ + if (btf_bswap_type_rest(t)) + goto err_out; + btf_bswap_type_base(t); + } + } + p += hdr->type_len; + + memcpy(p, btf->strs_data, hdr->str_len); + p += hdr->str_len; - btf->raw_size = hdr->hdr_len + hdr->type_len + hdr->str_len; - btf->raw_data = calloc(1, btf->raw_size); - if (!btf->raw_data) - return NULL; - data = btf->raw_data; + *size = data_sz; + return data; +err_out: + free(data); + return NULL; +} - memcpy(data, hdr, hdr->hdr_len); - data += hdr->hdr_len; +const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size) +{ + struct btf *btf = (struct btf *)btf_ro; + __u32 data_sz; + void *data; - memcpy(data, btf->types_data, hdr->type_len); - data += hdr->type_len; + data = btf_get_raw_data(btf, &data_sz, btf->swapped_endian); + if (!data) + return NULL; - memcpy(data, btf->strs_data, hdr->str_len); - data += hdr->str_len; - } - *size = btf->raw_size; - return btf->raw_data; + btf->raw_size = data_sz; + if (btf->swapped_endian) + btf->raw_data_swapped = data; + else + btf->raw_data = data; + *size = data_sz; + return data; } const char *btf__str_by_offset(const struct btf *btf, __u32 offset) @@ -1190,6 +1361,18 @@ static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx) return strcmp(str1, str2) == 0; } +static void btf_invalidate_raw_data(struct btf *btf) +{ + if (btf->raw_data) { + free(btf->raw_data); + btf->raw_data = NULL; + } + if (btf->raw_data_swapped) { + free(btf->raw_data_swapped); + btf->raw_data_swapped = NULL; + } +} + /* Ensure BTF is ready to be modified (by splitting into a three memory * regions for header, types, and strings). Also invalidate cached * raw_data, if any. @@ -1203,10 +1386,7 @@ static int btf_ensure_modifiable(struct btf *btf) if (btf_is_modifiable(btf)) { /* any BTF modification invalidates raw_data */ - if (btf->raw_data) { - free(btf->raw_data); - btf->raw_data = NULL; - } + btf_invalidate_raw_data(btf); return 0; } @@ -1254,8 +1434,7 @@ static int btf_ensure_modifiable(struct btf *btf) btf->strs_deduped = btf->hdr->str_len <= 1; /* invalidate raw_data representation */ - free(btf->raw_data); - btf->raw_data = NULL; + btf_invalidate_raw_data(btf); return 0; @@ -2275,7 +2454,10 @@ static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) return -EINVAL; } - if (hdr->magic != BTF_MAGIC) { + if (hdr->magic == bswap_16(BTF_MAGIC)) { + pr_warn("BTF.ext in non-native endianness is not supported\n"); + return -ENOTSUP; + } else if (hdr->magic != BTF_MAGIC) { pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic); return -EINVAL; } diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index f7dec0144c3c..57247240a20a 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -25,6 +25,11 @@ struct btf_type; struct bpf_object; +enum btf_endianness { + BTF_LITTLE_ENDIAN = 0, + BTF_BIG_ENDIAN = 1, +}; + LIBBPF_API void btf__free(struct btf *btf); LIBBPF_API struct btf *btf__new(const void *data, __u32 size); LIBBPF_API struct btf *btf__new_empty(void); @@ -42,6 +47,8 @@ LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); LIBBPF_API size_t btf__pointer_size(const struct btf *btf); LIBBPF_API int btf__set_pointer_size(struct btf *btf, size_t ptr_sz); +LIBBPF_API enum btf_endianness btf__endianness(const struct btf *btf); +LIBBPF_API int btf__set_endianness(struct btf *btf, enum btf_endianness endian); LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 6b10ebad69c6..f7a8ff37ef04 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -325,8 +325,10 @@ LIBBPF_0.2.0 { btf__add_union; btf__add_var; btf__add_volatile; + btf__endianness; btf__find_str; btf__new_empty; + btf__set_endianness; btf__str_by_offset; perf_buffer__buffer_cnt; perf_buffer__buffer_fd; -- cgit v1.3.1 From ed9cf248b9498815d4762fe335539210b8a283a9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 28 Sep 2020 21:30:46 -0700 Subject: selftests/bpf: Test BTF's handling of endianness Add selftests juggling endianness back and forth to validate BTF's handling of endianness convertions internally. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200929043046.1324350-4-andriin@fb.com --- .../testing/selftests/bpf/prog_tests/btf_endian.c | 101 +++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/btf_endian.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf_endian.c b/tools/testing/selftests/bpf/prog_tests/btf_endian.c new file mode 100644 index 000000000000..8c52d72c876e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_endian.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define _GNU_SOURCE +#include +#include +#include +#include + +static int duration = 0; + +void test_btf_endian() { +#if __BYTE_ORDER == __LITTLE_ENDIAN + enum btf_endianness endian = BTF_LITTLE_ENDIAN; +#elif __BYTE_ORDER == __BIG_ENDIAN + enum btf_endianness endian = BTF_BIG_ENDIAN; +#else +#error "Unrecognized __BYTE_ORDER" +#endif + enum btf_endianness swap_endian = 1 - endian; + struct btf *btf = NULL, *swap_btf = NULL; + const void *raw_data, *swap_raw_data; + const struct btf_type *t; + const struct btf_header *hdr; + __u32 raw_sz, swap_raw_sz; + int var_id; + + /* Load BTF in native endianness */ + btf = btf__parse_elf("btf_dump_test_case_syntax.o", NULL); + if (!ASSERT_OK_PTR(btf, "parse_native_btf")) + goto err_out; + + ASSERT_EQ(btf__endianness(btf), endian, "endian"); + btf__set_endianness(btf, swap_endian); + ASSERT_EQ(btf__endianness(btf), swap_endian, "endian"); + + /* Get raw BTF data in non-native endianness... */ + raw_data = btf__get_raw_data(btf, &raw_sz); + if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted")) + goto err_out; + + /* ...and open it as a new BTF instance */ + swap_btf = btf__new(raw_data, raw_sz); + if (!ASSERT_OK_PTR(swap_btf, "parse_swap_btf")) + goto err_out; + + ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian"); + ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types"); + + swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz); + if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data")) + goto err_out; + + /* both raw data should be identical (with non-native endianness) */ + ASSERT_OK(memcmp(raw_data, swap_raw_data, raw_sz), "mem_identical"); + + /* make sure that at least BTF header data is really swapped */ + hdr = swap_raw_data; + ASSERT_EQ(bswap_16(hdr->magic), BTF_MAGIC, "btf_magic_swapped"); + ASSERT_EQ(raw_sz, swap_raw_sz, "raw_sizes"); + + /* swap it back to native endianness */ + btf__set_endianness(swap_btf, endian); + swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz); + if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data")) + goto err_out; + + /* now header should have native BTF_MAGIC */ + hdr = swap_raw_data; + ASSERT_EQ(hdr->magic, BTF_MAGIC, "btf_magic_native"); + ASSERT_EQ(raw_sz, swap_raw_sz, "raw_sizes"); + + /* now modify original BTF */ + var_id = btf__add_var(btf, "some_var", BTF_VAR_GLOBAL_ALLOCATED, 1); + CHECK(var_id <= 0, "var_id", "failed %d\n", var_id); + + btf__free(swap_btf); + swap_btf = NULL; + + btf__set_endianness(btf, swap_endian); + raw_data = btf__get_raw_data(btf, &raw_sz); + if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted")) + goto err_out; + + /* and re-open swapped raw data again */ + swap_btf = btf__new(raw_data, raw_sz); + if (!ASSERT_OK_PTR(swap_btf, "parse_swap_btf")) + goto err_out; + + ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian"); + ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types"); + + /* the type should appear as if it was stored in native endianness */ + t = btf__type_by_id(swap_btf, var_id); + ASSERT_STREQ(btf__str_by_offset(swap_btf, t->name_off), "some_var", "var_name"); + ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_linkage"); + ASSERT_EQ(t->type, 1, "var_type"); + +err_out: + btf__free(btf); + btf__free(swap_btf); +} -- cgit v1.3.1 From 4a1e7c0c63e02daad751842b7880f9bbcdfb6e89 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 29 Sep 2020 14:45:51 +0200 Subject: bpf: Support attaching freplace programs to multiple attach points MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This enables support for attaching freplace programs to multiple attach points. It does this by amending the UAPI for bpf_link_Create with a target btf ID that can be used to supply the new attachment point along with the target program fd. The target must be compatible with the target that was supplied at program load time. The implementation reuses the checks that were factored out of check_attach_btf_id() to ensure compatibility between the BTF types of the old and new attachment. If these match, a new bpf_tracing_link will be created for the new attach target, allowing multiple attachments to co-exist simultaneously. The code could theoretically support multiple-attach of other types of tracing programs as well, but since I don't have a use case for any of those, there is no API support for doing so. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/160138355169.48470.17165680973640685368.stgit@toke.dk --- include/linux/bpf.h | 2 + include/uapi/linux/bpf.h | 9 ++- kernel/bpf/syscall.c | 132 ++++++++++++++++++++++++++++++++++++----- kernel/bpf/verifier.c | 10 ++++ tools/include/uapi/linux/bpf.h | 9 ++- 5 files changed, 142 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 839dd8670a7a..50e5c4b52bd1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -768,6 +768,8 @@ struct bpf_prog_aux { struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */ struct bpf_prog *dst_prog; struct bpf_trampoline *dst_trampoline; + enum bpf_prog_type saved_dst_prog_type; + enum bpf_attach_type saved_dst_attach_type; bool verifier_zext; /* Zero extensions has been inserted by verifier. */ bool offload_requested; bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 96ddb00b91dc..2b1d3f16cbd1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -639,8 +639,13 @@ union bpf_attr { }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ - __aligned_u64 iter_info; /* extra bpf_iter_link_info */ - __u32 iter_info_len; /* iter_info length */ + union { + __u32 target_btf_id; /* btf_id of target to attach to */ + struct { + __aligned_u64 iter_info; /* extra bpf_iter_link_info */ + __u32 iter_info_len; /* iter_info length */ + }; + }; } link_create; struct { /* struct used by BPF_LINK_UPDATE command */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e6a0a948e30c..f1528c2a6927 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -2554,12 +2555,15 @@ static const struct bpf_link_ops bpf_tracing_link_lops = { .fill_link_info = bpf_tracing_link_fill_link_info, }; -static int bpf_tracing_prog_attach(struct bpf_prog *prog) +static int bpf_tracing_prog_attach(struct bpf_prog *prog, + int tgt_prog_fd, + u32 btf_id) { struct bpf_link_primer link_primer; struct bpf_prog *tgt_prog = NULL; + struct bpf_trampoline *tr = NULL; struct bpf_tracing_link *link; - struct bpf_trampoline *tr; + u64 key = 0; int err; switch (prog->type) { @@ -2588,6 +2592,28 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog) goto out_put_prog; } + if (!!tgt_prog_fd != !!btf_id) { + err = -EINVAL; + goto out_put_prog; + } + + if (tgt_prog_fd) { + /* For now we only allow new targets for BPF_PROG_TYPE_EXT */ + if (prog->type != BPF_PROG_TYPE_EXT) { + err = -EINVAL; + goto out_put_prog; + } + + tgt_prog = bpf_prog_get(tgt_prog_fd); + if (IS_ERR(tgt_prog)) { + err = PTR_ERR(tgt_prog); + tgt_prog = NULL; + goto out_put_prog; + } + + key = bpf_trampoline_compute_key(tgt_prog, btf_id); + } + link = kzalloc(sizeof(*link), GFP_USER); if (!link) { err = -ENOMEM; @@ -2599,12 +2625,58 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog) mutex_lock(&prog->aux->dst_mutex); - if (!prog->aux->dst_trampoline) { + /* There are a few possible cases here: + * + * - if prog->aux->dst_trampoline is set, the program was just loaded + * and not yet attached to anything, so we can use the values stored + * in prog->aux + * + * - if prog->aux->dst_trampoline is NULL, the program has already been + * attached to a target and its initial target was cleared (below) + * + * - if tgt_prog != NULL, the caller specified tgt_prog_fd + + * target_btf_id using the link_create API. + * + * - if tgt_prog == NULL when this function was called using the old + * raw_tracepoint_open API, and we need a target from prog->aux + * + * The combination of no saved target in prog->aux, and no target + * specified on load is illegal, and we reject that here. + */ + if (!prog->aux->dst_trampoline && !tgt_prog) { err = -ENOENT; goto out_unlock; } - tr = prog->aux->dst_trampoline; - tgt_prog = prog->aux->dst_prog; + + if (!prog->aux->dst_trampoline || + (key && key != prog->aux->dst_trampoline->key)) { + /* If there is no saved target, or the specified target is + * different from the destination specified at load time, we + * need a new trampoline and a check for compatibility + */ + struct bpf_attach_target_info tgt_info = {}; + + err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id, + &tgt_info); + if (err) + goto out_unlock; + + tr = bpf_trampoline_get(key, &tgt_info); + if (!tr) { + err = -ENOMEM; + goto out_unlock; + } + } else { + /* The caller didn't specify a target, or the target was the + * same as the destination supplied during program load. This + * means we can reuse the trampoline and reference from program + * load time, and there is no need to allocate a new one. This + * can only happen once for any program, as the saved values in + * prog->aux are cleared below. + */ + tr = prog->aux->dst_trampoline; + tgt_prog = prog->aux->dst_prog; + } err = bpf_link_prime(&link->link, &link_primer); if (err) @@ -2620,15 +2692,31 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog) link->tgt_prog = tgt_prog; link->trampoline = tr; + /* Always clear the trampoline and target prog from prog->aux to make + * sure the original attach destination is not kept alive after a + * program is (re-)attached to another target. + */ + if (prog->aux->dst_prog && + (tgt_prog_fd || tr != prog->aux->dst_trampoline)) + /* got extra prog ref from syscall, or attaching to different prog */ + bpf_prog_put(prog->aux->dst_prog); + if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline) + /* we allocated a new trampoline, so free the old one */ + bpf_trampoline_put(prog->aux->dst_trampoline); + prog->aux->dst_prog = NULL; prog->aux->dst_trampoline = NULL; mutex_unlock(&prog->aux->dst_mutex); return bpf_link_settle(&link_primer); out_unlock: + if (tr && tr != prog->aux->dst_trampoline) + bpf_trampoline_put(tr); mutex_unlock(&prog->aux->dst_mutex); kfree(link); out_put_prog: + if (tgt_prog_fd && tgt_prog) + bpf_prog_put(tgt_prog); bpf_prog_put(prog); return err; } @@ -2742,7 +2830,7 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) tp_name = prog->aux->attach_func_name; break; } - return bpf_tracing_prog_attach(prog); + return bpf_tracing_prog_attach(prog, 0, 0); case BPF_PROG_TYPE_RAW_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: if (strncpy_from_user(buf, @@ -3926,10 +4014,15 @@ err_put: static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { - if (attr->link_create.attach_type == BPF_TRACE_ITER && - prog->expected_attach_type == BPF_TRACE_ITER) - return bpf_iter_link_attach(attr, prog); + if (attr->link_create.attach_type != prog->expected_attach_type) + return -EINVAL; + if (prog->expected_attach_type == BPF_TRACE_ITER) + return bpf_iter_link_attach(attr, prog); + else if (prog->type == BPF_PROG_TYPE_EXT) + return bpf_tracing_prog_attach(prog, + attr->link_create.target_fd, + attr->link_create.target_btf_id); return -EINVAL; } @@ -3943,18 +4036,25 @@ static int link_create(union bpf_attr *attr) if (CHECK_ATTR(BPF_LINK_CREATE)) return -EINVAL; - ptype = attach_type_to_prog_type(attr->link_create.attach_type); - if (ptype == BPF_PROG_TYPE_UNSPEC) - return -EINVAL; - - prog = bpf_prog_get_type(attr->link_create.prog_fd, ptype); + prog = bpf_prog_get(attr->link_create.prog_fd); if (IS_ERR(prog)) return PTR_ERR(prog); ret = bpf_prog_attach_check_attach_type(prog, attr->link_create.attach_type); if (ret) - goto err_out; + goto out; + + if (prog->type == BPF_PROG_TYPE_EXT) { + ret = tracing_bpf_link_attach(attr, prog); + goto out; + } + + ptype = attach_type_to_prog_type(attr->link_create.attach_type); + if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) { + ret = -EINVAL; + goto out; + } switch (ptype) { case BPF_PROG_TYPE_CGROUP_SKB: @@ -3982,7 +4082,7 @@ static int link_create(union bpf_attr *attr) ret = -EINVAL; } -err_out: +out: if (ret < 0) bpf_prog_put(prog); return ret; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a97a2f2964e3..015a1c074b6b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11404,6 +11404,11 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, if (!btf_type_is_func_proto(t)) return -EINVAL; + if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) && + (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type || + prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type)) + return -EINVAL; + if (tgt_prog && conservative) t = NULL; @@ -11512,6 +11517,11 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) prog->aux->attach_func_proto = tgt_info.tgt_type; prog->aux->attach_func_name = tgt_info.tgt_name; + if (tgt_prog) { + prog->aux->saved_dst_prog_type = tgt_prog->type; + prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type; + } + if (prog->expected_attach_type == BPF_TRACE_RAW_TP) { prog->aux->attach_btf_trace = true; return 0; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 96ddb00b91dc..2b1d3f16cbd1 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -639,8 +639,13 @@ union bpf_attr { }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ - __aligned_u64 iter_info; /* extra bpf_iter_link_info */ - __u32 iter_info_len; /* iter_info length */ + union { + __u32 target_btf_id; /* btf_id of target to attach to */ + struct { + __aligned_u64 iter_info; /* extra bpf_iter_link_info */ + __u32 iter_info_len; /* iter_info length */ + }; + }; } link_create; struct { /* struct used by BPF_LINK_UPDATE command */ -- cgit v1.3.1 From a535909142bf2a6f3a95cabbb8b38ce3860c4807 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 29 Sep 2020 14:45:53 +0200 Subject: libbpf: Add support for freplace attachment in bpf_link_create MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds support for supplying a target btf ID for the bpf_link_create() operation, and adds a new bpf_program__attach_freplace() high-level API for attaching freplace functions with a target. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/160138355387.48470.18026176785351166890.stgit@toke.dk --- tools/lib/bpf/bpf.c | 18 +++++++++++++++--- tools/lib/bpf/bpf.h | 3 ++- tools/lib/bpf/libbpf.c | 44 +++++++++++++++++++++++++++++++++++++++----- tools/lib/bpf/libbpf.h | 3 +++ tools/lib/bpf/libbpf.map | 1 + 5 files changed, 60 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 70575a37aa14..d27e34133973 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -586,19 +586,31 @@ int bpf_link_create(int prog_fd, int target_fd, enum bpf_attach_type attach_type, const struct bpf_link_create_opts *opts) { + __u32 target_btf_id, iter_info_len; union bpf_attr attr; if (!OPTS_VALID(opts, bpf_link_create_opts)) return -EINVAL; + iter_info_len = OPTS_GET(opts, iter_info_len, 0); + target_btf_id = OPTS_GET(opts, target_btf_id, 0); + + if (iter_info_len && target_btf_id) + return -EINVAL; + memset(&attr, 0, sizeof(attr)); attr.link_create.prog_fd = prog_fd; attr.link_create.target_fd = target_fd; attr.link_create.attach_type = attach_type; attr.link_create.flags = OPTS_GET(opts, flags, 0); - attr.link_create.iter_info = - ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0)); - attr.link_create.iter_info_len = OPTS_GET(opts, iter_info_len, 0); + + if (iter_info_len) { + attr.link_create.iter_info = + ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0)); + attr.link_create.iter_info_len = iter_info_len; + } else if (target_btf_id) { + attr.link_create.target_btf_id = target_btf_id; + } return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr)); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 16806810a53c..875dde20d56e 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -174,8 +174,9 @@ struct bpf_link_create_opts { __u32 flags; union bpf_iter_link_info *iter_info; __u32 iter_info_len; + __u32 target_btf_id; }; -#define bpf_link_create_opts__last_field iter_info_len +#define bpf_link_create_opts__last_field target_btf_id LIBBPF_API int bpf_link_create(int prog_fd, int target_fd, enum bpf_attach_type attach_type, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 32dc444224d8..a4f55f8a460d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9390,9 +9390,11 @@ static struct bpf_link *attach_iter(const struct bpf_sec_def *sec, } static struct bpf_link * -bpf_program__attach_fd(struct bpf_program *prog, int target_fd, +bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id, const char *target_name) { + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts, + .target_btf_id = btf_id); enum bpf_attach_type attach_type; char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; @@ -9410,7 +9412,7 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd, link->detach = &bpf_link__detach_fd; attach_type = bpf_program__get_expected_attach_type(prog); - link_fd = bpf_link_create(prog_fd, target_fd, attach_type, NULL); + link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts); if (link_fd < 0) { link_fd = -errno; free(link); @@ -9426,19 +9428,51 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd, struct bpf_link * bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd) { - return bpf_program__attach_fd(prog, cgroup_fd, "cgroup"); + return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup"); } struct bpf_link * bpf_program__attach_netns(struct bpf_program *prog, int netns_fd) { - return bpf_program__attach_fd(prog, netns_fd, "netns"); + return bpf_program__attach_fd(prog, netns_fd, 0, "netns"); } struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex) { /* target_fd/target_ifindex use the same field in LINK_CREATE */ - return bpf_program__attach_fd(prog, ifindex, "xdp"); + return bpf_program__attach_fd(prog, ifindex, 0, "xdp"); +} + +struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog, + int target_fd, + const char *attach_func_name) +{ + int btf_id; + + if (!!target_fd != !!attach_func_name) { + pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n", + prog->name); + return ERR_PTR(-EINVAL); + } + + if (prog->type != BPF_PROG_TYPE_EXT) { + pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace", + prog->name); + return ERR_PTR(-EINVAL); + } + + if (target_fd) { + btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd); + if (btf_id < 0) + return ERR_PTR(btf_id); + + return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace"); + } else { + /* no target, so use raw_tracepoint_open for compatibility + * with old kernels + */ + return bpf_program__attach_trace(prog); + } } struct bpf_link * diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index a750f67a23f6..6909ee81113a 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -261,6 +261,9 @@ LIBBPF_API struct bpf_link * bpf_program__attach_netns(struct bpf_program *prog, int netns_fd); LIBBPF_API struct bpf_link * bpf_program__attach_xdp(struct bpf_program *prog, int ifindex); +LIBBPF_API struct bpf_link * +bpf_program__attach_freplace(struct bpf_program *prog, + int target_fd, const char *attach_func_name); struct bpf_map; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index f7a8ff37ef04..4ebfadf45b47 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -304,6 +304,7 @@ LIBBPF_0.2.0 { global: bpf_prog_bind_map; bpf_prog_test_run_opts; + bpf_program__attach_freplace; bpf_program__section_name; btf__add_array; btf__add_const; -- cgit v1.3.1 From f6429476c2012afade24741cc08890371d2842b4 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 29 Sep 2020 14:45:54 +0200 Subject: selftests: Add test for multiple attachments of freplace program MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a selftest for attaching an freplace program to multiple targets simultaneously. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/160138355497.48470.17568077161540217107.stgit@toke.dk --- .../selftests/bpf/prog_tests/fexit_bpf2bpf.c | 156 ++++++++++++++++----- .../selftests/bpf/progs/freplace_get_constant.c | 15 ++ 2 files changed, 139 insertions(+), 32 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/freplace_get_constant.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index eda682727787..2b94e827b2c5 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -2,36 +2,79 @@ /* Copyright (c) 2019 Facebook */ #include #include +#include + +typedef int (*test_cb)(struct bpf_object *obj); + +static int check_data_map(struct bpf_object *obj, int prog_cnt, bool reset) +{ + struct bpf_map *data_map = NULL, *map; + __u64 *result = NULL; + const int zero = 0; + __u32 duration = 0; + int ret = -1, i; + + result = malloc((prog_cnt + 32 /* spare */) * sizeof(__u64)); + if (CHECK(!result, "alloc_memory", "failed to alloc memory")) + return -ENOMEM; + + bpf_object__for_each_map(map, obj) + if (bpf_map__is_internal(map)) { + data_map = map; + break; + } + if (CHECK(!data_map, "find_data_map", "data map not found\n")) + goto out; + + ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, result); + if (CHECK(ret, "get_result", + "failed to get output data: %d\n", ret)) + goto out; + + for (i = 0; i < prog_cnt; i++) { + if (CHECK(result[i] != 1, "result", + "fexit_bpf2bpf result[%d] failed err %llu\n", + i, result[i])) + goto out; + result[i] = 0; + } + if (reset) { + ret = bpf_map_update_elem(bpf_map__fd(data_map), &zero, result, 0); + if (CHECK(ret, "reset_result", "failed to reset result\n")) + goto out; + } + + ret = 0; +out: + free(result); + return ret; +} static void test_fexit_bpf2bpf_common(const char *obj_file, const char *target_obj_file, int prog_cnt, const char **prog_name, - bool run_prog) + bool run_prog, + test_cb cb) { - struct bpf_object *obj = NULL, *pkt_obj; - int err, pkt_fd, i; - struct bpf_link **link = NULL; + struct bpf_object *obj = NULL, *tgt_obj; struct bpf_program **prog = NULL; + struct bpf_link **link = NULL; __u32 duration = 0, retval; - struct bpf_map *data_map; - const int zero = 0; - __u64 *result = NULL; + int err, tgt_fd, i; err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, - &pkt_obj, &pkt_fd); + &tgt_obj, &tgt_fd); if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", target_obj_file, err, errno)) return; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, - .attach_prog_fd = pkt_fd, + .attach_prog_fd = tgt_fd, ); link = calloc(sizeof(struct bpf_link *), prog_cnt); prog = calloc(sizeof(struct bpf_program *), prog_cnt); - result = malloc((prog_cnt + 32 /* spare */) * sizeof(__u64)); - if (CHECK(!link || !prog || !result, "alloc_memory", - "failed to alloc memory")) + if (CHECK(!link || !prog, "alloc_memory", "failed to alloc memory")) goto close_prog; obj = bpf_object__open_file(obj_file, &opts); @@ -53,39 +96,33 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, goto close_prog; } - if (!run_prog) - goto close_prog; + if (cb) { + err = cb(obj); + if (err) + goto close_prog; + } - data_map = bpf_object__find_map_by_name(obj, "fexit_bp.bss"); - if (CHECK(!data_map, "find_data_map", "data map not found\n")) + if (!run_prog) goto close_prog; - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6), NULL, NULL, &retval, &duration); CHECK(err || retval, "ipv6", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); - err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, result); - if (CHECK(err, "get_result", - "failed to get output data: %d\n", err)) + if (check_data_map(obj, prog_cnt, false)) goto close_prog; - for (i = 0; i < prog_cnt; i++) - if (CHECK(result[i] != 1, "result", "fexit_bpf2bpf failed err %llu\n", - result[i])) - goto close_prog; - close_prog: for (i = 0; i < prog_cnt; i++) if (!IS_ERR_OR_NULL(link[i])) bpf_link__destroy(link[i]); if (!IS_ERR_OR_NULL(obj)) bpf_object__close(obj); - bpf_object__close(pkt_obj); + bpf_object__close(tgt_obj); free(link); free(prog); - free(result); } static void test_target_no_callees(void) @@ -96,7 +133,7 @@ static void test_target_no_callees(void) test_fexit_bpf2bpf_common("./fexit_bpf2bpf_simple.o", "./test_pkt_md_access.o", ARRAY_SIZE(prog_name), - prog_name, true); + prog_name, true, NULL); } static void test_target_yes_callees(void) @@ -110,7 +147,7 @@ static void test_target_yes_callees(void) test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o", "./test_pkt_access.o", ARRAY_SIZE(prog_name), - prog_name, true); + prog_name, true, NULL); } static void test_func_replace(void) @@ -128,7 +165,7 @@ static void test_func_replace(void) test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o", "./test_pkt_access.o", ARRAY_SIZE(prog_name), - prog_name, true); + prog_name, true, NULL); } static void test_func_replace_verify(void) @@ -139,7 +176,60 @@ static void test_func_replace_verify(void) test_fexit_bpf2bpf_common("./freplace_connect4.o", "./connect4_prog.o", ARRAY_SIZE(prog_name), - prog_name, false); + prog_name, false, NULL); +} + +static int test_second_attach(struct bpf_object *obj) +{ + const char *prog_name = "freplace/get_constant"; + const char *tgt_name = prog_name + 9; /* cut off freplace/ */ + const char *tgt_obj_file = "./test_pkt_access.o"; + struct bpf_program *prog = NULL; + struct bpf_object *tgt_obj; + __u32 duration = 0, retval; + struct bpf_link *link; + int err = 0, tgt_fd; + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK(!prog, "find_prog", "prog %s not found\n", prog_name)) + return -ENOENT; + + err = bpf_prog_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC, + &tgt_obj, &tgt_fd); + if (CHECK(err, "second_prog_load", "file %s err %d errno %d\n", + tgt_obj_file, err, errno)) + return err; + + link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name); + if (CHECK(IS_ERR(link), "second_link", "failed to attach second link prog_fd %d tgt_fd %d\n", bpf_program__fd(prog), tgt_fd)) + goto out; + + err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6), + NULL, NULL, &retval, &duration); + if (CHECK(err || retval, "ipv6", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration)) + goto out; + + err = check_data_map(obj, 1, true); + if (err) + goto out; + +out: + bpf_link__destroy(link); + bpf_object__close(tgt_obj); + return err; +} + +static void test_func_replace_multi(void) +{ + const char *prog_name[] = { + "freplace/get_constant", + }; + test_fexit_bpf2bpf_common("./freplace_get_constant.o", + "./test_pkt_access.o", + ARRAY_SIZE(prog_name), + prog_name, true, test_second_attach); } static void test_func_sockmap_update(void) @@ -150,7 +240,7 @@ static void test_func_sockmap_update(void) test_fexit_bpf2bpf_common("./freplace_cls_redirect.o", "./test_cls_redirect.o", ARRAY_SIZE(prog_name), - prog_name, false); + prog_name, false, NULL); } static void test_obj_load_failure_common(const char *obj_file, @@ -222,4 +312,6 @@ void test_fexit_bpf2bpf(void) test_func_replace_return_code(); if (test__start_subtest("func_map_prog_compatibility")) test_func_map_prog_compatibility(); + if (test__start_subtest("func_replace_multi")) + test_func_replace_multi(); } diff --git a/tools/testing/selftests/bpf/progs/freplace_get_constant.c b/tools/testing/selftests/bpf/progs/freplace_get_constant.c new file mode 100644 index 000000000000..8f0ecf94e533 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_get_constant.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +volatile __u64 test_get_constant = 0; +SEC("freplace/get_constant") +int new_get_constant(long val) +{ + if (val != 123) + return 0; + test_get_constant = 1; + return test_get_constant; /* original get_constant() returns val - 122 */ +} +char _license[] SEC("license") = "GPL"; -- cgit v1.3.1 From 17d3f386757609223aa02342d29526daa67efafc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 29 Sep 2020 14:45:56 +0200 Subject: selftests/bpf: Adding test for arg dereference in extension trace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding test that setup following program: SEC("classifier/test_pkt_md_access") int test_pkt_md_access(struct __sk_buff *skb) with its extension: SEC("freplace/test_pkt_md_access") int test_pkt_md_access_new(struct __sk_buff *skb) and tracing that extension with: SEC("fentry/test_pkt_md_access_new") int BPF_PROG(fentry, struct sk_buff *skb) The test verifies that the tracing program can dereference skb argument properly. Signed-off-by: Jiri Olsa Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/160138355603.48470.9072073357530773228.stgit@toke.dk --- tools/testing/selftests/bpf/prog_tests/trace_ext.c | 111 +++++++++++++++++++++ tools/testing/selftests/bpf/progs/test_trace_ext.c | 18 ++++ .../selftests/bpf/progs/test_trace_ext_tracing.c | 25 +++++ 3 files changed, 154 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/trace_ext.c create mode 100644 tools/testing/selftests/bpf/progs/test_trace_ext.c create mode 100644 tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/trace_ext.c b/tools/testing/selftests/bpf/prog_tests/trace_ext.c new file mode 100644 index 000000000000..924441d4362d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/trace_ext.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +#include "test_pkt_md_access.skel.h" +#include "test_trace_ext.skel.h" +#include "test_trace_ext_tracing.skel.h" + +static __u32 duration; + +void test_trace_ext(void) +{ + struct test_pkt_md_access *skel_pkt = NULL; + struct test_trace_ext_tracing *skel_trace = NULL; + struct test_trace_ext_tracing__bss *bss_trace; + struct test_trace_ext *skel_ext = NULL; + struct test_trace_ext__bss *bss_ext; + int err, pkt_fd, ext_fd; + struct bpf_program *prog; + char buf[100]; + __u32 retval; + __u64 len; + + /* open/load/attach test_pkt_md_access */ + skel_pkt = test_pkt_md_access__open_and_load(); + if (CHECK(!skel_pkt, "setup", "classifier/test_pkt_md_access open failed\n")) + goto cleanup; + + err = test_pkt_md_access__attach(skel_pkt); + if (CHECK(err, "setup", "classifier/test_pkt_md_access attach failed: %d\n", err)) + goto cleanup; + + prog = skel_pkt->progs.test_pkt_md_access; + pkt_fd = bpf_program__fd(prog); + + /* open extension */ + skel_ext = test_trace_ext__open(); + if (CHECK(!skel_ext, "setup", "freplace/test_pkt_md_access open failed\n")) + goto cleanup; + + /* set extension's attach target - test_pkt_md_access */ + prog = skel_ext->progs.test_pkt_md_access_new; + bpf_program__set_attach_target(prog, pkt_fd, "test_pkt_md_access"); + + /* load/attach extension */ + err = test_trace_ext__load(skel_ext); + if (CHECK(err, "setup", "freplace/test_pkt_md_access load failed\n")) { + libbpf_strerror(err, buf, sizeof(buf)); + fprintf(stderr, "%s\n", buf); + goto cleanup; + } + + err = test_trace_ext__attach(skel_ext); + if (CHECK(err, "setup", "freplace/test_pkt_md_access attach failed: %d\n", err)) + goto cleanup; + + prog = skel_ext->progs.test_pkt_md_access_new; + ext_fd = bpf_program__fd(prog); + + /* open tracing */ + skel_trace = test_trace_ext_tracing__open(); + if (CHECK(!skel_trace, "setup", "tracing/test_pkt_md_access_new open failed\n")) + goto cleanup; + + /* set tracing's attach target - fentry */ + prog = skel_trace->progs.fentry; + bpf_program__set_attach_target(prog, ext_fd, "test_pkt_md_access_new"); + + /* set tracing's attach target - fexit */ + prog = skel_trace->progs.fexit; + bpf_program__set_attach_target(prog, ext_fd, "test_pkt_md_access_new"); + + /* load/attach tracing */ + err = test_trace_ext_tracing__load(skel_trace); + if (CHECK(err, "setup", "tracing/test_pkt_md_access_new load failed\n")) { + libbpf_strerror(err, buf, sizeof(buf)); + fprintf(stderr, "%s\n", buf); + goto cleanup; + } + + err = test_trace_ext_tracing__attach(skel_trace); + if (CHECK(err, "setup", "tracing/test_pkt_md_access_new attach failed: %d\n", err)) + goto cleanup; + + /* trigger the test */ + err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, &retval, &duration); + CHECK(err || retval, "run", "err %d errno %d retval %d\n", err, errno, retval); + + bss_ext = skel_ext->bss; + bss_trace = skel_trace->bss; + + len = bss_ext->ext_called; + + CHECK(bss_ext->ext_called == 0, + "check", "failed to trigger freplace/test_pkt_md_access\n"); + CHECK(bss_trace->fentry_called != len, + "check", "failed to trigger fentry/test_pkt_md_access_new\n"); + CHECK(bss_trace->fexit_called != len, + "check", "failed to trigger fexit/test_pkt_md_access_new\n"); + +cleanup: + test_trace_ext_tracing__destroy(skel_trace); + test_trace_ext__destroy(skel_ext); + test_pkt_md_access__destroy(skel_pkt); +} diff --git a/tools/testing/selftests/bpf/progs/test_trace_ext.c b/tools/testing/selftests/bpf/progs/test_trace_ext.c new file mode 100644 index 000000000000..d19a634d0e78 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_trace_ext.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include +#include +#include +#include +#include + +__u64 ext_called = 0; + +SEC("freplace/test_pkt_md_access") +int test_pkt_md_access_new(struct __sk_buff *skb) +{ + ext_called = skb->len; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c b/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c new file mode 100644 index 000000000000..52f3baf98f20 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include +#include + +__u64 fentry_called = 0; + +SEC("fentry/test_pkt_md_access_new") +int BPF_PROG(fentry, struct sk_buff *skb) +{ + fentry_called = skb->len; + return 0; +} + +__u64 fexit_called = 0; + +SEC("fexit/test_pkt_md_access_new") +int BPF_PROG(fexit, struct sk_buff *skb) +{ + fexit_called = skb->len; + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.3.1 From bee4b7e6268be9267e288936f5b1215fec3d4368 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 29 Sep 2020 14:45:57 +0200 Subject: selftests: Add selftest for disallowing modify_return attachment to freplace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a selftest that ensures that modify_return tracing programs cannot be attached to freplace programs. The security_ prefix is added to the freplace program because that would otherwise let it pass the check for modify_return. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/160138355713.48470.3811074984255709369.stgit@toke.dk --- .../selftests/bpf/prog_tests/fexit_bpf2bpf.c | 56 ++++++++++++++++++++++ .../selftests/bpf/progs/fmod_ret_freplace.c | 14 ++++++ .../selftests/bpf/progs/freplace_get_constant.c | 2 +- 3 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/progs/fmod_ret_freplace.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index 2b94e827b2c5..5c0448910426 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -232,6 +232,60 @@ static void test_func_replace_multi(void) prog_name, true, test_second_attach); } +static void test_fmod_ret_freplace(void) +{ + struct bpf_object *freplace_obj = NULL, *pkt_obj, *fmod_obj = NULL; + const char *freplace_name = "./freplace_get_constant.o"; + const char *fmod_ret_name = "./fmod_ret_freplace.o"; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); + const char *tgt_name = "./test_pkt_access.o"; + struct bpf_link *freplace_link = NULL; + struct bpf_program *prog; + __u32 duration = 0; + int err, pkt_fd; + + err = bpf_prog_load(tgt_name, BPF_PROG_TYPE_UNSPEC, + &pkt_obj, &pkt_fd); + /* the target prog should load fine */ + if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", + tgt_name, err, errno)) + return; + opts.attach_prog_fd = pkt_fd; + + freplace_obj = bpf_object__open_file(freplace_name, &opts); + if (CHECK(IS_ERR_OR_NULL(freplace_obj), "freplace_obj_open", + "failed to open %s: %ld\n", freplace_name, + PTR_ERR(freplace_obj))) + goto out; + + err = bpf_object__load(freplace_obj); + if (CHECK(err, "freplace_obj_load", "err %d\n", err)) + goto out; + + prog = bpf_program__next(NULL, freplace_obj); + freplace_link = bpf_program__attach_trace(prog); + if (CHECK(IS_ERR(freplace_link), "freplace_attach_trace", "failed to link\n")) + goto out; + + opts.attach_prog_fd = bpf_program__fd(prog); + fmod_obj = bpf_object__open_file(fmod_ret_name, &opts); + if (CHECK(IS_ERR_OR_NULL(fmod_obj), "fmod_obj_open", + "failed to open %s: %ld\n", fmod_ret_name, + PTR_ERR(fmod_obj))) + goto out; + + err = bpf_object__load(fmod_obj); + if (CHECK(!err, "fmod_obj_load", "loading fmod_ret should fail\n")) + goto out; + +out: + bpf_link__destroy(freplace_link); + bpf_object__close(freplace_obj); + bpf_object__close(fmod_obj); + bpf_object__close(pkt_obj); +} + + static void test_func_sockmap_update(void) { const char *prog_name[] = { @@ -314,4 +368,6 @@ void test_fexit_bpf2bpf(void) test_func_map_prog_compatibility(); if (test__start_subtest("func_replace_multi")) test_func_replace_multi(); + if (test__start_subtest("fmod_ret_freplace")) + test_fmod_ret_freplace(); } diff --git a/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c b/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c new file mode 100644 index 000000000000..c8943ccee6c0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +volatile __u64 test_fmod_ret = 0; +SEC("fmod_ret/security_new_get_constant") +int BPF_PROG(fmod_ret_test, long val, int ret) +{ + test_fmod_ret = 1; + return 120; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/freplace_get_constant.c b/tools/testing/selftests/bpf/progs/freplace_get_constant.c index 8f0ecf94e533..705e4b64dfc2 100644 --- a/tools/testing/selftests/bpf/progs/freplace_get_constant.c +++ b/tools/testing/selftests/bpf/progs/freplace_get_constant.c @@ -5,7 +5,7 @@ volatile __u64 test_get_constant = 0; SEC("freplace/get_constant") -int new_get_constant(long val) +int security_new_get_constant(long val) { if (val != 123) return 0; -- cgit v1.3.1 From c810b31ecb036b94629726d6219ad21b0c7a64a2 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 29 Sep 2020 13:07:49 -0700 Subject: bpf, selftests: Fix warning in snprintf_btf where system() call unchecked MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On my systems system() calls are marked with warn_unused_result apparently. So without error checking we get this warning, ./prog_tests/snprintf_btf.c:30:9: warning: ignoring return value of ‘system’, declared with attribute warn_unused_result[-Wunused-result] Also it seems like a good idea to check the return value anyways to ensure ping exists even if its seems unlikely. Fixes: 076a95f5aff2c ("selftests/bpf: Add bpf_snprintf_btf helper tests") Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/160141006897.25201.12095049414156293265.stgit@john-Precision-5820-Tower --- tools/testing/selftests/bpf/prog_tests/snprintf_btf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c index 3c63a7003db4..686b40f11a45 100644 --- a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c +++ b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c @@ -27,7 +27,9 @@ void test_snprintf_btf(void) goto cleanup; /* generate receive event */ - (void) system("ping -c 1 127.0.0.1 > /dev/null"); + err = system("ping -c 1 127.0.0.1 > /dev/null"); + if (CHECK(err, "system", "ping failed: %d\n", err)) + goto cleanup; if (bss->skip) { printf("%s:SKIP:no __builtin_btf_type_id\n", __func__); -- cgit v1.3.1 From 9012e3dda2a70cff7986e08d0a9cf3a8e04691e6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 29 Sep 2020 18:07:27 -0300 Subject: perf trace beauty: Add script to autogenerate mremap's flags args string/id table It'll also conditionally generate the defines, so that if we don't have those when building a new tool tarball in an older systems, we get those, and we need them sometimes in the actual scnprintf routine, such as when checking if a flags means we have an extra arg, like with MREMAP_FIXED. $ tools/perf/trace/beauty/mremap_flags.sh static const char *mremap_flags[] = { [ilog2(1) + 1] = "MAYMOVE", #ifndef MREMAP_MAYMOVE #define MREMAP_MAYMOVE 1 #endif [ilog2(2) + 1] = "FIXED", #ifndef MREMAP_FIXED #define MREMAP_FIXED 2 #endif [ilog2(4) + 1] = "DONTUNMAP", #ifndef MREMAP_DONTUNMAP #define MREMAP_DONTUNMAP 4 #endif }; $ Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 8 ++++++++ tools/perf/trace/beauty/mmap.c | 32 +++++++++++++------------------- tools/perf/trace/beauty/mremap_flags.sh | 18 ++++++++++++++++++ 3 files changed, 39 insertions(+), 19 deletions(-) create mode 100755 tools/perf/trace/beauty/mremap_flags.sh (limited to 'tools') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 920d8afb9238..038de61aa69f 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -537,6 +537,12 @@ mmap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mmap_flags.sh $(mmap_flags_array): $(linux_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(mmap_flags_tbl) $(Q)$(SHELL) '$(mmap_flags_tbl)' $(linux_uapi_dir) $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@ +mremap_flags_array := $(beauty_outdir)/mremap_flags_array.c +mremap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mremap_flags.sh + +$(mremap_flags_array): $(linux_uapi_dir)/mman.h $(mremap_flags_tbl) + $(Q)$(SHELL) '$(mremap_flags_tbl)' $(linux_uapi_dir) > $@ + mount_flags_array := $(beauty_outdir)/mount_flags_array.c mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh @@ -710,6 +716,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(vhost_virtio_ioctl_array) \ $(madvise_behavior_array) \ $(mmap_flags_array) \ + $(mremap_flags_array) \ $(mount_flags_array) \ $(move_mount_flags_array) \ $(perf_ioctl_array) \ @@ -1008,6 +1015,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)$(fspick_arrays) \ $(OUTPUT)$(madvise_behavior_array) \ $(OUTPUT)$(mmap_flags_array) \ + $(OUTPUT)$(mremap_flags_array) \ $(OUTPUT)$(mount_flags_array) \ $(OUTPUT)$(move_mount_flags_array) \ $(OUTPUT)$(drm_ioctl_array) \ diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index 862c8331dded..58a70ffa0a88 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -34,7 +34,7 @@ static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot #include "trace/beauty/generated/mmap_flags_array.c" - static DEFINE_STRARRAY(mmap_flags, "MAP_"); +static DEFINE_STRARRAY(mmap_flags, "MAP_"); static size_t mmap__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix) { @@ -54,28 +54,22 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags -static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size, - struct syscall_arg *arg) -{ - const char *flags_prefix = "MREMAP_"; - bool show_prefix = arg->show_string_prefix; - int printed = 0, flags = arg->val; +#include "trace/beauty/generated/mremap_flags_array.c" +static DEFINE_STRARRAY(mremap_flags, "MREMAP_"); -#define P_MREMAP_FLAG(n) \ - if (flags & MREMAP_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? flags_prefix : "", #n); \ - flags &= ~MREMAP_##n; \ - } +static size_t mremap__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix) +{ + return strarray__scnprintf_flags(&strarray__mremap_flags, bf, size, show_prefix, flags); +} - P_MREMAP_FLAG(MAYMOVE); - P_MREMAP_FLAG(FIXED); - P_MREMAP_FLAG(DONTUNMAP); -#undef P_MREMAP_FLAG +static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long flags = arg->val; - if (flags) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + if (!(flags & MREMAP_FIXED)) + arg->mask |= (1 << 5); /* Mask 5th ('new_address') args, ignored */ - return printed; + return mremap__scnprintf_flags(flags, bf, size, arg->show_string_prefix); } #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags diff --git a/tools/perf/trace/beauty/mremap_flags.sh b/tools/perf/trace/beauty/mremap_flags.sh new file mode 100755 index 000000000000..d58182300bb1 --- /dev/null +++ b/tools/perf/trace/beauty/mremap_flags.sh @@ -0,0 +1,18 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +if [ $# -ne 1 ] ; then + linux_header_dir=tools/include/uapi/linux +else + linux_header_dir=$1 +fi + +linux_mman=${linux_header_dir}/mman.h + +printf "static const char *mremap_flags[] = {\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MREMAP_([[:alnum:]_]+)[[:space:]]+((0x)?[[:xdigit:]]+)[[:space:]]*.*' +egrep -q $regex ${linux_mman} && \ +(egrep $regex ${linux_mman} | \ + sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MREMAP_%s\n#define MREMAP_%s %s\n#endif\n") +printf "};\n" -- cgit v1.3.1 From 6458bde368cee77e798d05cccd2316db4d748c41 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 29 Sep 2020 22:18:14 +0200 Subject: selftests/bpf: Fix endianness issues in sk_lookup/ctx_narrow_access This test makes a lot of narrow load checks while assuming little endian architecture, and therefore fails on s390. Fix by introducing LSB and LSW macros and using them to perform narrow loads. Fixes: 0ab5539f8584 ("selftests/bpf: Tests for BPF_SK_LOOKUP attach point") Signed-off-by: Ilya Leoshkevich Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200929201814.44360-1-iii@linux.ibm.com --- tools/testing/selftests/bpf/progs/test_sk_lookup.c | 216 ++++++++++----------- 1 file changed, 101 insertions(+), 115 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c index bbf8296f4d66..1032b292af5b 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c @@ -19,6 +19,17 @@ #define IP6(aaaa, bbbb, cccc, dddd) \ { bpf_htonl(aaaa), bpf_htonl(bbbb), bpf_htonl(cccc), bpf_htonl(dddd) } +/* Macros for least-significant byte and word accesses. */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define LSE_INDEX(index, size) (index) +#else +#define LSE_INDEX(index, size) ((size) - (index) - 1) +#endif +#define LSB(value, index) \ + (((__u8 *)&(value))[LSE_INDEX((index), sizeof(value))]) +#define LSW(value, index) \ + (((__u16 *)&(value))[LSE_INDEX((index), sizeof(value) / 2)]) + #define MAX_SOCKS 32 struct { @@ -369,171 +380,146 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; int err, family; - __u16 *half; - __u8 *byte; bool v4; v4 = (ctx->family == AF_INET); /* Narrow loads from family field */ - byte = (__u8 *)&ctx->family; - half = (__u16 *)&ctx->family; - if (byte[0] != (v4 ? AF_INET : AF_INET6) || - byte[1] != 0 || byte[2] != 0 || byte[3] != 0) + if (LSB(ctx->family, 0) != (v4 ? AF_INET : AF_INET6) || + LSB(ctx->family, 1) != 0 || LSB(ctx->family, 2) != 0 || LSB(ctx->family, 3) != 0) return SK_DROP; - if (half[0] != (v4 ? AF_INET : AF_INET6)) + if (LSW(ctx->family, 0) != (v4 ? AF_INET : AF_INET6)) return SK_DROP; - byte = (__u8 *)&ctx->protocol; - if (byte[0] != IPPROTO_TCP || - byte[1] != 0 || byte[2] != 0 || byte[3] != 0) + /* Narrow loads from protocol field */ + if (LSB(ctx->protocol, 0) != IPPROTO_TCP || + LSB(ctx->protocol, 1) != 0 || LSB(ctx->protocol, 2) != 0 || LSB(ctx->protocol, 3) != 0) return SK_DROP; - half = (__u16 *)&ctx->protocol; - if (half[0] != IPPROTO_TCP) + if (LSW(ctx->protocol, 0) != IPPROTO_TCP) return SK_DROP; /* Narrow loads from remote_port field. Expect non-0 value. */ - byte = (__u8 *)&ctx->remote_port; - if (byte[0] == 0 && byte[1] == 0 && byte[2] == 0 && byte[3] == 0) + if (LSB(ctx->remote_port, 0) == 0 && LSB(ctx->remote_port, 1) == 0 && + LSB(ctx->remote_port, 2) == 0 && LSB(ctx->remote_port, 3) == 0) return SK_DROP; - half = (__u16 *)&ctx->remote_port; - if (half[0] == 0) + if (LSW(ctx->remote_port, 0) == 0) return SK_DROP; /* Narrow loads from local_port field. Expect DST_PORT. */ - byte = (__u8 *)&ctx->local_port; - if (byte[0] != ((DST_PORT >> 0) & 0xff) || - byte[1] != ((DST_PORT >> 8) & 0xff) || - byte[2] != 0 || byte[3] != 0) + if (LSB(ctx->local_port, 0) != ((DST_PORT >> 0) & 0xff) || + LSB(ctx->local_port, 1) != ((DST_PORT >> 8) & 0xff) || + LSB(ctx->local_port, 2) != 0 || LSB(ctx->local_port, 3) != 0) return SK_DROP; - half = (__u16 *)&ctx->local_port; - if (half[0] != DST_PORT) + if (LSW(ctx->local_port, 0) != DST_PORT) return SK_DROP; /* Narrow loads from IPv4 fields */ if (v4) { /* Expect non-0.0.0.0 in remote_ip4 */ - byte = (__u8 *)&ctx->remote_ip4; - if (byte[0] == 0 && byte[1] == 0 && - byte[2] == 0 && byte[3] == 0) + if (LSB(ctx->remote_ip4, 0) == 0 && LSB(ctx->remote_ip4, 1) == 0 && + LSB(ctx->remote_ip4, 2) == 0 && LSB(ctx->remote_ip4, 3) == 0) return SK_DROP; - half = (__u16 *)&ctx->remote_ip4; - if (half[0] == 0 && half[1] == 0) + if (LSW(ctx->remote_ip4, 0) == 0 && LSW(ctx->remote_ip4, 1) == 0) return SK_DROP; /* Expect DST_IP4 in local_ip4 */ - byte = (__u8 *)&ctx->local_ip4; - if (byte[0] != ((DST_IP4 >> 0) & 0xff) || - byte[1] != ((DST_IP4 >> 8) & 0xff) || - byte[2] != ((DST_IP4 >> 16) & 0xff) || - byte[3] != ((DST_IP4 >> 24) & 0xff)) + if (LSB(ctx->local_ip4, 0) != ((DST_IP4 >> 0) & 0xff) || + LSB(ctx->local_ip4, 1) != ((DST_IP4 >> 8) & 0xff) || + LSB(ctx->local_ip4, 2) != ((DST_IP4 >> 16) & 0xff) || + LSB(ctx->local_ip4, 3) != ((DST_IP4 >> 24) & 0xff)) return SK_DROP; - half = (__u16 *)&ctx->local_ip4; - if (half[0] != ((DST_IP4 >> 0) & 0xffff) || - half[1] != ((DST_IP4 >> 16) & 0xffff)) + if (LSW(ctx->local_ip4, 0) != ((DST_IP4 >> 0) & 0xffff) || + LSW(ctx->local_ip4, 1) != ((DST_IP4 >> 16) & 0xffff)) return SK_DROP; } else { /* Expect 0.0.0.0 IPs when family != AF_INET */ - byte = (__u8 *)&ctx->remote_ip4; - if (byte[0] != 0 || byte[1] != 0 && - byte[2] != 0 || byte[3] != 0) + if (LSB(ctx->remote_ip4, 0) != 0 || LSB(ctx->remote_ip4, 1) != 0 || + LSB(ctx->remote_ip4, 2) != 0 || LSB(ctx->remote_ip4, 3) != 0) return SK_DROP; - half = (__u16 *)&ctx->remote_ip4; - if (half[0] != 0 || half[1] != 0) + if (LSW(ctx->remote_ip4, 0) != 0 || LSW(ctx->remote_ip4, 1) != 0) return SK_DROP; - byte = (__u8 *)&ctx->local_ip4; - if (byte[0] != 0 || byte[1] != 0 && - byte[2] != 0 || byte[3] != 0) + if (LSB(ctx->local_ip4, 0) != 0 || LSB(ctx->local_ip4, 1) != 0 || + LSB(ctx->local_ip4, 2) != 0 || LSB(ctx->local_ip4, 3) != 0) return SK_DROP; - half = (__u16 *)&ctx->local_ip4; - if (half[0] != 0 || half[1] != 0) + if (LSW(ctx->local_ip4, 0) != 0 || LSW(ctx->local_ip4, 1) != 0) return SK_DROP; } /* Narrow loads from IPv6 fields */ if (!v4) { - /* Expenct non-:: IP in remote_ip6 */ - byte = (__u8 *)&ctx->remote_ip6; - if (byte[0] == 0 && byte[1] == 0 && - byte[2] == 0 && byte[3] == 0 && - byte[4] == 0 && byte[5] == 0 && - byte[6] == 0 && byte[7] == 0 && - byte[8] == 0 && byte[9] == 0 && - byte[10] == 0 && byte[11] == 0 && - byte[12] == 0 && byte[13] == 0 && - byte[14] == 0 && byte[15] == 0) + /* Expect non-:: IP in remote_ip6 */ + if (LSB(ctx->remote_ip6[0], 0) == 0 && LSB(ctx->remote_ip6[0], 1) == 0 && + LSB(ctx->remote_ip6[0], 2) == 0 && LSB(ctx->remote_ip6[0], 3) == 0 && + LSB(ctx->remote_ip6[1], 0) == 0 && LSB(ctx->remote_ip6[1], 1) == 0 && + LSB(ctx->remote_ip6[1], 2) == 0 && LSB(ctx->remote_ip6[1], 3) == 0 && + LSB(ctx->remote_ip6[2], 0) == 0 && LSB(ctx->remote_ip6[2], 1) == 0 && + LSB(ctx->remote_ip6[2], 2) == 0 && LSB(ctx->remote_ip6[2], 3) == 0 && + LSB(ctx->remote_ip6[3], 0) == 0 && LSB(ctx->remote_ip6[3], 1) == 0 && + LSB(ctx->remote_ip6[3], 2) == 0 && LSB(ctx->remote_ip6[3], 3) == 0) return SK_DROP; - half = (__u16 *)&ctx->remote_ip6; - if (half[0] == 0 && half[1] == 0 && - half[2] == 0 && half[3] == 0 && - half[4] == 0 && half[5] == 0 && - half[6] == 0 && half[7] == 0) + if (LSW(ctx->remote_ip6[0], 0) == 0 && LSW(ctx->remote_ip6[0], 1) == 0 && + LSW(ctx->remote_ip6[1], 0) == 0 && LSW(ctx->remote_ip6[1], 1) == 0 && + LSW(ctx->remote_ip6[2], 0) == 0 && LSW(ctx->remote_ip6[2], 1) == 0 && + LSW(ctx->remote_ip6[3], 0) == 0 && LSW(ctx->remote_ip6[3], 1) == 0) return SK_DROP; - /* Expect DST_IP6 in local_ip6 */ - byte = (__u8 *)&ctx->local_ip6; - if (byte[0] != ((DST_IP6[0] >> 0) & 0xff) || - byte[1] != ((DST_IP6[0] >> 8) & 0xff) || - byte[2] != ((DST_IP6[0] >> 16) & 0xff) || - byte[3] != ((DST_IP6[0] >> 24) & 0xff) || - byte[4] != ((DST_IP6[1] >> 0) & 0xff) || - byte[5] != ((DST_IP6[1] >> 8) & 0xff) || - byte[6] != ((DST_IP6[1] >> 16) & 0xff) || - byte[7] != ((DST_IP6[1] >> 24) & 0xff) || - byte[8] != ((DST_IP6[2] >> 0) & 0xff) || - byte[9] != ((DST_IP6[2] >> 8) & 0xff) || - byte[10] != ((DST_IP6[2] >> 16) & 0xff) || - byte[11] != ((DST_IP6[2] >> 24) & 0xff) || - byte[12] != ((DST_IP6[3] >> 0) & 0xff) || - byte[13] != ((DST_IP6[3] >> 8) & 0xff) || - byte[14] != ((DST_IP6[3] >> 16) & 0xff) || - byte[15] != ((DST_IP6[3] >> 24) & 0xff)) + if (LSB(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xff) || + LSB(ctx->local_ip6[0], 1) != ((DST_IP6[0] >> 8) & 0xff) || + LSB(ctx->local_ip6[0], 2) != ((DST_IP6[0] >> 16) & 0xff) || + LSB(ctx->local_ip6[0], 3) != ((DST_IP6[0] >> 24) & 0xff) || + LSB(ctx->local_ip6[1], 0) != ((DST_IP6[1] >> 0) & 0xff) || + LSB(ctx->local_ip6[1], 1) != ((DST_IP6[1] >> 8) & 0xff) || + LSB(ctx->local_ip6[1], 2) != ((DST_IP6[1] >> 16) & 0xff) || + LSB(ctx->local_ip6[1], 3) != ((DST_IP6[1] >> 24) & 0xff) || + LSB(ctx->local_ip6[2], 0) != ((DST_IP6[2] >> 0) & 0xff) || + LSB(ctx->local_ip6[2], 1) != ((DST_IP6[2] >> 8) & 0xff) || + LSB(ctx->local_ip6[2], 2) != ((DST_IP6[2] >> 16) & 0xff) || + LSB(ctx->local_ip6[2], 3) != ((DST_IP6[2] >> 24) & 0xff) || + LSB(ctx->local_ip6[3], 0) != ((DST_IP6[3] >> 0) & 0xff) || + LSB(ctx->local_ip6[3], 1) != ((DST_IP6[3] >> 8) & 0xff) || + LSB(ctx->local_ip6[3], 2) != ((DST_IP6[3] >> 16) & 0xff) || + LSB(ctx->local_ip6[3], 3) != ((DST_IP6[3] >> 24) & 0xff)) return SK_DROP; - half = (__u16 *)&ctx->local_ip6; - if (half[0] != ((DST_IP6[0] >> 0) & 0xffff) || - half[1] != ((DST_IP6[0] >> 16) & 0xffff) || - half[2] != ((DST_IP6[1] >> 0) & 0xffff) || - half[3] != ((DST_IP6[1] >> 16) & 0xffff) || - half[4] != ((DST_IP6[2] >> 0) & 0xffff) || - half[5] != ((DST_IP6[2] >> 16) & 0xffff) || - half[6] != ((DST_IP6[3] >> 0) & 0xffff) || - half[7] != ((DST_IP6[3] >> 16) & 0xffff)) + if (LSW(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xffff) || + LSW(ctx->local_ip6[0], 1) != ((DST_IP6[0] >> 16) & 0xffff) || + LSW(ctx->local_ip6[1], 0) != ((DST_IP6[1] >> 0) & 0xffff) || + LSW(ctx->local_ip6[1], 1) != ((DST_IP6[1] >> 16) & 0xffff) || + LSW(ctx->local_ip6[2], 0) != ((DST_IP6[2] >> 0) & 0xffff) || + LSW(ctx->local_ip6[2], 1) != ((DST_IP6[2] >> 16) & 0xffff) || + LSW(ctx->local_ip6[3], 0) != ((DST_IP6[3] >> 0) & 0xffff) || + LSW(ctx->local_ip6[3], 1) != ((DST_IP6[3] >> 16) & 0xffff)) return SK_DROP; } else { /* Expect :: IPs when family != AF_INET6 */ - byte = (__u8 *)&ctx->remote_ip6; - if (byte[0] != 0 || byte[1] != 0 || - byte[2] != 0 || byte[3] != 0 || - byte[4] != 0 || byte[5] != 0 || - byte[6] != 0 || byte[7] != 0 || - byte[8] != 0 || byte[9] != 0 || - byte[10] != 0 || byte[11] != 0 || - byte[12] != 0 || byte[13] != 0 || - byte[14] != 0 || byte[15] != 0) + if (LSB(ctx->remote_ip6[0], 0) != 0 || LSB(ctx->remote_ip6[0], 1) != 0 || + LSB(ctx->remote_ip6[0], 2) != 0 || LSB(ctx->remote_ip6[0], 3) != 0 || + LSB(ctx->remote_ip6[1], 0) != 0 || LSB(ctx->remote_ip6[1], 1) != 0 || + LSB(ctx->remote_ip6[1], 2) != 0 || LSB(ctx->remote_ip6[1], 3) != 0 || + LSB(ctx->remote_ip6[2], 0) != 0 || LSB(ctx->remote_ip6[2], 1) != 0 || + LSB(ctx->remote_ip6[2], 2) != 0 || LSB(ctx->remote_ip6[2], 3) != 0 || + LSB(ctx->remote_ip6[3], 0) != 0 || LSB(ctx->remote_ip6[3], 1) != 0 || + LSB(ctx->remote_ip6[3], 2) != 0 || LSB(ctx->remote_ip6[3], 3) != 0) return SK_DROP; - half = (__u16 *)&ctx->remote_ip6; - if (half[0] != 0 || half[1] != 0 || - half[2] != 0 || half[3] != 0 || - half[4] != 0 || half[5] != 0 || - half[6] != 0 || half[7] != 0) + if (LSW(ctx->remote_ip6[0], 0) != 0 || LSW(ctx->remote_ip6[0], 1) != 0 || + LSW(ctx->remote_ip6[1], 0) != 0 || LSW(ctx->remote_ip6[1], 1) != 0 || + LSW(ctx->remote_ip6[2], 0) != 0 || LSW(ctx->remote_ip6[2], 1) != 0 || + LSW(ctx->remote_ip6[3], 0) != 0 || LSW(ctx->remote_ip6[3], 1) != 0) return SK_DROP; - byte = (__u8 *)&ctx->local_ip6; - if (byte[0] != 0 || byte[1] != 0 || - byte[2] != 0 || byte[3] != 0 || - byte[4] != 0 || byte[5] != 0 || - byte[6] != 0 || byte[7] != 0 || - byte[8] != 0 || byte[9] != 0 || - byte[10] != 0 || byte[11] != 0 || - byte[12] != 0 || byte[13] != 0 || - byte[14] != 0 || byte[15] != 0) + if (LSB(ctx->local_ip6[0], 0) != 0 || LSB(ctx->local_ip6[0], 1) != 0 || + LSB(ctx->local_ip6[0], 2) != 0 || LSB(ctx->local_ip6[0], 3) != 0 || + LSB(ctx->local_ip6[1], 0) != 0 || LSB(ctx->local_ip6[1], 1) != 0 || + LSB(ctx->local_ip6[1], 2) != 0 || LSB(ctx->local_ip6[1], 3) != 0 || + LSB(ctx->local_ip6[2], 0) != 0 || LSB(ctx->local_ip6[2], 1) != 0 || + LSB(ctx->local_ip6[2], 2) != 0 || LSB(ctx->local_ip6[2], 3) != 0 || + LSB(ctx->local_ip6[3], 0) != 0 || LSB(ctx->local_ip6[3], 1) != 0 || + LSB(ctx->local_ip6[3], 2) != 0 || LSB(ctx->local_ip6[3], 3) != 0) return SK_DROP; - half = (__u16 *)&ctx->local_ip6; - if (half[0] != 0 || half[1] != 0 || - half[2] != 0 || half[3] != 0 || - half[4] != 0 || half[5] != 0 || - half[6] != 0 || half[7] != 0) + if (LSW(ctx->remote_ip6[0], 0) != 0 || LSW(ctx->remote_ip6[0], 1) != 0 || + LSW(ctx->remote_ip6[1], 0) != 0 || LSW(ctx->remote_ip6[1], 1) != 0 || + LSW(ctx->remote_ip6[2], 0) != 0 || LSW(ctx->remote_ip6[2], 1) != 0 || + LSW(ctx->remote_ip6[3], 0) != 0 || LSW(ctx->remote_ip6[3], 1) != 0) return SK_DROP; } -- cgit v1.3.1 From 33433913459a6bfbfa808c202d6f5490aa43d7a7 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 29 Sep 2020 15:06:02 -0700 Subject: libbpf: Fix uninitialized variable in btf_parse_type_sec Fix obvious unitialized variable use that wasn't reported by compiler. libbpf Makefile changes to catch such errors are added separately. Fixes: 3289959b97ca ("libbpf: Support BTF loading and raw data output in both endianness") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200929220604.833631-1-andriin@fb.com --- tools/lib/bpf/btf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index e1dbd766c698..398b1f345b3c 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -347,7 +347,7 @@ static int btf_parse_type_sec(struct btf *btf) struct btf_header *hdr = btf->hdr; void *next_type = btf->types_data; void *end_type = next_type + hdr->type_len; - int err, i, type_size; + int err, i = 0, type_size; /* VOID (type_id == 0) is specially handled by btf__get_type_by_id(), * so ensure we can never properly use its offset from index by -- cgit v1.3.1 From 0a62291d697f1d2882650f5b9f97d331ad9a505b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 29 Sep 2020 15:06:03 -0700 Subject: libbpf: Compile libbpf under -O2 level by default and catch extra warnings For some reason compiler doesn't complain about uninitialized variable, fixed in previous patch, if libbpf is compiled without -O2 optimization level. So do compile it with -O2 and never let similar issue slip by again. -Wall is added unconditionally, so no need to specify it again. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200929220604.833631-2-andriin@fb.com --- tools/lib/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index f43249696d9f..70cb44efe8cb 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -98,7 +98,7 @@ PC_FILE = libbpf.pc ifdef EXTRA_CFLAGS CFLAGS := $(EXTRA_CFLAGS) else - CFLAGS := -g -Wall + CFLAGS := -g -O2 endif # Append required CFLAGS -- cgit v1.3.1 From b0efc216f577997bf563d76d51673ed79c3d5f71 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 29 Sep 2020 15:06:04 -0700 Subject: libbpf: Compile in PIC mode only for shared library case Libbpf compiles .o's for static and shared library modes separately, so no need to specify -fPIC for both. Keep it only for shared library mode. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200929220604.833631-3-andriin@fb.com --- tools/lib/bpf/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 70cb44efe8cb..5f9abed3e226 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -104,13 +104,12 @@ endif # Append required CFLAGS override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum override CFLAGS += -Werror -Wall -override CFLAGS += -fPIC override CFLAGS += $(INCLUDES) override CFLAGS += -fvisibility=hidden override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 # flags specific for shared library -SHLIB_FLAGS := -DSHARED +SHLIB_FLAGS := -DSHARED -fPIC ifeq ($(VERBOSE),1) Q = -- cgit v1.3.1 From 3c333c47041c905b81860478d2d8c0c3e8623b40 Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 28 Sep 2020 08:28:01 +0800 Subject: tools: gpio: port lsgpio to v2 uAPI Port the lsgpio tool to the latest GPIO uAPI. Signed-off-by: Kent Gibson Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- tools/gpio/lsgpio.c | 60 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c index b08d7a5e779b..5a05a454d0c9 100644 --- a/tools/gpio/lsgpio.c +++ b/tools/gpio/lsgpio.c @@ -25,57 +25,73 @@ struct gpio_flag { char *name; - unsigned long mask; + unsigned long long mask; }; struct gpio_flag flagnames[] = { { - .name = "kernel", - .mask = GPIOLINE_FLAG_KERNEL, + .name = "used", + .mask = GPIO_V2_LINE_FLAG_USED, + }, + { + .name = "input", + .mask = GPIO_V2_LINE_FLAG_INPUT, }, { .name = "output", - .mask = GPIOLINE_FLAG_IS_OUT, + .mask = GPIO_V2_LINE_FLAG_OUTPUT, }, { .name = "active-low", - .mask = GPIOLINE_FLAG_ACTIVE_LOW, + .mask = GPIO_V2_LINE_FLAG_ACTIVE_LOW, }, { .name = "open-drain", - .mask = GPIOLINE_FLAG_OPEN_DRAIN, + .mask = GPIO_V2_LINE_FLAG_OPEN_DRAIN, }, { .name = "open-source", - .mask = GPIOLINE_FLAG_OPEN_SOURCE, + .mask = GPIO_V2_LINE_FLAG_OPEN_SOURCE, }, { .name = "pull-up", - .mask = GPIOLINE_FLAG_BIAS_PULL_UP, + .mask = GPIO_V2_LINE_FLAG_BIAS_PULL_UP, }, { .name = "pull-down", - .mask = GPIOLINE_FLAG_BIAS_PULL_DOWN, + .mask = GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN, }, { .name = "bias-disabled", - .mask = GPIOLINE_FLAG_BIAS_DISABLE, + .mask = GPIO_V2_LINE_FLAG_BIAS_DISABLED, }, }; -void print_flags(unsigned long flags) +static void print_attributes(struct gpio_v2_line_info *info) { int i; - int printed = 0; + const char *field_format = "%s"; for (i = 0; i < ARRAY_SIZE(flagnames); i++) { - if (flags & flagnames[i].mask) { - if (printed) - fprintf(stdout, " "); - fprintf(stdout, "%s", flagnames[i].name); - printed++; + if (info->flags & flagnames[i].mask) { + fprintf(stdout, field_format, flagnames[i].name); + field_format = ", %s"; } } + + if ((info->flags & GPIO_V2_LINE_FLAG_EDGE_RISING) && + (info->flags & GPIO_V2_LINE_FLAG_EDGE_FALLING)) + fprintf(stdout, field_format, "both-edges"); + else if (info->flags & GPIO_V2_LINE_FLAG_EDGE_RISING) + fprintf(stdout, field_format, "rising-edge"); + else if (info->flags & GPIO_V2_LINE_FLAG_EDGE_FALLING) + fprintf(stdout, field_format, "falling-edge"); + + for (i = 0; i < info->num_attrs; i++) { + if (info->attrs[i].id == GPIO_V2_LINE_ATTR_ID_DEBOUNCE) + fprintf(stdout, ", debounce_period=%dusec", + info->attrs[0].debounce_period_us); + } } int list_device(const char *device_name) @@ -109,18 +125,18 @@ int list_device(const char *device_name) /* Loop over the lines and print info */ for (i = 0; i < cinfo.lines; i++) { - struct gpioline_info linfo; + struct gpio_v2_line_info linfo; memset(&linfo, 0, sizeof(linfo)); - linfo.line_offset = i; + linfo.offset = i; - ret = ioctl(fd, GPIO_GET_LINEINFO_IOCTL, &linfo); + ret = ioctl(fd, GPIO_V2_GET_LINEINFO_IOCTL, &linfo); if (ret == -1) { ret = -errno; perror("Failed to issue LINEINFO IOCTL\n"); goto exit_close_error; } - fprintf(stdout, "\tline %2d:", linfo.line_offset); + fprintf(stdout, "\tline %2d:", linfo.offset); if (linfo.name[0]) fprintf(stdout, " \"%s\"", linfo.name); else @@ -131,7 +147,7 @@ int list_device(const char *device_name) fprintf(stdout, " unused"); if (linfo.flags) { fprintf(stdout, " ["); - print_flags(linfo.flags); + print_attributes(&linfo); fprintf(stdout, "]"); } fprintf(stdout, "\n"); -- cgit v1.3.1 From e86a863b337c50713b674007f74dc1854701eb93 Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 28 Sep 2020 08:28:02 +0800 Subject: tools: gpio: port gpio-watch to v2 uAPI Port the gpio-watch tool to the latest GPIO uAPI. Signed-off-by: Kent Gibson Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- tools/gpio/gpio-watch.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/gpio/gpio-watch.c b/tools/gpio/gpio-watch.c index 5cea24fddfa7..f229ec62301b 100644 --- a/tools/gpio/gpio-watch.c +++ b/tools/gpio/gpio-watch.c @@ -21,8 +21,8 @@ int main(int argc, char **argv) { - struct gpioline_info_changed chg; - struct gpioline_info req; + struct gpio_v2_line_info_changed chg; + struct gpio_v2_line_info req; struct pollfd pfd; int fd, i, j, ret; char *event, *end; @@ -40,11 +40,11 @@ int main(int argc, char **argv) for (i = 0, j = 2; i < argc - 2; i++, j++) { memset(&req, 0, sizeof(req)); - req.line_offset = strtoul(argv[j], &end, 0); + req.offset = strtoul(argv[j], &end, 0); if (*end != '\0') goto err_usage; - ret = ioctl(fd, GPIO_GET_LINEINFO_WATCH_IOCTL, &req); + ret = ioctl(fd, GPIO_V2_GET_LINEINFO_WATCH_IOCTL, &req); if (ret) { perror("unable to set up line watch"); return EXIT_FAILURE; @@ -71,13 +71,13 @@ int main(int argc, char **argv) } switch (chg.event_type) { - case GPIOLINE_CHANGED_REQUESTED: + case GPIO_V2_LINE_CHANGED_REQUESTED: event = "requested"; break; - case GPIOLINE_CHANGED_RELEASED: + case GPIO_V2_LINE_CHANGED_RELEASED: event = "released"; break; - case GPIOLINE_CHANGED_CONFIG: + case GPIO_V2_LINE_CHANGED_CONFIG: event = "config changed"; break; default: @@ -87,7 +87,7 @@ int main(int argc, char **argv) } printf("line %u: %s at %llu\n", - chg.info.line_offset, event, chg.timestamp); + chg.info.offset, event, chg.timestamp_ns); } } -- cgit v1.3.1 From ed60aee0edcda1fabc39ab27c9650fa172f461b4 Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 28 Sep 2020 08:28:03 +0800 Subject: tools: gpio: rename nlines to num_lines Rename nlines to num_lines to be consistent with other usage for fields describing the number of entries in an array. Signed-off-by: Kent Gibson Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- tools/gpio/gpio-hammer.c | 26 +++++++++++++------------- tools/gpio/gpio-utils.c | 20 ++++++++++---------- tools/gpio/gpio-utils.h | 6 +++--- 3 files changed, 26 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/gpio/gpio-hammer.c b/tools/gpio/gpio-hammer.c index 9fd926e8cb52..a2c7577fad5c 100644 --- a/tools/gpio/gpio-hammer.c +++ b/tools/gpio/gpio-hammer.c @@ -22,7 +22,7 @@ #include #include "gpio-utils.h" -int hammer_device(const char *device_name, unsigned int *lines, int nlines, +int hammer_device(const char *device_name, unsigned int *lines, int num_lines, unsigned int loops) { struct gpiohandle_data data; @@ -33,7 +33,7 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines, unsigned int iteration = 0; memset(&data.values, 0, sizeof(data.values)); - ret = gpiotools_request_linehandle(device_name, lines, nlines, + ret = gpiotools_request_linehandle(device_name, lines, num_lines, GPIOHANDLE_REQUEST_OUTPUT, &data, "gpio-hammer"); if (ret < 0) @@ -46,15 +46,15 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines, goto exit_close_error; fprintf(stdout, "Hammer lines ["); - for (i = 0; i < nlines; i++) { + for (i = 0; i < num_lines; i++) { fprintf(stdout, "%d", lines[i]); - if (i != (nlines - 1)) + if (i != (num_lines - 1)) fprintf(stdout, ", "); } fprintf(stdout, "] on %s, initial states: [", device_name); - for (i = 0; i < nlines; i++) { + for (i = 0; i < num_lines; i++) { fprintf(stdout, "%d", data.values[i]); - if (i != (nlines - 1)) + if (i != (num_lines - 1)) fprintf(stdout, ", "); } fprintf(stdout, "]\n"); @@ -63,7 +63,7 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines, j = 0; while (1) { /* Invert all lines so we blink */ - for (i = 0; i < nlines; i++) + for (i = 0; i < num_lines; i++) data.values[i] = !data.values[i]; ret = gpiotools_set_values(fd, &data); @@ -81,9 +81,9 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines, j = 0; fprintf(stdout, "["); - for (i = 0; i < nlines; i++) { + for (i = 0; i < num_lines; i++) { fprintf(stdout, "%d: %d", lines[i], data.values[i]); - if (i != (nlines - 1)) + if (i != (num_lines - 1)) fprintf(stdout, ", "); } fprintf(stdout, "]\r"); @@ -121,7 +121,7 @@ int main(int argc, char **argv) const char *device_name = NULL; unsigned int lines[GPIOHANDLES_MAX]; unsigned int loops = 0; - int nlines; + int num_lines; int c; int i; @@ -158,11 +158,11 @@ int main(int argc, char **argv) return -1; } - nlines = i; + num_lines = i; - if (!device_name || !nlines) { + if (!device_name || !num_lines) { print_usage(); return -1; } - return hammer_device(device_name, lines, nlines, loops); + return hammer_device(device_name, lines, num_lines, loops); } diff --git a/tools/gpio/gpio-utils.c b/tools/gpio/gpio-utils.c index 16a5d9cb9da2..d527980bcb94 100644 --- a/tools/gpio/gpio-utils.c +++ b/tools/gpio/gpio-utils.c @@ -38,7 +38,7 @@ * such as "gpiochip0" * @lines: An array desired lines, specified by offset * index for the associated GPIO device. - * @nline: The number of lines to request. + * @num_lines: The number of lines to request. * @flag: The new flag for requsted gpio. Reference * "linux/gpio.h" for the meaning of flag. * @data: Default value will be set to gpio when flag is @@ -56,7 +56,7 @@ * On failure return the errno. */ int gpiotools_request_linehandle(const char *device_name, unsigned int *lines, - unsigned int nlines, unsigned int flag, + unsigned int num_lines, unsigned int flag, struct gpiohandle_data *data, const char *consumer_label) { @@ -78,12 +78,12 @@ int gpiotools_request_linehandle(const char *device_name, unsigned int *lines, goto exit_free_name; } - for (i = 0; i < nlines; i++) + for (i = 0; i < num_lines; i++) req.lineoffsets[i] = lines[i]; req.flags = flag; strcpy(req.consumer_label, consumer_label); - req.lines = nlines; + req.lines = num_lines; if (flag & GPIOHANDLE_REQUEST_OUTPUT) memcpy(req.default_values, data, sizeof(req.default_values)); @@ -194,20 +194,20 @@ int gpiotools_get(const char *device_name, unsigned int line) * such as "gpiochip0". * @lines: An array desired lines, specified by offset * index for the associated GPIO device. - * @nline: The number of lines to request. + * @num_lines: The number of lines to request. * @data: The array of values get from gpiochip. * * Return: On success return 0; * On failure return the errno. */ int gpiotools_gets(const char *device_name, unsigned int *lines, - unsigned int nlines, struct gpiohandle_data *data) + unsigned int num_lines, struct gpiohandle_data *data) { int fd; int ret; int ret_close; - ret = gpiotools_request_linehandle(device_name, lines, nlines, + ret = gpiotools_request_linehandle(device_name, lines, num_lines, GPIOHANDLE_REQUEST_INPUT, data, CONSUMER); if (ret < 0) @@ -245,7 +245,7 @@ int gpiotools_set(const char *device_name, unsigned int line, * such as "gpiochip0". * @lines: An array desired lines, specified by offset * index for the associated GPIO device. - * @nline: The number of lines to request. + * @num_lines: The number of lines to request. * @data: The array of values set to gpiochip, must be * 0(low) or 1(high). * @@ -253,11 +253,11 @@ int gpiotools_set(const char *device_name, unsigned int line, * On failure return the errno. */ int gpiotools_sets(const char *device_name, unsigned int *lines, - unsigned int nlines, struct gpiohandle_data *data) + unsigned int num_lines, struct gpiohandle_data *data) { int ret; - ret = gpiotools_request_linehandle(device_name, lines, nlines, + ret = gpiotools_request_linehandle(device_name, lines, num_lines, GPIOHANDLE_REQUEST_OUTPUT, data, CONSUMER); if (ret < 0) diff --git a/tools/gpio/gpio-utils.h b/tools/gpio/gpio-utils.h index cf37f13f3dcb..324729577865 100644 --- a/tools/gpio/gpio-utils.h +++ b/tools/gpio/gpio-utils.h @@ -23,7 +23,7 @@ static inline int check_prefix(const char *str, const char *prefix) } int gpiotools_request_linehandle(const char *device_name, unsigned int *lines, - unsigned int nlines, unsigned int flag, + unsigned int num_lines, unsigned int flag, struct gpiohandle_data *data, const char *consumer_label); int gpiotools_set_values(const int fd, struct gpiohandle_data *data); @@ -32,10 +32,10 @@ int gpiotools_release_linehandle(const int fd); int gpiotools_get(const char *device_name, unsigned int line); int gpiotools_gets(const char *device_name, unsigned int *lines, - unsigned int nlines, struct gpiohandle_data *data); + unsigned int num_lines, struct gpiohandle_data *data); int gpiotools_set(const char *device_name, unsigned int line, unsigned int value); int gpiotools_sets(const char *device_name, unsigned int *lines, - unsigned int nlines, struct gpiohandle_data *data); + unsigned int num_lines, struct gpiohandle_data *data); #endif /* _GPIO_UTILS_H_ */ -- cgit v1.3.1 From 7ff6d1d25a9ea3a03f795d383889ac34fbc601d5 Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 28 Sep 2020 08:28:04 +0800 Subject: tools: gpio: port gpio-hammer to v2 uAPI Port the gpio-hammer tool to the latest GPIO uAPI. Signed-off-by: Kent Gibson Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- tools/gpio/gpio-hammer.c | 32 +++++---- tools/gpio/gpio-utils.c | 164 ++++++++++++++++++++++++++++++++++++++--------- tools/gpio/gpio-utils.h | 46 +++++++++++-- 3 files changed, 197 insertions(+), 45 deletions(-) (limited to 'tools') diff --git a/tools/gpio/gpio-hammer.c b/tools/gpio/gpio-hammer.c index a2c7577fad5c..54fdf59dd320 100644 --- a/tools/gpio/gpio-hammer.c +++ b/tools/gpio/gpio-hammer.c @@ -25,23 +25,30 @@ int hammer_device(const char *device_name, unsigned int *lines, int num_lines, unsigned int loops) { - struct gpiohandle_data data; + struct gpio_v2_line_values values; + struct gpio_v2_line_config config; char swirr[] = "-\\|/"; int fd; int ret; int i, j; unsigned int iteration = 0; - memset(&data.values, 0, sizeof(data.values)); - ret = gpiotools_request_linehandle(device_name, lines, num_lines, - GPIOHANDLE_REQUEST_OUTPUT, &data, - "gpio-hammer"); + memset(&config, 0, sizeof(config)); + config.flags = GPIO_V2_LINE_FLAG_OUTPUT; + + ret = gpiotools_request_line(device_name, lines, num_lines, + &config, "gpio-hammer"); if (ret < 0) goto exit_error; else fd = ret; - ret = gpiotools_get_values(fd, &data); + values.mask = 0; + values.bits = 0; + for (i = 0; i < num_lines; i++) + gpiotools_set_bit(&values.mask, i); + + ret = gpiotools_get_values(fd, &values); if (ret < 0) goto exit_close_error; @@ -53,7 +60,7 @@ int hammer_device(const char *device_name, unsigned int *lines, int num_lines, } fprintf(stdout, "] on %s, initial states: [", device_name); for (i = 0; i < num_lines; i++) { - fprintf(stdout, "%d", data.values[i]); + fprintf(stdout, "%d", gpiotools_test_bit(values.bits, i)); if (i != (num_lines - 1)) fprintf(stdout, ", "); } @@ -64,14 +71,14 @@ int hammer_device(const char *device_name, unsigned int *lines, int num_lines, while (1) { /* Invert all lines so we blink */ for (i = 0; i < num_lines; i++) - data.values[i] = !data.values[i]; + gpiotools_change_bit(&values.bits, i); - ret = gpiotools_set_values(fd, &data); + ret = gpiotools_set_values(fd, &values); if (ret < 0) goto exit_close_error; /* Re-read values to get status */ - ret = gpiotools_get_values(fd, &data); + ret = gpiotools_get_values(fd, &values); if (ret < 0) goto exit_close_error; @@ -82,7 +89,8 @@ int hammer_device(const char *device_name, unsigned int *lines, int num_lines, fprintf(stdout, "["); for (i = 0; i < num_lines; i++) { - fprintf(stdout, "%d: %d", lines[i], data.values[i]); + fprintf(stdout, "%d: %d", lines[i], + gpiotools_test_bit(values.bits, i)); if (i != (num_lines - 1)) fprintf(stdout, ", "); } @@ -97,7 +105,7 @@ int hammer_device(const char *device_name, unsigned int *lines, int num_lines, ret = 0; exit_close_error: - gpiotools_release_linehandle(fd); + gpiotools_release_line(fd); exit_error: return ret; } diff --git a/tools/gpio/gpio-utils.c b/tools/gpio/gpio-utils.c index d527980bcb94..37187e056c8b 100644 --- a/tools/gpio/gpio-utils.c +++ b/tools/gpio/gpio-utils.c @@ -100,20 +100,87 @@ exit_free_name: free(chrdev_name); return ret < 0 ? ret : req.fd; } + +/** + * gpiotools_request_line() - request gpio lines in a gpiochip + * @device_name: The name of gpiochip without prefix "/dev/", + * such as "gpiochip0" + * @lines: An array desired lines, specified by offset + * index for the associated GPIO device. + * @num_lines: The number of lines to request. + * @config: The new config for requested gpio. Reference + * "linux/gpio.h" for config details. + * @consumer: The name of consumer, such as "sysfs", + * "powerkey". This is useful for other users to + * know who is using. + * + * Request gpio lines through the ioctl provided by chardev. User + * could call gpiotools_set_values() and gpiotools_get_values() to + * read and write respectively through the returned fd. Call + * gpiotools_release_line() to release these lines after that. + * + * Return: On success return the fd; + * On failure return the errno. + */ +int gpiotools_request_line(const char *device_name, unsigned int *lines, + unsigned int num_lines, + struct gpio_v2_line_config *config, + const char *consumer) +{ + struct gpio_v2_line_request req; + char *chrdev_name; + int fd; + int i; + int ret; + + ret = asprintf(&chrdev_name, "/dev/%s", device_name); + if (ret < 0) + return -ENOMEM; + + fd = open(chrdev_name, 0); + if (fd == -1) { + ret = -errno; + fprintf(stderr, "Failed to open %s, %s\n", + chrdev_name, strerror(errno)); + goto exit_free_name; + } + + memset(&req, 0, sizeof(req)); + for (i = 0; i < num_lines; i++) + req.offsets[i] = lines[i]; + + req.config = *config; + strcpy(req.consumer, consumer); + req.num_lines = num_lines; + + ret = ioctl(fd, GPIO_V2_GET_LINE_IOCTL, &req); + if (ret == -1) { + ret = -errno; + fprintf(stderr, "Failed to issue %s (%d), %s\n", + "GPIO_GET_LINE_IOCTL", ret, strerror(errno)); + } + + if (close(fd) == -1) + perror("Failed to close GPIO character device file"); +exit_free_name: + free(chrdev_name); + return ret < 0 ? ret : req.fd; +} + /** * gpiotools_set_values(): Set the value of gpio(s) * @fd: The fd returned by - * gpiotools_request_linehandle(). - * @data: The array of values want to set. + * gpiotools_request_line(). + * @values: The array of values want to set. * * Return: On success return 0; * On failure return the errno. */ -int gpiotools_set_values(const int fd, struct gpiohandle_data *data) +int gpiotools_set_values(const int fd, struct gpio_v2_line_values *values) { int ret; - ret = ioctl(fd, GPIOHANDLE_SET_LINE_VALUES_IOCTL, data); + ret = ioctl(fd, GPIO_V2_LINE_SET_VALUES_IOCTL, values); if (ret == -1) { ret = -errno; fprintf(stderr, "Failed to issue %s (%d), %s\n", @@ -127,17 +194,17 @@ int gpiotools_set_values(const int fd, struct gpiohandle_data *data) /** * gpiotools_get_values(): Get the value of gpio(s) * @fd: The fd returned by - * gpiotools_request_linehandle(). - * @data: The array of values get from hardware. + * gpiotools_request_line(). + * @values: The array of values get from hardware. * * Return: On success return 0; * On failure return the errno. */ -int gpiotools_get_values(const int fd, struct gpiohandle_data *data) +int gpiotools_get_values(const int fd, struct gpio_v2_line_values *values) { int ret; - ret = ioctl(fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, data); + ret = ioctl(fd, GPIO_V2_LINE_GET_VALUES_IOCTL, values); if (ret == -1) { ret = -errno; fprintf(stderr, "Failed to issue %s (%d), %s\n", @@ -169,6 +236,27 @@ int gpiotools_release_linehandle(const int fd) return ret; } +/** + * gpiotools_release_line(): Release the line(s) of gpiochip + * @fd: The fd returned by + * gpiotools_request_line(). + * + * Return: On success return 0; + * On failure return the errno. + */ +int gpiotools_release_line(const int fd) +{ + int ret; + + ret = close(fd); + if (ret == -1) { + perror("Failed to close GPIO LINE device file"); + ret = -errno; + } + + return ret; +} + /** * gpiotools_get(): Get value from specific line * @device_name: The name of gpiochip without prefix "/dev/", @@ -180,11 +268,14 @@ int gpiotools_release_linehandle(const int fd) */ int gpiotools_get(const char *device_name, unsigned int line) { - struct gpiohandle_data data; + int ret; + unsigned int value; unsigned int lines[] = {line}; - gpiotools_gets(device_name, lines, 1, &data); - return data.values[0]; + ret = gpiotools_gets(device_name, lines, 1, &value); + if (ret) + return ret; + return value; } @@ -195,27 +286,35 @@ int gpiotools_get(const char *device_name, unsigned int line) * @lines: An array desired lines, specified by offset * index for the associated GPIO device. * @num_lines: The number of lines to request. - * @data: The array of values get from gpiochip. + * @values: The array of values get from gpiochip. * * Return: On success return 0; * On failure return the errno. */ int gpiotools_gets(const char *device_name, unsigned int *lines, - unsigned int num_lines, struct gpiohandle_data *data) + unsigned int num_lines, unsigned int *values) { - int fd; + int fd, i; int ret; int ret_close; + struct gpio_v2_line_config config; + struct gpio_v2_line_values lv; - ret = gpiotools_request_linehandle(device_name, lines, num_lines, - GPIOHANDLE_REQUEST_INPUT, data, - CONSUMER); + memset(&config, 0, sizeof(config)); + config.flags = GPIO_V2_LINE_FLAG_INPUT; + ret = gpiotools_request_line(device_name, lines, num_lines, + &config, CONSUMER); if (ret < 0) return ret; fd = ret; - ret = gpiotools_get_values(fd, data); - ret_close = gpiotools_release_linehandle(fd); + for (i = 0; i < num_lines; i++) + gpiotools_set_bit(&lv.mask, i); + ret = gpiotools_get_values(fd, &lv); + if (!ret) + for (i = 0; i < num_lines; i++) + values[i] = gpiotools_test_bit(lv.bits, i); + ret_close = gpiotools_release_line(fd); return ret < 0 ? ret : ret_close; } @@ -232,11 +331,9 @@ int gpiotools_gets(const char *device_name, unsigned int *lines, int gpiotools_set(const char *device_name, unsigned int line, unsigned int value) { - struct gpiohandle_data data; unsigned int lines[] = {line}; - data.values[0] = value; - return gpiotools_sets(device_name, lines, 1, &data); + return gpiotools_sets(device_name, lines, 1, &value); } /** @@ -246,22 +343,31 @@ int gpiotools_set(const char *device_name, unsigned int line, * @lines: An array desired lines, specified by offset * index for the associated GPIO device. * @num_lines: The number of lines to request. - * @data: The array of values set to gpiochip, must be + * @value: The array of values set to gpiochip, must be * 0(low) or 1(high). * * Return: On success return 0; * On failure return the errno. */ int gpiotools_sets(const char *device_name, unsigned int *lines, - unsigned int num_lines, struct gpiohandle_data *data) + unsigned int num_lines, unsigned int *values) { - int ret; + int ret, i; + struct gpio_v2_line_config config; - ret = gpiotools_request_linehandle(device_name, lines, num_lines, - GPIOHANDLE_REQUEST_OUTPUT, data, - CONSUMER); + memset(&config, 0, sizeof(config)); + config.flags = GPIO_V2_LINE_FLAG_OUTPUT; + config.num_attrs = 1; + config.attrs[0].attr.id = GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES; + for (i = 0; i < num_lines; i++) { + gpiotools_set_bit(&config.attrs[0].mask, i); + gpiotools_assign_bit(&config.attrs[0].attr.values, + i, values[i]); + } + ret = gpiotools_request_line(device_name, lines, num_lines, + &config, CONSUMER); if (ret < 0) return ret; - return gpiotools_release_linehandle(ret); + return gpiotools_release_line(ret); } diff --git a/tools/gpio/gpio-utils.h b/tools/gpio/gpio-utils.h index 324729577865..6c69a9f1c253 100644 --- a/tools/gpio/gpio-utils.h +++ b/tools/gpio/gpio-utils.h @@ -12,7 +12,9 @@ #ifndef _GPIO_UTILS_H_ #define _GPIO_UTILS_H_ +#include #include +#include #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) @@ -26,16 +28,52 @@ int gpiotools_request_linehandle(const char *device_name, unsigned int *lines, unsigned int num_lines, unsigned int flag, struct gpiohandle_data *data, const char *consumer_label); -int gpiotools_set_values(const int fd, struct gpiohandle_data *data); -int gpiotools_get_values(const int fd, struct gpiohandle_data *data); int gpiotools_release_linehandle(const int fd); +int gpiotools_request_line(const char *device_name, + unsigned int *lines, + unsigned int num_lines, + struct gpio_v2_line_config *config, + const char *consumer); +int gpiotools_set_values(const int fd, struct gpio_v2_line_values *values); +int gpiotools_get_values(const int fd, struct gpio_v2_line_values *values); +int gpiotools_release_line(const int fd); + int gpiotools_get(const char *device_name, unsigned int line); int gpiotools_gets(const char *device_name, unsigned int *lines, - unsigned int num_lines, struct gpiohandle_data *data); + unsigned int num_lines, unsigned int *values); int gpiotools_set(const char *device_name, unsigned int line, unsigned int value); int gpiotools_sets(const char *device_name, unsigned int *lines, - unsigned int num_lines, struct gpiohandle_data *data); + unsigned int num_lines, unsigned int *values); + +/* helper functions for gpio_v2_line_values bits */ +static inline void gpiotools_set_bit(__u64 *b, int n) +{ + *b |= _BITULL(n); +} + +static inline void gpiotools_change_bit(__u64 *b, int n) +{ + *b ^= _BITULL(n); +} + +static inline void gpiotools_clear_bit(__u64 *b, int n) +{ + *b &= ~_BITULL(n); +} + +static inline int gpiotools_test_bit(__u64 b, int n) +{ + return !!(b & _BITULL(n)); +} + +static inline void gpiotools_assign_bit(__u64 *b, int n, bool value) +{ + if (value) + gpiotools_set_bit(b, n); + else + gpiotools_clear_bit(b, n); +} #endif /* _GPIO_UTILS_H_ */ -- cgit v1.3.1 From 0acda979df8d065f9cb16bbc723723b036697e57 Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 28 Sep 2020 08:28:05 +0800 Subject: tools: gpio: port gpio-event-mon to v2 uAPI Port the gpio-event-mon tool to the latest GPIO uAPI. Signed-off-by: Kent Gibson Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- tools/gpio/gpio-event-mon.c | 91 +++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 44 deletions(-) (limited to 'tools') diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index 1a303a81aeef..b230af889f26 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -23,17 +23,16 @@ #include #include #include +#include "gpio-utils.h" int monitor_device(const char *device_name, unsigned int line, - uint32_t handleflags, - uint32_t eventflags, + struct gpio_v2_line_config *config, unsigned int loops) { - struct gpioevent_request req; - struct gpiohandle_data data; + struct gpio_v2_line_values values; char *chrdev_name; - int fd; + int cfd, lfd; int ret; int i = 0; @@ -41,44 +40,39 @@ int monitor_device(const char *device_name, if (ret < 0) return -ENOMEM; - fd = open(chrdev_name, 0); - if (fd == -1) { + cfd = open(chrdev_name, 0); + if (cfd == -1) { ret = -errno; fprintf(stderr, "Failed to open %s\n", chrdev_name); goto exit_free_name; } - req.lineoffset = line; - req.handleflags = handleflags; - req.eventflags = eventflags; - strcpy(req.consumer_label, "gpio-event-mon"); - - ret = ioctl(fd, GPIO_GET_LINEEVENT_IOCTL, &req); - if (ret == -1) { - ret = -errno; - fprintf(stderr, "Failed to issue GET EVENT " - "IOCTL (%d)\n", - ret); - goto exit_close_error; - } + ret = gpiotools_request_line(device_name, &line, 1, config, + "gpio-event-mon"); + if (ret < 0) + goto exit_device_close; + else + lfd = ret; /* Read initial states */ - ret = ioctl(req.fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, &data); - if (ret == -1) { - ret = -errno; - fprintf(stderr, "Failed to issue GPIOHANDLE GET LINE " - "VALUES IOCTL (%d)\n", + values.mask = 1; + values.bits = 0; + ret = gpiotools_get_values(lfd, &values); + if (ret < 0) { + fprintf(stderr, + "Failed to issue GPIO LINE GET VALUES IOCTL (%d)\n", ret); - goto exit_close_error; + goto exit_line_close; } fprintf(stdout, "Monitoring line %d on %s\n", line, device_name); - fprintf(stdout, "Initial line value: %d\n", data.values[0]); + fprintf(stdout, "Initial line value: %d\n", + gpiotools_test_bit(values.bits, 0)); while (1) { - struct gpioevent_data event; + struct gpio_v2_line_event event; - ret = read(req.fd, &event, sizeof(event)); + ret = read(lfd, &event, sizeof(event)); if (ret == -1) { if (errno == -EAGAIN) { fprintf(stderr, "nothing available\n"); @@ -96,12 +90,14 @@ int monitor_device(const char *device_name, ret = -EIO; break; } - fprintf(stdout, "GPIO EVENT %llu: ", event.timestamp); + fprintf(stdout, "GPIO EVENT at %llu on line %d (%d|%d) ", + event.timestamp_ns, event.offset, event.line_seqno, + event.seqno); switch (event.id) { - case GPIOEVENT_EVENT_RISING_EDGE: + case GPIO_V2_LINE_EVENT_RISING_EDGE: fprintf(stdout, "rising edge"); break; - case GPIOEVENT_EVENT_FALLING_EDGE: + case GPIO_V2_LINE_EVENT_FALLING_EDGE: fprintf(stdout, "falling edge"); break; default: @@ -114,8 +110,11 @@ int monitor_device(const char *device_name, break; } -exit_close_error: - if (close(fd) == -1) +exit_line_close: + if (close(lfd) == -1) + perror("Failed to close line file"); +exit_device_close: + if (close(cfd) == -1) perror("Failed to close GPIO character device file"); exit_free_name: free(chrdev_name); @@ -140,15 +139,20 @@ void print_usage(void) ); } +#define EDGE_FLAGS \ + (GPIO_V2_LINE_FLAG_EDGE_RISING | \ + GPIO_V2_LINE_FLAG_EDGE_FALLING) + int main(int argc, char **argv) { const char *device_name = NULL; unsigned int line = -1; unsigned int loops = 0; - uint32_t handleflags = GPIOHANDLE_REQUEST_INPUT; - uint32_t eventflags = 0; + struct gpio_v2_line_config config; int c; + memset(&config, 0, sizeof(config)); + config.flags = GPIO_V2_LINE_FLAG_INPUT; while ((c = getopt(argc, argv, "c:n:o:dsrf?")) != -1) { switch (c) { case 'c': @@ -161,16 +165,16 @@ int main(int argc, char **argv) line = strtoul(optarg, NULL, 10); break; case 'd': - handleflags |= GPIOHANDLE_REQUEST_OPEN_DRAIN; + config.flags |= GPIO_V2_LINE_FLAG_OPEN_DRAIN; break; case 's': - handleflags |= GPIOHANDLE_REQUEST_OPEN_SOURCE; + config.flags |= GPIO_V2_LINE_FLAG_OPEN_SOURCE; break; case 'r': - eventflags |= GPIOEVENT_REQUEST_RISING_EDGE; + config.flags |= GPIO_V2_LINE_FLAG_EDGE_RISING; break; case 'f': - eventflags |= GPIOEVENT_REQUEST_FALLING_EDGE; + config.flags |= GPIO_V2_LINE_FLAG_EDGE_FALLING; break; case '?': print_usage(); @@ -182,11 +186,10 @@ int main(int argc, char **argv) print_usage(); return -1; } - if (!eventflags) { + if (!(config.flags & EDGE_FLAGS)) { printf("No flags specified, listening on both rising and " "falling edges\n"); - eventflags = GPIOEVENT_REQUEST_BOTH_EDGES; + config.flags |= EDGE_FLAGS; } - return monitor_device(device_name, line, handleflags, - eventflags, loops); + return monitor_device(device_name, line, &config, loops); } -- cgit v1.3.1 From 62757c32d5db33fa3abb7b7b88201f9b4ecbc85b Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 28 Sep 2020 08:28:06 +0800 Subject: tools: gpio: add multi-line monitoring to gpio-event-mon Extend gpio-event-mon to support monitoring multiple lines. This would require multiple lineevent requests to implement using uAPI v1, but can be performed with a single line request using uAPI v2. Signed-off-by: Kent Gibson Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- tools/gpio/gpio-event-mon.c | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index b230af889f26..0c34f18f511c 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -26,7 +26,8 @@ #include "gpio-utils.h" int monitor_device(const char *device_name, - unsigned int line, + unsigned int *lines, + unsigned int num_lines, struct gpio_v2_line_config *config, unsigned int loops) { @@ -47,7 +48,7 @@ int monitor_device(const char *device_name, goto exit_free_name; } - ret = gpiotools_request_line(device_name, &line, 1, config, + ret = gpiotools_request_line(device_name, lines, num_lines, config, "gpio-event-mon"); if (ret < 0) goto exit_device_close; @@ -55,8 +56,10 @@ int monitor_device(const char *device_name, lfd = ret; /* Read initial states */ - values.mask = 1; + values.mask = 0; values.bits = 0; + for (i = 0; i < num_lines; i++) + gpiotools_set_bit(&values.mask, i); ret = gpiotools_get_values(lfd, &values); if (ret < 0) { fprintf(stderr, @@ -65,9 +68,23 @@ int monitor_device(const char *device_name, goto exit_line_close; } - fprintf(stdout, "Monitoring line %d on %s\n", line, device_name); - fprintf(stdout, "Initial line value: %d\n", - gpiotools_test_bit(values.bits, 0)); + if (num_lines == 1) { + fprintf(stdout, "Monitoring line %d on %s\n", lines[0], device_name); + fprintf(stdout, "Initial line value: %d\n", + gpiotools_test_bit(values.bits, 0)); + } else { + fprintf(stdout, "Monitoring lines %d", lines[0]); + for (i = 1; i < num_lines - 1; i++) + fprintf(stdout, ", %d", lines[i]); + fprintf(stdout, " and %d on %s\n", lines[i], device_name); + fprintf(stdout, "Initial line values: %d", + gpiotools_test_bit(values.bits, 0)); + for (i = 1; i < num_lines - 1; i++) + fprintf(stdout, ", %d", + gpiotools_test_bit(values.bits, i)); + fprintf(stdout, " and %d\n", + gpiotools_test_bit(values.bits, i)); + } while (1) { struct gpio_v2_line_event event; @@ -126,7 +143,7 @@ void print_usage(void) fprintf(stderr, "Usage: gpio-event-mon [options]...\n" "Listen to events on GPIO lines, 0->1 1->0\n" " -n Listen on GPIOs on a named device (must be stated)\n" - " -o Offset to monitor\n" + " -o Offset of line to monitor (may be repeated)\n" " -d Set line as open drain\n" " -s Set line as open source\n" " -r Listen for rising edges\n" @@ -146,7 +163,8 @@ void print_usage(void) int main(int argc, char **argv) { const char *device_name = NULL; - unsigned int line = -1; + unsigned int lines[GPIO_V2_LINES_MAX]; + unsigned int num_lines = 0; unsigned int loops = 0; struct gpio_v2_line_config config; int c; @@ -162,7 +180,12 @@ int main(int argc, char **argv) device_name = optarg; break; case 'o': - line = strtoul(optarg, NULL, 10); + if (num_lines >= GPIO_V2_LINES_MAX) { + print_usage(); + return -1; + } + lines[num_lines] = strtoul(optarg, NULL, 10); + num_lines++; break; case 'd': config.flags |= GPIO_V2_LINE_FLAG_OPEN_DRAIN; @@ -182,7 +205,7 @@ int main(int argc, char **argv) } } - if (!device_name || line == -1) { + if (!device_name || num_lines == 0) { print_usage(); return -1; } @@ -191,5 +214,5 @@ int main(int argc, char **argv) "falling edges\n"); config.flags |= EDGE_FLAGS; } - return monitor_device(device_name, line, &config, loops); + return monitor_device(device_name, lines, num_lines, &config, loops); } -- cgit v1.3.1 From cf048e05b68789e9fa35f246f8ecbe95d79f4173 Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 28 Sep 2020 08:28:07 +0800 Subject: tools: gpio: add debounce support to gpio-event-mon Add support for debouncing monitored lines to gpio-event-mon. Signed-off-by: Kent Gibson Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- tools/gpio/gpio-event-mon.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index 0c34f18f511c..90c3155f05b1 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -148,11 +148,12 @@ void print_usage(void) " -s Set line as open source\n" " -r Listen for rising edges\n" " -f Listen for falling edges\n" + " -b Debounce the line with period n microseconds\n" " [-c ] Do loops (optional, infinite loop if not stated)\n" " -? This helptext\n" "\n" "Example:\n" - "gpio-event-mon -n gpiochip0 -o 4 -r -f\n" + "gpio-event-mon -n gpiochip0 -o 4 -r -f -b 10000\n" ); } @@ -167,11 +168,12 @@ int main(int argc, char **argv) unsigned int num_lines = 0; unsigned int loops = 0; struct gpio_v2_line_config config; - int c; + int c, attr, i; + unsigned long debounce_period_us = 0; memset(&config, 0, sizeof(config)); config.flags = GPIO_V2_LINE_FLAG_INPUT; - while ((c = getopt(argc, argv, "c:n:o:dsrf?")) != -1) { + while ((c = getopt(argc, argv, "c:n:o:b:dsrf?")) != -1) { switch (c) { case 'c': loops = strtoul(optarg, NULL, 10); @@ -187,6 +189,9 @@ int main(int argc, char **argv) lines[num_lines] = strtoul(optarg, NULL, 10); num_lines++; break; + case 'b': + debounce_period_us = strtoul(optarg, NULL, 10); + break; case 'd': config.flags |= GPIO_V2_LINE_FLAG_OPEN_DRAIN; break; @@ -205,6 +210,15 @@ int main(int argc, char **argv) } } + if (debounce_period_us) { + attr = config.num_attrs; + config.num_attrs++; + for (i = 0; i < num_lines; i++) + gpiotools_set_bit(&config.attrs[attr].mask, i); + config.attrs[attr].attr.id = GPIO_V2_LINE_ATTR_ID_DEBOUNCE; + config.attrs[attr].attr.debounce_period_us = debounce_period_us; + } + if (!device_name || num_lines == 0) { print_usage(); return -1; -- cgit v1.3.1 From 48d072c4e8cdb542ade06727c31d7851bcc40a89 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Thu, 24 Sep 2020 12:17:33 +0200 Subject: selftests: netfilter: add time counter check Check packets are correctly placed in current year. Also do a NULL check for another one. Signed-off-by: Fabian Frederick Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_meta.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh index 18a1abca3262..087f0e6e71ce 100755 --- a/tools/testing/selftests/netfilter/nft_meta.sh +++ b/tools/testing/selftests/netfilter/nft_meta.sh @@ -23,6 +23,8 @@ ip -net "$ns0" addr add 127.0.0.1 dev lo trap cleanup EXIT +currentyear=$(date +%G) +lastyear=$((currentyear-1)) ip netns exec "$ns0" nft -f /dev/stdin < Date: Wed, 30 Sep 2020 09:34:20 -0300 Subject: perf beauty mmap_flags: Conditionaly define the mmap flags So that in older systems we get it in the mmap flags scnprintf routines: $ tools/perf/trace/beauty/mmap_flags.sh | head -9 2> /dev/null static const char *mmap_flags[] = { [ilog2(0x40) + 1] = "32BIT", #ifndef MAP_32BIT #define MAP_32BIT 0x40 #endif [ilog2(0x01) + 1] = "SHARED", #ifndef MAP_SHARED #define MAP_SHARED 0x01 #endif $ Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/mmap_flags.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh index 5f5eefcb3c74..39eb2595983b 100755 --- a/tools/perf/trace/beauty/mmap_flags.sh +++ b/tools/perf/trace/beauty/mmap_flags.sh @@ -21,20 +21,20 @@ printf "static const char *mmap_flags[] = {\n" regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MAP_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' egrep -q $regex ${arch_mman} && \ (egrep $regex ${arch_mman} | \ - sed -r "s/$regex/\2 \1/g" | \ - xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n") + sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n") egrep -q $regex ${linux_mman} && \ (egrep $regex ${linux_mman} | \ egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \ - sed -r "s/$regex/\2 \1/g" | \ - xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n") + sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n") ([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+.*' ${arch_mman}) && (egrep $regex ${header_dir}/mman.h | \ - sed -r "s/$regex/\2 \1/g" | \ - xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n") + sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n") printf "};\n" -- cgit v1.3.1 From b426ce83baa7dff947fb354118d3133f2953aac8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Sep 2020 17:18:15 +0200 Subject: bpf: Add classid helper only based on skb->sk Similarly to 5a52ae4e32a6 ("bpf: Allow to retrieve cgroup v1 classid from v2 hooks"), add a helper to retrieve cgroup v1 classid solely based on the skb->sk, so it can be used as key as part of BPF map lookups out of tc from host ns, in particular given the skb->sk is retained these days when crossing net ns thanks to 9c4c325252c5 ("skbuff: preserve sock reference when scrubbing the skb."). This is similar to bpf_skb_cgroup_id() which implements the same for v2. Kubernetes ecosystem is still operating on v1 however, hence net_cls needs to be used there until this can be dropped in with the v2 helper of bpf_skb_cgroup_id(). Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/ed633cf27a1c620e901c5aa99ebdefb028dce600.1601477936.git.daniel@iogearbox.net --- include/uapi/linux/bpf.h | 10 ++++++++++ net/core/filter.c | 21 +++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 10 ++++++++++ 3 files changed, 41 insertions(+) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2b1d3f16cbd1..6116a7f54c8f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3643,6 +3643,15 @@ union bpf_attr { * *flags* are identical to those used for bpf_snprintf_btf. * Return * 0 on success or a negative error in case of failure. + * + * u64 bpf_skb_cgroup_classid(struct sk_buff *skb) + * Description + * See **bpf_get_cgroup_classid**\ () for the main description. + * This helper differs from **bpf_get_cgroup_classid**\ () in that + * the cgroup v1 net_cls class is retrieved only from the *skb*'s + * associated socket instead of the current process. + * Return + * The id is returned or 0 in case the id could not be retrieved. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3796,6 +3805,7 @@ union bpf_attr { FN(copy_from_user), \ FN(snprintf_btf), \ FN(seq_printf_btf), \ + FN(skb_cgroup_classid), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/net/core/filter.c b/net/core/filter.c index af88935e24b1..fa01c697977d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2707,6 +2707,23 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = { .gpl_only = false, .ret_type = RET_INTEGER, }; + +BPF_CALL_1(bpf_skb_cgroup_classid, const struct sk_buff *, skb) +{ + struct sock *sk = skb_to_full_sk(skb); + + if (!sk || !sk_fullsock(sk)) + return 0; + + return sock_cgroup_classid(&sk->sk_cgrp_data); +} + +static const struct bpf_func_proto bpf_skb_cgroup_classid_proto = { + .func = bpf_skb_cgroup_classid, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; #endif BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) @@ -6772,6 +6789,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_skb_get_xfrm_state: return &bpf_skb_get_xfrm_state_proto; #endif +#ifdef CONFIG_CGROUP_NET_CLASSID + case BPF_FUNC_skb_cgroup_classid: + return &bpf_skb_cgroup_classid_proto; +#endif #ifdef CONFIG_SOCK_CGROUP_DATA case BPF_FUNC_skb_cgroup_id: return &bpf_skb_cgroup_id_proto; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 2b1d3f16cbd1..6116a7f54c8f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3643,6 +3643,15 @@ union bpf_attr { * *flags* are identical to those used for bpf_snprintf_btf. * Return * 0 on success or a negative error in case of failure. + * + * u64 bpf_skb_cgroup_classid(struct sk_buff *skb) + * Description + * See **bpf_get_cgroup_classid**\ () for the main description. + * This helper differs from **bpf_get_cgroup_classid**\ () in that + * the cgroup v1 net_cls class is retrieved only from the *skb*'s + * associated socket instead of the current process. + * Return + * The id is returned or 0 in case the id could not be retrieved. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3796,6 +3805,7 @@ union bpf_attr { FN(copy_from_user), \ FN(snprintf_btf), \ FN(seq_printf_btf), \ + FN(skb_cgroup_classid), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.3.1 From b4ab31414970a7a03a5d55d75083f2c101a30592 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Sep 2020 17:18:17 +0200 Subject: bpf: Add redirect_neigh helper as redirect drop-in Add a redirect_neigh() helper as redirect() drop-in replacement for the xmit side. Main idea for the helper is to be very similar in semantics to the latter just that the skb gets injected into the neighboring subsystem in order to let the stack do the work it knows best anyway to populate the L2 addresses of the packet and then hand over to dev_queue_xmit() as redirect() does. This solves two bigger items: i) skbs don't need to go up to the stack on the host facing veth ingress side for traffic egressing the container to achieve the same for populating L2 which also has the huge advantage that ii) the skb->sk won't get orphaned in ip_rcv_core() when entering the IP routing layer on the host stack. Given that skb->sk neither gets orphaned when crossing the netns as per 9c4c325252c5 ("skbuff: preserve sock reference when scrubbing the skb.") the helper can then push the skbs directly to the phys device where FQ scheduler can do its work and TCP stack gets proper backpressure given we hold on to skb->sk as long as skb is still residing in queues. With the helper used in BPF data path to then push the skb to the phys device, I observed a stable/consistent TCP_STREAM improvement on veth devices for traffic going container -> host -> host -> container from ~10Gbps to ~15Gbps for a single stream in my test environment. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Reviewed-by: David Ahern Acked-by: Martin KaFai Lau Cc: David Ahern Link: https://lore.kernel.org/bpf/f207de81629e1724899b73b8112e0013be782d35.1601477936.git.daniel@iogearbox.net --- include/linux/skbuff.h | 5 + include/uapi/linux/bpf.h | 14 +++ net/core/filter.c | 276 ++++++++++++++++++++++++++++++++++++++--- tools/include/uapi/linux/bpf.h | 14 +++ 4 files changed, 294 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 04a18e01b362..3d0cf3722bb4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2548,6 +2548,11 @@ static inline int skb_mac_header_was_set(const struct sk_buff *skb) return skb->mac_header != (typeof(skb->mac_header))~0U; } +static inline void skb_unset_mac_header(struct sk_buff *skb) +{ + skb->mac_header = (typeof(skb->mac_header))~0U; +} + static inline void skb_reset_mac_header(struct sk_buff *skb) { skb->mac_header = skb->data - skb->head; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6116a7f54c8f..1f17c6752deb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3652,6 +3652,19 @@ union bpf_attr { * associated socket instead of the current process. * Return * The id is returned or 0 in case the id could not be retrieved. + * + * long bpf_redirect_neigh(u32 ifindex, u64 flags) + * Description + * Redirect the packet to another net device of index *ifindex* + * and fill in L2 addresses from neighboring subsystem. This helper + * is somewhat similar to **bpf_redirect**\ (), except that it + * fills in e.g. MAC addresses based on the L3 information from + * the packet. This helper is supported for IPv4 and IPv6 protocols. + * The *flags* argument is reserved and must be 0. The helper is + * currently only supported for tc BPF program types. + * Return + * The helper returns **TC_ACT_REDIRECT** on success or + * **TC_ACT_SHOT** on error. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3806,6 +3819,7 @@ union bpf_attr { FN(snprintf_btf), \ FN(seq_printf_btf), \ FN(skb_cgroup_classid), \ + FN(redirect_neigh), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/net/core/filter.c b/net/core/filter.c index a0776e48dcc9..3fb6adad1957 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2163,13 +2163,233 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev, return __bpf_redirect_no_mac(skb, dev, flags); } +#if IS_ENABLED(CONFIG_IPV6) +static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct net_device *dev = dst->dev; + u32 hh_len = LL_RESERVED_SPACE(dev); + const struct in6_addr *nexthop; + struct neighbour *neigh; + + if (dev_xmit_recursion()) { + net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n"); + goto out_drop; + } + + skb->dev = dev; + skb->tstamp = 0; + + if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { + struct sk_buff *skb2; + + skb2 = skb_realloc_headroom(skb, hh_len); + if (unlikely(!skb2)) { + kfree_skb(skb); + return -ENOMEM; + } + if (skb->sk) + skb_set_owner_w(skb2, skb->sk); + consume_skb(skb); + skb = skb2; + } + + rcu_read_lock_bh(); + nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst), + &ipv6_hdr(skb)->daddr); + neigh = ip_neigh_gw6(dev, nexthop); + if (likely(!IS_ERR(neigh))) { + int ret; + + sock_confirm_neigh(skb, neigh); + dev_xmit_recursion_inc(); + ret = neigh_output(neigh, skb, false); + dev_xmit_recursion_dec(); + rcu_read_unlock_bh(); + return ret; + } + rcu_read_unlock_bh(); + IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); +out_drop: + kfree_skb(skb); + return -ENETDOWN; +} + +static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct net *net = dev_net(dev); + int err, ret = NET_XMIT_DROP; + struct dst_entry *dst; + struct flowi6 fl6 = { + .flowi6_flags = FLOWI_FLAG_ANYSRC, + .flowi6_mark = skb->mark, + .flowlabel = ip6_flowinfo(ip6h), + .flowi6_oif = dev->ifindex, + .flowi6_proto = ip6h->nexthdr, + .daddr = ip6h->daddr, + .saddr = ip6h->saddr, + }; + + dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL); + if (IS_ERR(dst)) + goto out_drop; + + skb_dst_set(skb, dst); + + err = bpf_out_neigh_v6(net, skb); + if (unlikely(net_xmit_eval(err))) + dev->stats.tx_errors++; + else + ret = NET_XMIT_SUCCESS; + goto out_xmit; +out_drop: + dev->stats.tx_errors++; + kfree_skb(skb); +out_xmit: + return ret; +} +#else +static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev) +{ + kfree_skb(skb); + return NET_XMIT_DROP; +} +#endif /* CONFIG_IPV6 */ + +#if IS_ENABLED(CONFIG_INET) +static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct rtable *rt = container_of(dst, struct rtable, dst); + struct net_device *dev = dst->dev; + u32 hh_len = LL_RESERVED_SPACE(dev); + struct neighbour *neigh; + bool is_v6gw = false; + + if (dev_xmit_recursion()) { + net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n"); + goto out_drop; + } + + skb->dev = dev; + skb->tstamp = 0; + + if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { + struct sk_buff *skb2; + + skb2 = skb_realloc_headroom(skb, hh_len); + if (unlikely(!skb2)) { + kfree_skb(skb); + return -ENOMEM; + } + if (skb->sk) + skb_set_owner_w(skb2, skb->sk); + consume_skb(skb); + skb = skb2; + } + + rcu_read_lock_bh(); + neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); + if (likely(!IS_ERR(neigh))) { + int ret; + + sock_confirm_neigh(skb, neigh); + dev_xmit_recursion_inc(); + ret = neigh_output(neigh, skb, is_v6gw); + dev_xmit_recursion_dec(); + rcu_read_unlock_bh(); + return ret; + } + rcu_read_unlock_bh(); +out_drop: + kfree_skb(skb); + return -ENETDOWN; +} + +static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev) +{ + const struct iphdr *ip4h = ip_hdr(skb); + struct net *net = dev_net(dev); + int err, ret = NET_XMIT_DROP; + struct rtable *rt; + struct flowi4 fl4 = { + .flowi4_flags = FLOWI_FLAG_ANYSRC, + .flowi4_mark = skb->mark, + .flowi4_tos = RT_TOS(ip4h->tos), + .flowi4_oif = dev->ifindex, + .flowi4_proto = ip4h->protocol, + .daddr = ip4h->daddr, + .saddr = ip4h->saddr, + }; + + rt = ip_route_output_flow(net, &fl4, NULL); + if (IS_ERR(rt)) + goto out_drop; + if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { + ip_rt_put(rt); + goto out_drop; + } + + skb_dst_set(skb, &rt->dst); + + err = bpf_out_neigh_v4(net, skb); + if (unlikely(net_xmit_eval(err))) + dev->stats.tx_errors++; + else + ret = NET_XMIT_SUCCESS; + goto out_xmit; +out_drop: + dev->stats.tx_errors++; + kfree_skb(skb); +out_xmit: + return ret; +} +#else +static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev) +{ + kfree_skb(skb); + return NET_XMIT_DROP; +} +#endif /* CONFIG_INET */ + +static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev) +{ + struct ethhdr *ethh = eth_hdr(skb); + + if (unlikely(skb->mac_header >= skb->network_header)) + goto out; + bpf_push_mac_rcsum(skb); + if (is_multicast_ether_addr(ethh->h_dest)) + goto out; + + skb_pull(skb, sizeof(*ethh)); + skb_unset_mac_header(skb); + skb_reset_network_header(skb); + + if (skb->protocol == htons(ETH_P_IP)) + return __bpf_redirect_neigh_v4(skb, dev); + else if (skb->protocol == htons(ETH_P_IPV6)) + return __bpf_redirect_neigh_v6(skb, dev); +out: + kfree_skb(skb); + return -ENOTSUPP; +} + +/* Internal, non-exposed redirect flags. */ +enum { + BPF_F_NEIGH = (1ULL << 1), +#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH) +}; + BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) { struct net_device *dev; struct sk_buff *clone; int ret; - if (unlikely(flags & ~(BPF_F_INGRESS))) + if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL))) return -EINVAL; dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex); @@ -2206,23 +2426,11 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = { DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info); -BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags) -{ - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - - if (unlikely(flags & ~(BPF_F_INGRESS))) - return TC_ACT_SHOT; - - ri->flags = flags; - ri->tgt_index = ifindex; - - return TC_ACT_REDIRECT; -} - int skb_do_redirect(struct sk_buff *skb) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); struct net_device *dev; + u32 flags = ri->flags; dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->tgt_index); ri->tgt_index = 0; @@ -2231,7 +2439,22 @@ int skb_do_redirect(struct sk_buff *skb) return -EINVAL; } - return __bpf_redirect(skb, dev, ri->flags); + return flags & BPF_F_NEIGH ? + __bpf_redirect_neigh(skb, dev) : + __bpf_redirect(skb, dev, flags); +} + +BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags) +{ + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + + if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL))) + return TC_ACT_SHOT; + + ri->flags = flags; + ri->tgt_index = ifindex; + + return TC_ACT_REDIRECT; } static const struct bpf_func_proto bpf_redirect_proto = { @@ -2242,6 +2465,27 @@ static const struct bpf_func_proto bpf_redirect_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags) +{ + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + + if (unlikely(flags)) + return TC_ACT_SHOT; + + ri->flags = BPF_F_NEIGH; + ri->tgt_index = ifindex; + + return TC_ACT_REDIRECT; +} + +static const struct bpf_func_proto bpf_redirect_neigh_proto = { + .func = bpf_redirect_neigh, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, +}; + BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes) { msg->apply_bytes = bytes; @@ -6759,6 +7003,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return bpf_get_skb_set_tunnel_proto(func_id); case BPF_FUNC_redirect: return &bpf_redirect_proto; + case BPF_FUNC_redirect_neigh: + return &bpf_redirect_neigh_proto; case BPF_FUNC_get_route_realm: return &bpf_get_route_realm_proto; case BPF_FUNC_get_hash_recalc: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6116a7f54c8f..1f17c6752deb 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3652,6 +3652,19 @@ union bpf_attr { * associated socket instead of the current process. * Return * The id is returned or 0 in case the id could not be retrieved. + * + * long bpf_redirect_neigh(u32 ifindex, u64 flags) + * Description + * Redirect the packet to another net device of index *ifindex* + * and fill in L2 addresses from neighboring subsystem. This helper + * is somewhat similar to **bpf_redirect**\ (), except that it + * fills in e.g. MAC addresses based on the L3 information from + * the packet. This helper is supported for IPv4 and IPv6 protocols. + * The *flags* argument is reserved and must be 0. The helper is + * currently only supported for tc BPF program types. + * Return + * The helper returns **TC_ACT_REDIRECT** on success or + * **TC_ACT_SHOT** on error. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3806,6 +3819,7 @@ union bpf_attr { FN(snprintf_btf), \ FN(seq_printf_btf), \ FN(skb_cgroup_classid), \ + FN(redirect_neigh), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.3.1 From 0e9f6841f664f801a69d27f765dc70b8a93e1959 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Sep 2020 17:18:18 +0200 Subject: bpf, libbpf: Add bpf_tail_call_static helper for bpf programs Port of tail_call_static() helper function from Cilium's BPF code base [0] to libbpf, so others can easily consume it as well. We've been using this in production code for some time now. The main idea is that we guarantee that the kernel's BPF infrastructure and JIT (here: x86_64) can patch the JITed BPF insns with direct jumps instead of having to fall back to using expensive retpolines. By using inline asm, we guarantee that the compiler won't merge the call from different paths with potentially different content of r2/r3. We're also using Cilium's __throw_build_bug() macro (here as: __bpf_unreachable()) in different places as a neat trick to trigger compilation errors when compiler does not remove code at compilation time. This works for the BPF back end as it does not implement the __builtin_trap(). [0] https://github.com/cilium/cilium/commit/f5537c26020d5297b70936c6b7d03a1e412a1035 Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1656a082e077552eb46642d513b4a6bde9a7dd01.1601477936.git.daniel@iogearbox.net --- tools/lib/bpf/bpf_helpers.h | 46 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 1106777df00b..2bdb7d6dbad2 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -53,6 +53,52 @@ }) #endif +/* + * Helper macro to throw a compilation error if __bpf_unreachable() gets + * built into the resulting code. This works given BPF back end does not + * implement __builtin_trap(). This is useful to assert that certain paths + * of the program code are never used and hence eliminated by the compiler. + * + * For example, consider a switch statement that covers known cases used by + * the program. __bpf_unreachable() can then reside in the default case. If + * the program gets extended such that a case is not covered in the switch + * statement, then it will throw a build error due to the default case not + * being compiled out. + */ +#ifndef __bpf_unreachable +# define __bpf_unreachable() __builtin_trap() +#endif + +/* + * Helper function to perform a tail call with a constant/immediate map slot. + */ +static __always_inline void +bpf_tail_call_static(void *ctx, const void *map, const __u32 slot) +{ + if (!__builtin_constant_p(slot)) + __bpf_unreachable(); + + /* + * Provide a hard guarantee that LLVM won't optimize setting r2 (map + * pointer) and r3 (constant map index) from _different paths_ ending + * up at the _same_ call insn as otherwise we won't be able to use the + * jmpq/nopl retpoline-free patching by the x86-64 JIT in the kernel + * given they mismatch. See also d2e4c1e6c294 ("bpf: Constant map key + * tracking for prog array pokes") for details on verifier tracking. + * + * Note on clobber list: we need to stay in-line with BPF calling + * convention, so even if we don't end up using r0, r4, r5, we need + * to mark them as clobber so that LLVM doesn't end up using them + * before / after the call. + */ + asm volatile("r1 = %[ctx]\n\t" + "r2 = %[map]\n\t" + "r3 = %[slot]\n\t" + "call 12" + :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot) + : "r0", "r1", "r2", "r3", "r4", "r5"); +} + /* * Helper structure used by eBPF C program * to describe BPF map attributes to libbpf loader -- cgit v1.3.1 From faef26fa444dc44eeff70c9a63ebe7fef00b6c37 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Sep 2020 17:18:19 +0200 Subject: bpf, selftests: Use bpf_tail_call_static where appropriate For those locations where we use an immediate tail call map index use the newly added bpf_tail_call_static() helper. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/3cfb2b799a62d22c6e7ae5897c23940bdcc24cbc.1601477936.git.daniel@iogearbox.net --- samples/bpf/sockex3_kern.c | 20 +++++++++------- tools/testing/selftests/bpf/progs/bpf_flow.c | 12 +++++----- tools/testing/selftests/bpf/progs/tailcall1.c | 28 +++++++++++----------- tools/testing/selftests/bpf/progs/tailcall2.c | 14 +++++------ tools/testing/selftests/bpf/progs/tailcall3.c | 4 ++-- .../selftests/bpf/progs/tailcall_bpf2bpf1.c | 4 ++-- .../selftests/bpf/progs/tailcall_bpf2bpf2.c | 6 ++--- .../selftests/bpf/progs/tailcall_bpf2bpf3.c | 6 ++--- .../selftests/bpf/progs/tailcall_bpf2bpf4.c | 6 ++--- 9 files changed, 51 insertions(+), 49 deletions(-) (limited to 'tools') diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c index cab9cca0b8eb..8142d02b33e6 100644 --- a/samples/bpf/sockex3_kern.c +++ b/samples/bpf/sockex3_kern.c @@ -31,28 +31,30 @@ struct { #define PARSE_IP 3 #define PARSE_IPV6 4 -/* protocol dispatch routine. - * It tail-calls next BPF program depending on eth proto - * Note, we could have used: - * bpf_tail_call(skb, &jmp_table, proto); - * but it would need large prog_array +/* Protocol dispatch routine. It tail-calls next BPF program depending + * on eth proto. Note, we could have used ... + * + * bpf_tail_call(skb, &jmp_table, proto); + * + * ... but it would need large prog_array and cannot be optimised given + * the map key is not static. */ static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto) { switch (proto) { case ETH_P_8021Q: case ETH_P_8021AD: - bpf_tail_call(skb, &jmp_table, PARSE_VLAN); + bpf_tail_call_static(skb, &jmp_table, PARSE_VLAN); break; case ETH_P_MPLS_UC: case ETH_P_MPLS_MC: - bpf_tail_call(skb, &jmp_table, PARSE_MPLS); + bpf_tail_call_static(skb, &jmp_table, PARSE_MPLS); break; case ETH_P_IP: - bpf_tail_call(skb, &jmp_table, PARSE_IP); + bpf_tail_call_static(skb, &jmp_table, PARSE_IP); break; case ETH_P_IPV6: - bpf_tail_call(skb, &jmp_table, PARSE_IPV6); + bpf_tail_call_static(skb, &jmp_table, PARSE_IPV6); break; } } diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index de6de9221518..5a65f6b51377 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -118,18 +118,18 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) switch (proto) { case bpf_htons(ETH_P_IP): - bpf_tail_call(skb, &jmp_table, IP); + bpf_tail_call_static(skb, &jmp_table, IP); break; case bpf_htons(ETH_P_IPV6): - bpf_tail_call(skb, &jmp_table, IPV6); + bpf_tail_call_static(skb, &jmp_table, IPV6); break; case bpf_htons(ETH_P_MPLS_MC): case bpf_htons(ETH_P_MPLS_UC): - bpf_tail_call(skb, &jmp_table, MPLS); + bpf_tail_call_static(skb, &jmp_table, MPLS); break; case bpf_htons(ETH_P_8021Q): case bpf_htons(ETH_P_8021AD): - bpf_tail_call(skb, &jmp_table, VLAN); + bpf_tail_call_static(skb, &jmp_table, VLAN); break; default: /* Protocol not supported */ @@ -246,10 +246,10 @@ static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) switch (nexthdr) { case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: - bpf_tail_call(skb, &jmp_table, IPV6OP); + bpf_tail_call_static(skb, &jmp_table, IPV6OP); break; case IPPROTO_FRAGMENT: - bpf_tail_call(skb, &jmp_table, IPV6FR); + bpf_tail_call_static(skb, &jmp_table, IPV6FR); break; default: return parse_ip_proto(skb, nexthdr); diff --git a/tools/testing/selftests/bpf/progs/tailcall1.c b/tools/testing/selftests/bpf/progs/tailcall1.c index 1f407e65ae52..7115bcefbe8a 100644 --- a/tools/testing/selftests/bpf/progs/tailcall1.c +++ b/tools/testing/selftests/bpf/progs/tailcall1.c @@ -26,20 +26,20 @@ int entry(struct __sk_buff *skb) /* Multiple locations to make sure we patch * all of them. */ - bpf_tail_call(skb, &jmp_table, 0); - bpf_tail_call(skb, &jmp_table, 0); - bpf_tail_call(skb, &jmp_table, 0); - bpf_tail_call(skb, &jmp_table, 0); - - bpf_tail_call(skb, &jmp_table, 1); - bpf_tail_call(skb, &jmp_table, 1); - bpf_tail_call(skb, &jmp_table, 1); - bpf_tail_call(skb, &jmp_table, 1); - - bpf_tail_call(skb, &jmp_table, 2); - bpf_tail_call(skb, &jmp_table, 2); - bpf_tail_call(skb, &jmp_table, 2); - bpf_tail_call(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); + + bpf_tail_call_static(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); + + bpf_tail_call_static(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); return 3; } diff --git a/tools/testing/selftests/bpf/progs/tailcall2.c b/tools/testing/selftests/bpf/progs/tailcall2.c index a093e739cf0e..0431e4fe7efd 100644 --- a/tools/testing/selftests/bpf/progs/tailcall2.c +++ b/tools/testing/selftests/bpf/progs/tailcall2.c @@ -13,14 +13,14 @@ struct { SEC("classifier/0") int bpf_func_0(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); return 0; } SEC("classifier/1") int bpf_func_1(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); return 1; } @@ -33,25 +33,25 @@ int bpf_func_2(struct __sk_buff *skb) SEC("classifier/3") int bpf_func_3(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 4); + bpf_tail_call_static(skb, &jmp_table, 4); return 3; } SEC("classifier/4") int bpf_func_4(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 3); + bpf_tail_call_static(skb, &jmp_table, 3); return 4; } SEC("classifier") int entry(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); /* Check multi-prog update. */ - bpf_tail_call(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); /* Check tail call limit. */ - bpf_tail_call(skb, &jmp_table, 3); + bpf_tail_call_static(skb, &jmp_table, 3); return 3; } diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c index cabda877cf0a..739dc2a51e74 100644 --- a/tools/testing/selftests/bpf/progs/tailcall3.c +++ b/tools/testing/selftests/bpf/progs/tailcall3.c @@ -16,14 +16,14 @@ SEC("classifier/0") int bpf_func_0(struct __sk_buff *skb) { count++; - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); return 1; } SEC("classifier") int entry(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); return 0; } diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c index b5d9c8e778ae..0103f3dd9f02 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c @@ -21,7 +21,7 @@ TAIL_FUNC(1) static __noinline int subprog_tail(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); return skb->len * 2; } @@ -29,7 +29,7 @@ int subprog_tail(struct __sk_buff *skb) SEC("classifier") int entry(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); return subprog_tail(skb); } diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c index a004ab28ce28..7b1c04183824 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c @@ -14,9 +14,9 @@ static __noinline int subprog_tail(struct __sk_buff *skb) { if (load_byte(skb, 0)) - bpf_tail_call(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); else - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); return 1; } @@ -32,7 +32,7 @@ int bpf_func_0(struct __sk_buff *skb) SEC("classifier") int entry(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); return 0; } diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c index 96dbef2b6b7c..0d5482bea6c9 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c @@ -16,9 +16,9 @@ int subprog_tail2(struct __sk_buff *skb) volatile char arr[64] = {}; if (load_word(skb, 0) || load_half(skb, 0)) - bpf_tail_call(skb, &jmp_table, 10); + bpf_tail_call_static(skb, &jmp_table, 10); else - bpf_tail_call(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); return skb->len; } @@ -28,7 +28,7 @@ int subprog_tail(struct __sk_buff *skb) { volatile char arr[64] = {}; - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); return skb->len * 2; } diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c index 98b40a95bc67..9a1b166b7fbe 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c @@ -14,21 +14,21 @@ static volatile int count; __noinline int subprog_tail_2(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); return skb->len * 3; } __noinline int subprog_tail_1(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); return skb->len * 2; } __noinline int subprog_tail(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); return skb->len; } -- cgit v1.3.1 From eef4a011f35d3aa657d4995c53ccb240d9eaea73 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Sep 2020 17:18:20 +0200 Subject: bpf, selftests: Add redirect_neigh selftest Add a small test that exercises the new redirect_neigh() helper for the IPv4 and IPv6 case. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/0fc7d9c5f9a6cc1c65b0d3be83b44b1ec9889f43.1601477936.git.daniel@iogearbox.net --- tools/testing/selftests/bpf/progs/test_tc_neigh.c | 144 +++++++++++++++++++ tools/testing/selftests/bpf/test_tc_neigh.sh | 168 ++++++++++++++++++++++ 2 files changed, 312 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/test_tc_neigh.c create mode 100755 tools/testing/selftests/bpf/test_tc_neigh.sh (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c new file mode 100644 index 000000000000..889a72c3024f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifndef barrier_data +# define barrier_data(ptr) asm volatile("": :"r"(ptr) :"memory") +#endif + +#ifndef ctx_ptr +# define ctx_ptr(field) (void *)(long)(field) +#endif + +#define dst_to_src_tmp 0xeeddddeeU +#define src_to_dst_tmp 0xeeffffeeU + +#define ip4_src 0xac100164 /* 172.16.1.100 */ +#define ip4_dst 0xac100264 /* 172.16.2.100 */ + +#define ip6_src { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe } +#define ip6_dst { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe } + +#ifndef v6_equal +# define v6_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \ + a.s6_addr32[1] == b.s6_addr32[1] && \ + a.s6_addr32[2] == b.s6_addr32[2] && \ + a.s6_addr32[3] == b.s6_addr32[3]) +#endif + +static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb, + __be32 addr) +{ + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + struct iphdr *ip4h; + + if (data + sizeof(struct ethhdr) > data_end) + return false; + + ip4h = (struct iphdr *)(data + sizeof(struct ethhdr)); + if ((void *)(ip4h + 1) > data_end) + return false; + + return ip4h->daddr == addr; +} + +static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb, + struct in6_addr addr) +{ + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + struct ipv6hdr *ip6h; + + if (data + sizeof(struct ethhdr) > data_end) + return false; + + ip6h = (struct ipv6hdr *)(data + sizeof(struct ethhdr)); + if ((void *)(ip6h + 1) > data_end) + return false; + + return v6_equal(ip6h->daddr, addr); +} + +SEC("chk_neigh") int tc_chk(struct __sk_buff *skb) +{ + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + __u32 *raw = data; + + if (data + sizeof(struct ethhdr) > data_end) + return TC_ACT_SHOT; + + return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK; +} + +SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +{ + int idx = dst_to_src_tmp; + __u8 zero[ETH_ALEN * 2]; + bool redirect = false; + + switch (skb->protocol) { + case __bpf_constant_htons(ETH_P_IP): + redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_src)); + break; + case __bpf_constant_htons(ETH_P_IPV6): + redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_src); + break; + } + + if (!redirect) + return TC_ACT_OK; + + barrier_data(&idx); + idx = bpf_ntohl(idx); + + __builtin_memset(&zero, 0, sizeof(zero)); + if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) + return TC_ACT_SHOT; + + return bpf_redirect_neigh(idx, 0); +} + +SEC("src_ingress") int tc_src(struct __sk_buff *skb) +{ + int idx = src_to_dst_tmp; + __u8 zero[ETH_ALEN * 2]; + bool redirect = false; + + switch (skb->protocol) { + case __bpf_constant_htons(ETH_P_IP): + redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_dst)); + break; + case __bpf_constant_htons(ETH_P_IPV6): + redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_dst); + break; + } + + if (!redirect) + return TC_ACT_OK; + + barrier_data(&idx); + idx = bpf_ntohl(idx); + + __builtin_memset(&zero, 0, sizeof(zero)); + if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) + return TC_ACT_SHOT; + + return bpf_redirect_neigh(idx, 0); +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tc_neigh.sh b/tools/testing/selftests/bpf/test_tc_neigh.sh new file mode 100755 index 000000000000..31d8c3df8b24 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tc_neigh.sh @@ -0,0 +1,168 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link +# between src and dst. The netns fwd has veth links to each src and dst. The +# client is in src and server in dst. The test installs a TC BPF program to each +# host facing veth in fwd which calls into bpf_redirect_peer() to perform the +# neigh addr population and redirect; it also installs a dropper prog on the +# egress side to drop skbs if neigh addrs were not populated. + +if [[ $EUID -ne 0 ]]; then + echo "This script must be run as root" + echo "FAIL" + exit 1 +fi + +# check that nc, dd, ping, ping6 and timeout are present +command -v nc >/dev/null 2>&1 || \ + { echo >&2 "nc is not available"; exit 1; } +command -v dd >/dev/null 2>&1 || \ + { echo >&2 "dd is not available"; exit 1; } +command -v timeout >/dev/null 2>&1 || \ + { echo >&2 "timeout is not available"; exit 1; } +command -v ping >/dev/null 2>&1 || \ + { echo >&2 "ping is not available"; exit 1; } +command -v ping6 >/dev/null 2>&1 || \ + { echo >&2 "ping6 is not available"; exit 1; } + +readonly GREEN='\033[0;92m' +readonly RED='\033[0;31m' +readonly NC='\033[0m' # No Color + +readonly PING_ARG="-c 3 -w 10 -q" + +readonly TIMEOUT=10 + +readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)" +readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)" +readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)" + +readonly IP4_SRC="172.16.1.100" +readonly IP4_DST="172.16.2.100" + +readonly IP6_SRC="::1:dead:beef:cafe" +readonly IP6_DST="::2:dead:beef:cafe" + +readonly IP4_SLL="169.254.0.1" +readonly IP4_DLL="169.254.0.2" +readonly IP4_NET="169.254.0.0" + +cleanup() +{ + ip netns del ${NS_SRC} + ip netns del ${NS_FWD} + ip netns del ${NS_DST} +} + +trap cleanup EXIT + +set -e + +ip netns add "${NS_SRC}" +ip netns add "${NS_FWD}" +ip netns add "${NS_DST}" + +ip link add veth_src type veth peer name veth_src_fwd +ip link add veth_dst type veth peer name veth_dst_fwd + +ip link set veth_src netns ${NS_SRC} +ip link set veth_src_fwd netns ${NS_FWD} + +ip link set veth_dst netns ${NS_DST} +ip link set veth_dst_fwd netns ${NS_FWD} + +ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src +ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst + +# The fwd netns automatically get a v6 LL address / routes, but also needs v4 +# one in order to start ARP probing. IP4_NET route is added to the endpoints +# so that the ARP processing will reply. + +ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd +ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd + +ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad +ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad + +ip -netns ${NS_SRC} link set dev veth_src up +ip -netns ${NS_FWD} link set dev veth_src_fwd up + +ip -netns ${NS_DST} link set dev veth_dst up +ip -netns ${NS_FWD} link set dev veth_dst_fwd up + +ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global +ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global +ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global + +ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global +ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global + +ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global +ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global +ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global + +ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global +ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global + +fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address) +fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address) + +ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src +ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst + +ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src +ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst + +veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex | awk '{printf "%08x\n", $1}') +veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex | awk '{printf "%08x\n", $1}') + +xxd -p < test_tc_neigh.o | sed "s/eeddddee/$veth_src/g" | xxd -r -p > test_tc_neigh.x.o +xxd -p < test_tc_neigh.x.o | sed "s/eeffffee/$veth_dst/g" | xxd -r -p > test_tc_neigh.y.o + +ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact +ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj test_tc_neigh.y.o sec src_ingress +ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj test_tc_neigh.y.o sec chk_neigh + +ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact +ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj test_tc_neigh.y.o sec dst_ingress +ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj test_tc_neigh.y.o sec chk_neigh + +rm -f test_tc_neigh.x.o test_tc_neigh.y.o + +ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &" +ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &" + +set +e + +TEST="TCPv4 connectivity test" +ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004" +if [ $? -ne 0 ]; then + echo -e "${TEST}: ${RED}FAIL${NC}" + exit 1 +fi +echo -e "${TEST}: ${GREEN}PASS${NC}" + +TEST="TCPv6 connectivity test" +ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006" +if [ $? -ne 0 ]; then + echo -e "${TEST}: ${RED}FAIL${NC}" + exit 1 +fi +echo -e "${TEST}: ${GREEN}PASS${NC}" + +TEST="ICMPv4 connectivity test" +ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST} +if [ $? -ne 0 ]; then + echo -e "${TEST}: ${RED}FAIL${NC}" + exit 1 +fi +echo -e "${TEST}: ${GREEN}PASS${NC}" + +TEST="ICMPv6 connectivity test" +ip netns exec ${NS_SRC} ping6 $PING_ARG ${IP6_DST} +if [ $? -ne 0 ]; then + echo -e "${TEST}: ${RED}FAIL${NC}" + exit 1 +fi +echo -e "${TEST}: ${GREEN}PASS${NC}" -- cgit v1.3.1 From 9c6c5c48d7e9c00c541e0f1a5cea3a96767e825c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 29 Sep 2020 16:28:40 -0700 Subject: libbpf: Make btf_dump work with modifiable BTF Ensure that btf_dump can accommodate new BTF types being appended to BTF instance after struct btf_dump was created. This came up during attemp to use btf_dump for raw type dumping in selftests, but given changes are not excessive, it's good to not have any gotchas in API usage, so I decided to support such use case in general. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200929232843.1249318-2-andriin@fb.com --- tools/lib/bpf/btf.c | 17 ++++++++++ tools/lib/bpf/btf_dump.c | 69 ++++++++++++++++++++++++++++------------- tools/lib/bpf/libbpf_internal.h | 1 + 3 files changed, 65 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 398b1f345b3c..231b07203e3d 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -146,6 +146,23 @@ void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz, return new_data + cur_cnt * elem_sz; } +/* Ensure given dynamically allocated memory region has enough allocated space + * to accommodate *need_cnt* elements of size *elem_sz* bytes each + */ +int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt) +{ + void *p; + + if (need_cnt <= *cap_cnt) + return 0; + + p = btf_add_mem(data, cap_cnt, elem_sz, *cap_cnt, SIZE_MAX, need_cnt - *cap_cnt); + if (!p) + return -ENOMEM; + + return 0; +} + static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off) { __u32 *p; diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 91310e528a3a..2f9d685bd522 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -60,11 +60,14 @@ struct btf_dump { struct btf_dump_opts opts; int ptr_sz; bool strip_mods; + int last_id; /* per-type auxiliary state */ struct btf_dump_type_aux_state *type_states; + size_t type_states_cap; /* per-type optional cached unique name, must be freed, if present */ const char **cached_names; + size_t cached_names_cap; /* topo-sorted list of dependent type definitions */ __u32 *emit_queue; @@ -113,6 +116,7 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...) } static int btf_dump_mark_referenced(struct btf_dump *d); +static int btf_dump_resize(struct btf_dump *d); struct btf_dump *btf_dump__new(const struct btf *btf, const struct btf_ext *btf_ext, @@ -144,25 +148,8 @@ struct btf_dump *btf_dump__new(const struct btf *btf, d->ident_names = NULL; goto err; } - d->type_states = calloc(1 + btf__get_nr_types(d->btf), - sizeof(d->type_states[0])); - if (!d->type_states) { - err = -ENOMEM; - goto err; - } - d->cached_names = calloc(1 + btf__get_nr_types(d->btf), - sizeof(d->cached_names[0])); - if (!d->cached_names) { - err = -ENOMEM; - goto err; - } - /* VOID is special */ - d->type_states[0].order_state = ORDERED; - d->type_states[0].emit_state = EMITTED; - - /* eagerly determine referenced types for anon enums */ - err = btf_dump_mark_referenced(d); + err = btf_dump_resize(d); if (err) goto err; @@ -172,9 +159,38 @@ err: return ERR_PTR(err); } +static int btf_dump_resize(struct btf_dump *d) +{ + int err, last_id = btf__get_nr_types(d->btf); + + if (last_id <= d->last_id) + return 0; + + if (btf_ensure_mem((void **)&d->type_states, &d->type_states_cap, + sizeof(*d->type_states), last_id + 1)) + return -ENOMEM; + if (btf_ensure_mem((void **)&d->cached_names, &d->cached_names_cap, + sizeof(*d->cached_names), last_id + 1)) + return -ENOMEM; + + if (d->last_id == 0) { + /* VOID is special */ + d->type_states[0].order_state = ORDERED; + d->type_states[0].emit_state = EMITTED; + } + + /* eagerly determine referenced types for anon enums */ + err = btf_dump_mark_referenced(d); + if (err) + return err; + + d->last_id = last_id; + return 0; +} + void btf_dump__free(struct btf_dump *d) { - int i, cnt; + int i; if (IS_ERR_OR_NULL(d)) return; @@ -182,7 +198,7 @@ void btf_dump__free(struct btf_dump *d) free(d->type_states); if (d->cached_names) { /* any set cached name is owned by us and should be freed */ - for (i = 0, cnt = btf__get_nr_types(d->btf); i <= cnt; i++) { + for (i = 0; i <= d->last_id; i++) { if (d->cached_names[i]) free((void *)d->cached_names[i]); } @@ -222,6 +238,10 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id) if (id > btf__get_nr_types(d->btf)) return -EINVAL; + err = btf_dump_resize(d); + if (err) + return err; + d->emit_queue_cnt = 0; err = btf_dump_order_type(d, id, false); if (err < 0) @@ -251,7 +271,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d) const struct btf_type *t; __u16 vlen; - for (i = 1; i <= n; i++) { + for (i = d->last_id + 1; i <= n; i++) { t = btf__type_by_id(d->btf, i); vlen = btf_vlen(t); @@ -306,6 +326,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d) } return 0; } + static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id) { __u32 *new_queue; @@ -1049,11 +1070,15 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id, const struct btf_dump_emit_type_decl_opts *opts) { const char *fname; - int lvl; + int lvl, err; if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts)) return -EINVAL; + err = btf_dump_resize(d); + if (err) + return -EINVAL; + fname = OPTS_GET(opts, field_name, ""); lvl = OPTS_GET(opts, indent_level, 0); d->strip_mods = OPTS_GET(opts, strip_mods, false); diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index eed5b624a784..d99bc847bf84 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -107,6 +107,7 @@ static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size) void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t cur_cnt, size_t max_cnt, size_t add_cnt); +int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt); static inline bool libbpf_validate_opts(const char *opts, size_t opts_sz, size_t user_sz, -- cgit v1.3.1 From f4d385e4d51d035c7f0d68a3e9564c9453c13aa4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 29 Sep 2020 16:28:43 -0700 Subject: selftests/bpf: Test "incremental" btf_dump in C format Add test validating that btf_dump works fine with BTFs that are modified and incrementally generated. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200929232843.1249318-5-andriin@fb.com --- tools/testing/selftests/bpf/prog_tests/btf_dump.c | 105 ++++++++++++++++++++++ 1 file changed, 105 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 39fb81d9daeb..c60091ee8a21 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -129,6 +129,109 @@ done: return err; } +static char *dump_buf; +static size_t dump_buf_sz; +static FILE *dump_buf_file; + +void test_btf_dump_incremental(void) +{ + struct btf *btf = NULL; + struct btf_dump *d = NULL; + struct btf_dump_opts opts; + int id, err, i; + + dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz); + if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream")) + return; + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "new_empty")) + goto err_out; + opts.ctx = dump_buf_file; + d = btf_dump__new(btf, NULL, &opts, btf_dump_printf); + if (!ASSERT_OK(libbpf_get_error(d), "btf_dump__new")) + goto err_out; + + /* First, generate BTF corresponding to the following C code: + * + * enum { VAL = 1 }; + * + * struct s { int x; }; + * + */ + id = btf__add_enum(btf, NULL, 4); + ASSERT_EQ(id, 1, "enum_id"); + err = btf__add_enum_value(btf, "VAL", 1); + ASSERT_OK(err, "enum_val_ok"); + + id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED); + ASSERT_EQ(id, 2, "int_id"); + + id = btf__add_struct(btf, "s", 4); + ASSERT_EQ(id, 3, "struct_id"); + err = btf__add_field(btf, "x", 2, 0, 0); + ASSERT_OK(err, "field_ok"); + + for (i = 1; i <= btf__get_nr_types(btf); i++) { + err = btf_dump__dump_type(d, i); + ASSERT_OK(err, "dump_type_ok"); + } + + fflush(dump_buf_file); + dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */ + ASSERT_STREQ(dump_buf, +"enum {\n" +" VAL = 1,\n" +"};\n" +"\n" +"struct s {\n" +" int x;\n" +"};\n\n", "c_dump1"); + + /* Now, after dumping original BTF, append another struct that embeds + * anonymous enum. It also has a name conflict with the first struct: + * + * struct s___2 { + * enum { VAL___2 = 1 } x; + * struct s s; + * }; + * + * This will test that btf_dump'er maintains internal state properly. + * Note that VAL___2 enum value. It's because we've already emitted + * that enum as a global anonymous enum, so btf_dump will ensure that + * enum values don't conflict; + * + */ + fseek(dump_buf_file, 0, SEEK_SET); + + id = btf__add_struct(btf, "s", 4); + ASSERT_EQ(id, 4, "struct_id"); + err = btf__add_field(btf, "x", 1, 0, 0); + ASSERT_OK(err, "field_ok"); + err = btf__add_field(btf, "s", 3, 32, 0); + ASSERT_OK(err, "field_ok"); + + for (i = 1; i <= btf__get_nr_types(btf); i++) { + err = btf_dump__dump_type(d, i); + ASSERT_OK(err, "dump_type_ok"); + } + + fflush(dump_buf_file); + dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */ + ASSERT_STREQ(dump_buf, +"struct s___2 {\n" +" enum {\n" +" VAL___2 = 1,\n" +" } x;\n" +" struct s s;\n" +"};\n\n" , "c_dump1"); + +err_out: + fclose(dump_buf_file); + free(dump_buf); + btf_dump__free(d); + btf__free(btf); +} + void test_btf_dump() { int i; @@ -140,4 +243,6 @@ void test_btf_dump() { test_btf_dump_case(i, &btf_dump_test_cases[i]); } + if (test__start_subtest("btf_dump: incremental")) + test_btf_dump_incremental(); } -- cgit v1.3.1 From 6e0972e0c5d8f78b64274fd1a512136aee404610 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 30 Sep 2020 12:49:07 +0200 Subject: selftests: forwarding: devlink_lib: Split devlink_..._set() into save & set Changing pool type from static to dynamic causes reinterpretation of threshold values. They therefore need to be saved before pool type is changed, then the pool type can be changed, and then the new values need to be set up. For that reason, set cannot subsume save, because it would be saving the wrong thing, with possibly a nonsensical value, and restore would then fail to restore the nonsensical value. Thus extract a _save() from each of the relevant _set()'s. This way it is possible to save everything up front, then to tweak it, and then restore in the required order. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/qos_ets_strict.sh | 9 ++++ .../selftests/drivers/net/mlxsw/qos_mc_aware.sh | 5 ++ .../testing/selftests/drivers/net/mlxsw/sch_ets.sh | 6 +++ .../selftests/drivers/net/mlxsw/sch_red_core.sh | 1 + .../selftests/net/forwarding/devlink_lib.sh | 62 ++++++++++++++++++---- 5 files changed, 72 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh index 6d1790b5de7a..e9f8718af979 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh @@ -147,17 +147,26 @@ switch_create() # Make sure that ingress quotas are smaller than egress so that there is # room for both streams of traffic to be admitted to shared buffer. + devlink_pool_size_thtype_save 0 devlink_pool_size_thtype_set 0 dynamic 10000000 + devlink_pool_size_thtype_save 4 devlink_pool_size_thtype_set 4 dynamic 10000000 + devlink_port_pool_th_save $swp1 0 devlink_port_pool_th_set $swp1 0 6 + devlink_tc_bind_pool_th_save $swp1 1 ingress devlink_tc_bind_pool_th_set $swp1 1 ingress 0 6 + devlink_port_pool_th_save $swp2 0 devlink_port_pool_th_set $swp2 0 6 + devlink_tc_bind_pool_th_save $swp2 2 ingress devlink_tc_bind_pool_th_set $swp2 2 ingress 0 6 + devlink_tc_bind_pool_th_save $swp3 1 egress devlink_tc_bind_pool_th_set $swp3 1 egress 4 7 + devlink_tc_bind_pool_th_save $swp3 2 egress devlink_tc_bind_pool_th_set $swp3 2 egress 4 7 + devlink_port_pool_th_save $swp3 4 devlink_port_pool_th_set $swp3 4 7 } diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index b025daea062d..8f164c80e215 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -145,12 +145,17 @@ switch_create() # Make sure that ingress quotas are smaller than egress so that there is # room for both streams of traffic to be admitted to shared buffer. + devlink_port_pool_th_save $swp1 0 devlink_port_pool_th_set $swp1 0 5 + devlink_tc_bind_pool_th_save $swp1 0 ingress devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5 + devlink_port_pool_th_save $swp2 0 devlink_port_pool_th_set $swp2 0 5 + devlink_tc_bind_pool_th_save $swp2 1 ingress devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5 + devlink_port_pool_th_save $swp3 4 devlink_port_pool_th_set $swp3 4 12 } diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh index 94c37124a840..af64bc9ea8ab 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh @@ -27,11 +27,17 @@ switch_create() # amount of traffic that is admitted to the shared buffers. This makes # sure that there is always enough traffic of all types to select from # for the DWRR process. + devlink_port_pool_th_save $swp1 0 devlink_port_pool_th_set $swp1 0 12 + devlink_tc_bind_pool_th_save $swp1 0 ingress devlink_tc_bind_pool_th_set $swp1 0 ingress 0 12 + devlink_port_pool_th_save $swp2 4 devlink_port_pool_th_set $swp2 4 12 + devlink_tc_bind_pool_th_save $swp2 7 egress devlink_tc_bind_pool_th_set $swp2 7 egress 4 5 + devlink_tc_bind_pool_th_save $swp2 6 egress devlink_tc_bind_pool_th_set $swp2 6 egress 4 5 + devlink_tc_bind_pool_th_save $swp2 5 egress devlink_tc_bind_pool_th_set $swp2 5 egress 4 5 # Note: sch_ets_core.sh uses VLAN ingress-qos-map to assign packet diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index 517297a14ecf..b0cb1aaffdda 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -208,6 +208,7 @@ switch_create() ip link set dev br2_11 up local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1) + devlink_port_pool_th_save $swp3 8 devlink_port_pool_th_set $swp3 8 $size } diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 75fe24bcb9cd..ba6aca848702 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -117,6 +117,12 @@ devlink_reload() declare -A DEVLINK_ORIG +# Changing pool type from static to dynamic causes reinterpretation of threshold +# values. They therefore need to be saved before pool type is changed, then the +# pool type can be changed, and then the new values need to be set up. Therefore +# instead of saving the current state implicitly in the _set call, provide +# functions for all three primitives: save, set, and restore. + devlink_port_pool_threshold() { local port=$1; shift @@ -126,14 +132,21 @@ devlink_port_pool_threshold() | jq '.port_pool."'"$port"'"[].threshold' } -devlink_port_pool_th_set() +devlink_port_pool_th_save() { local port=$1; shift local pool=$1; shift - local th=$1; shift local key="port_pool($port,$pool).threshold" DEVLINK_ORIG[$key]=$(devlink_port_pool_threshold $port $pool) +} + +devlink_port_pool_th_set() +{ + local port=$1; shift + local pool=$1; shift + local th=$1; shift + devlink sb port pool set $port pool $pool th $th } @@ -142,8 +155,13 @@ devlink_port_pool_th_restore() local port=$1; shift local pool=$1; shift local key="port_pool($port,$pool).threshold" + local -a orig=(${DEVLINK_ORIG[$key]}) - devlink sb port pool set $port pool $pool th ${DEVLINK_ORIG[$key]} + if [[ -z $orig ]]; then + echo "WARNING: Mismatched devlink_port_pool_th_restore" + else + devlink sb port pool set $port pool $pool th $orig + fi } devlink_pool_size_thtype() @@ -154,14 +172,20 @@ devlink_pool_size_thtype() | jq -r '.pool[][] | (.size, .thtype)' } +devlink_pool_size_thtype_save() +{ + local pool=$1; shift + local key="pool($pool).size_thtype" + + DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool) +} + devlink_pool_size_thtype_set() { local pool=$1; shift local thtype=$1; shift local size=$1; shift - local key="pool($pool).size_thtype" - DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool) devlink sb pool set "$DEVLINK_DEV" pool $pool size $size thtype $thtype } @@ -171,8 +195,12 @@ devlink_pool_size_thtype_restore() local key="pool($pool).size_thtype" local -a orig=(${DEVLINK_ORIG[$key]}) - devlink sb pool set "$DEVLINK_DEV" pool $pool \ - size ${orig[0]} thtype ${orig[1]} + if [[ -z ${orig[0]} ]]; then + echo "WARNING: Mismatched devlink_pool_size_thtype_restore" + else + devlink sb pool set "$DEVLINK_DEV" pool $pool \ + size ${orig[0]} thtype ${orig[1]} + fi } devlink_tc_bind_pool_th() @@ -185,6 +213,16 @@ devlink_tc_bind_pool_th() | jq -r '.tc_bind[][] | (.pool, .threshold)' } +devlink_tc_bind_pool_th_save() +{ + local port=$1; shift + local tc=$1; shift + local dir=$1; shift + local key="tc_bind($port,$dir,$tc).pool_th" + + DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir) +} + devlink_tc_bind_pool_th_set() { local port=$1; shift @@ -192,9 +230,7 @@ devlink_tc_bind_pool_th_set() local dir=$1; shift local pool=$1; shift local th=$1; shift - local key="tc_bind($port,$dir,$tc).pool_th" - DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir) devlink sb tc bind set $port tc $tc type $dir pool $pool th $th } @@ -206,8 +242,12 @@ devlink_tc_bind_pool_th_restore() local key="tc_bind($port,$dir,$tc).pool_th" local -a orig=(${DEVLINK_ORIG[$key]}) - devlink sb tc bind set $port tc $tc type $dir \ - pool ${orig[0]} th ${orig[1]} + if [[ -z ${orig[0]} ]]; then + echo "WARNING: Mismatched devlink_tc_bind_pool_th_restore" + else + devlink sb tc bind set $port tc $tc type $dir \ + pool ${orig[0]} th ${orig[1]} + fi } devlink_traps_num_get() -- cgit v1.3.1 From 294f44c19fa65b0c813608d7ddfbb378875eb6b0 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 30 Sep 2020 12:49:08 +0200 Subject: selftests: forwarding: devlink_lib: Add devlink_cell_size_get() Add a helper that answers the cell size of the devlink device. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/devlink_lib.sh | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index ba6aca848702..cf4e5daf767b 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -549,3 +549,9 @@ devlink_cpu_port_get() echo "$DEVLINK_DEV/$cpu_dl_port_num" } + +devlink_cell_size_get() +{ + devlink sb pool show "$DEVLINK_DEV" pool 0 -j \ + | jq '.pool[][].cell_size' +} -- cgit v1.3.1 From 5b3a53c9c843b84955d5a3abda358518a3031630 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 30 Sep 2020 12:49:09 +0200 Subject: selftests: forwarding: devlink_lib: Support port-less topologies Some selftests may not need any actual ports. Technically those are not forwarding selftests, but devlink_lib can still be handy. Fall back on NETIF_NO_CABLE in those cases. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/devlink_lib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index cf4e5daf767b..9c12c4fd3afc 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -5,7 +5,7 @@ # Defines if [[ ! -v DEVLINK_DEV ]]; then - DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \ + DEVLINK_DEV=$(devlink port show "${NETIFS[p1]:-$NETIF_NO_CABLE}" -j \ | jq -r '.port | keys[]' | cut -d/ -f-2) if [ -z "$DEVLINK_DEV" ]; then echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it" -- cgit v1.3.1 From 4b94a2fad835ad5022a099105b7bf27eb584d754 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 30 Sep 2020 12:49:10 +0200 Subject: selftests: mlxsw: qos_lib: Add a wrapper for running mlnx_qos mlnx_qos is a script for configuration of DCB. Despite the name it is not actually Mellanox-specific in any way. It is currently the only ad-hoc tool available (in contrast to a daemon that manages an interface on an ongoing basis). However, it is very verbose and parsing out error messages is not really possible. Add a wrapper that makes it easier to use the tool. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh index faa51012cdac..0bf76f13c030 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh @@ -82,3 +82,17 @@ bail_on_lldpad() fi fi } + +__mlnx_qos() +{ + local err + + mlnx_qos "$@" 2>/dev/null + err=$? + + if ((err)); then + echo "Error ($err) in mlnx_qos $@" >/dev/stderr + fi + + return $err +} -- cgit v1.3.1 From a65cc53a0eb882c5f6d49cda947aac7fdc9b0f73 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 30 Sep 2020 12:49:11 +0200 Subject: selftests: mlxsw: Add headroom handling test Add a test for headroom configuration. This covers projection of ETS configuration to ingress, PFC, adjustments for MTU, the qdisc / TC mode and the effect of egress SPAN session on buffer configuration. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/qos_headroom.sh | 379 +++++++++++++++++++++ 1 file changed, 379 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh new file mode 100755 index 000000000000..27de3d9ed08e --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh @@ -0,0 +1,379 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + test_defaults + test_dcb_ets + test_mtu + test_pfc + test_int_buf + test_tc_priomap + test_tc_mtu + test_tc_sizes + test_tc_int_buf +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=0 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source qos_lib.sh + +swp=$NETIF_NO_CABLE + +cleanup() +{ + pre_cleanup +} + +get_prio_pg() +{ + __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' | + grep buffer | sed 's/ \+/ /g' | cut -d' ' -f 2- +} + +get_prio_pfc() +{ + __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' | + grep enabled | sed 's/ \+/ /g' | cut -d' ' -f 2- +} + +get_prio_tc() +{ + __mlnx_qos -i $swp | sed -n '/^tc/,$p' | + awk '/^tc/ { TC = $2 } + /priority:/ { PRIO[$2]=TC } + END { + for (i in PRIO) + printf("%d ", PRIO[i]) + }' +} + +get_buf_size() +{ + local idx=$1; shift + + __mlnx_qos -i $swp | grep Receive | sed 's/.*: //' | cut -d, -f $((idx + 1)) +} + +get_tot_size() +{ + __mlnx_qos -i $swp | grep Receive | sed 's/.*total_size=//' +} + +check_prio_pg() +{ + local expect=$1; shift + + local current=$(get_prio_pg) + test "$current" = "$expect" + check_err $? "prio2buffer is '$current', expected '$expect'" +} + +check_prio_pfc() +{ + local expect=$1; shift + + local current=$(get_prio_pfc) + test "$current" = "$expect" + check_err $? "prio PFC is '$current', expected '$expect'" +} + +check_prio_tc() +{ + local expect=$1; shift + + local current=$(get_prio_tc) + test "$current" = "$expect" + check_err $? "prio_tc is '$current', expected '$expect'" +} + +__check_buf_size() +{ + local idx=$1; shift + local expr=$1; shift + local what=$1; shift + + local current=$(get_buf_size $idx) + ((current $expr)) + check_err $? "${what}buffer $idx size is '$current', expected '$expr'" + echo $current +} + +check_buf_size() +{ + __check_buf_size "$@" > /dev/null +} + +test_defaults() +{ + RET=0 + + check_prio_pg "0 0 0 0 0 0 0 0 " + check_prio_tc "0 0 0 0 0 0 0 0 " + check_prio_pfc "0 0 0 0 0 0 0 0 " + + log_test "Default headroom configuration" +} + +test_dcb_ets() +{ + RET=0 + + __mlnx_qos -i $swp --prio_tc=0,2,4,6,1,3,5,7 > /dev/null + + check_prio_pg "0 2 4 6 1 3 5 7 " + check_prio_tc "0 2 4 6 1 3 5 7 " + check_prio_pfc "0 0 0 0 0 0 0 0 " + + __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null + + check_prio_pg "0 0 0 0 0 0 0 0 " + check_prio_tc "0 0 0 0 0 0 0 0 " + + __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 &> /dev/null + check_fail $? "prio2buffer accepted in DCB mode" + + log_test "Configuring headroom through ETS" +} + +test_mtu() +{ + local what=$1; shift + local buf0size_2 + local buf0size + + RET=0 + buf0size=$(__check_buf_size 0 "> 0") + + mtu_set $swp 3000 + buf0size_2=$(__check_buf_size 0 "> $buf0size" "MTU 3000: ") + mtu_restore $swp + + mtu_set $swp 6000 + check_buf_size 0 "> $buf0size_2" "MTU 6000: " + mtu_restore $swp + + check_buf_size 0 "== $buf0size" + + log_test "${what}MTU impacts buffer size" +} + +test_tc_mtu() +{ + # In TC mode, MTU still impacts the threshold below which a buffer is + # not permitted to go. + + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + test_mtu "TC: " + tc qdisc delete dev $swp root +} + +test_pfc() +{ + RET=0 + + __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,1,2,3 > /dev/null + + local buf0size=$(get_buf_size 0) + local buf1size=$(get_buf_size 1) + local buf2size=$(get_buf_size 2) + local buf3size=$(get_buf_size 3) + check_buf_size 0 "> 0" + check_buf_size 1 "> 0" + check_buf_size 2 "> 0" + check_buf_size 3 "> 0" + check_buf_size 4 "== 0" + check_buf_size 5 "== 0" + check_buf_size 6 "== 0" + check_buf_size 7 "== 0" + + log_test "Buffer size sans PFC" + + RET=0 + + __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=0 > /dev/null + + check_prio_pg "0 0 0 0 0 1 2 3 " + check_prio_pfc "0 0 0 0 0 1 1 1 " + check_buf_size 0 "== $buf0size" + check_buf_size 1 "> $buf1size" + check_buf_size 2 "> $buf2size" + check_buf_size 3 "> $buf3size" + + local buf1size=$(get_buf_size 1) + check_buf_size 2 "== $buf1size" + check_buf_size 3 "== $buf1size" + + log_test "PFC: Cable length 0" + + RET=0 + + __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=1000 > /dev/null + + check_buf_size 0 "== $buf0size" + check_buf_size 1 "> $buf1size" + check_buf_size 2 "> $buf1size" + check_buf_size 3 "> $buf1size" + + log_test "PFC: Cable length 1000" + + RET=0 + + __mlnx_qos -i $swp --pfc=0,0,0,0,0,0,0,0 --cable_len=0 > /dev/null + __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null + + check_prio_pg "0 0 0 0 0 0 0 0 " + check_prio_tc "0 0 0 0 0 0 0 0 " + check_buf_size 0 "> 0" + check_buf_size 1 "== 0" + check_buf_size 2 "== 0" + check_buf_size 3 "== 0" + check_buf_size 4 "== 0" + check_buf_size 5 "== 0" + check_buf_size 6 "== 0" + check_buf_size 7 "== 0" + + log_test "PFC: Restore defaults" +} + +test_tc_priomap() +{ + RET=0 + + __mlnx_qos -i $swp --prio_tc=0,1,2,3,4,5,6,7 > /dev/null + check_prio_pg "0 1 2 3 4 5 6 7 " + + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + check_prio_pg "0 0 0 0 0 0 0 0 " + + __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 > /dev/null + check_prio_pg "1 3 5 7 0 2 4 6 " + + tc qdisc delete dev $swp root + check_prio_pg "0 1 2 3 4 5 6 7 " + + # Clean up. + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + __mlnx_qos -i $swp --prio2buffer=0,0,0,0,0,0,0,0 > /dev/null + tc qdisc delete dev $swp root + __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null + + log_test "TC: priomap" +} + +test_tc_sizes() +{ + local cell_size=$(devlink_cell_size_get) + local size=$((cell_size * 1000)) + + RET=0 + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null + check_fail $? "buffer_size should fail before qdisc is added" + + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + check_err $? "buffer_size should pass after qdisc is added" + check_buf_size 0 "== $size" "set size: " + + mtu_set $swp 6000 + check_buf_size 0 "== $size" "set MTU: " + mtu_restore $swp + + __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null + + # After replacing the qdisc for the same kind, buffer_size still has to + # work. + tc qdisc replace dev $swp root handle 1: bfifo limit 1M + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + check_buf_size 0 "== $size" "post replace, set size: " + + __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null + + # Likewise after replacing for a different kind. + tc qdisc replace dev $swp root handle 2: prio bands 8 + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + check_buf_size 0 "== $size" "post replace different kind, set size: " + + tc qdisc delete dev $swp root + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null + check_fail $? "buffer_size should fail after qdisc is deleted" + + log_test "TC: buffer size" +} + +test_int_buf() +{ + local what=$1; shift + + RET=0 + + local buf0size=$(get_buf_size 0) + local tot_size=$(get_tot_size) + + # Size of internal buffer and buffer 9. + local dsize=$((tot_size - buf0size)) + + tc qdisc add dev $swp clsact + tc filter add dev $swp egress matchall skip_sw action mirred egress mirror dev $swp + + local buf0size_2=$(get_buf_size 0) + local tot_size_2=$(get_tot_size) + local dsize_2=$((tot_size_2 - buf0size_2)) + + # Egress SPAN should have added to the "invisible" buffer configuration. + ((dsize_2 > dsize)) + check_err $? "Invisible buffers account for '$dsize_2', expected '> $dsize'" + + mtu_set $swp 3000 + + local buf0size_3=$(get_buf_size 0) + local tot_size_3=$(get_tot_size) + local dsize_3=$((tot_size_3 - buf0size_3)) + + # MTU change might change buffer 0, which will show at total, but the + # hidden buffers should stay the same size. + ((dsize_3 == dsize_2)) + check_err $? "MTU change: Invisible buffers account for '$dsize_3', expected '== $dsize_2'" + + mtu_restore $swp + tc qdisc del dev $swp clsact + + # After SPAN removal, hidden buffers should be back to the original sizes. + local buf0size_4=$(get_buf_size 0) + local tot_size_4=$(get_tot_size) + local dsize_4=$((tot_size_4 - buf0size_4)) + ((dsize_4 == dsize)) + check_err $? "SPAN removed: Invisible buffers account for '$dsize_4', expected '== $dsize'" + + log_test "${what}internal buffer size" +} + +test_tc_int_buf() +{ + local cell_size=$(devlink_cell_size_get) + local size=$((cell_size * 1000)) + + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + test_int_buf "TC: " + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + test_int_buf "TC+buffsize: " + + __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null + tc qdisc delete dev $swp root +} + +trap cleanup EXIT + +bail_on_lldpad +setup_wait +tests_run + +exit $EXIT_STATUS -- cgit v1.3.1 From bfa804784e321eb6a309a0959a17c64b08fb7ac7 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 30 Sep 2020 12:49:12 +0200 Subject: selftests: mlxsw: Add a PFC test Add a test for PFC. Runs 10MB of traffic through a bottleneck and checks that none of it gets lost. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- .../testing/selftests/drivers/net/mlxsw/qos_pfc.sh | 403 +++++++++++++++++++++ 1 file changed, 403 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh new file mode 100755 index 000000000000..4d900bc1f76c --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh @@ -0,0 +1,403 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test injects a 10-MB burst of traffic with VLAN tag and 802.1p priority +# of 1. This stream is consistently prioritized as priority 1, is put to PG +# buffer 1, and scheduled at TC 1. +# +# - the stream first ingresses through $swp1, where it is forwarded to $swp3 +# +# - then it ingresses through $swp4. Here it is put to a lossless buffer and put +# to a small pool ("PFC pool"). The traffic is forwarded to $swp2, which is +# shaped, and thus the PFC pool eventually fills, therefore the headroom +# fills, and $swp3 is paused. +# +# - since $swp3 now can't send traffic, the traffic ingressing $swp1 is kept at +# a pool ("overflow pool"). The overflow pool needs to be large enough to +# contain the whole burst. +# +# - eventually the PFC pool gets some traffic out, headroom therefore gets some +# traffic to the pool, and $swp3 is unpaused again. This way the traffic is +# gradually forwarded from the overflow pool, through the PFC pool, out of +# $swp2, and eventually to $h2. +# +# - if PFC works, all lossless flow packets that ingress through $swp1 should +# also be seen ingressing $h2. If it doesn't, there will be drops due to +# discrepancy between the speeds of $swp1 and $h2. +# +# - it should all play out relatively quickly, so that SLL and HLL will not +# cause drops. +# +# +-----------------------+ +# | H1 | +# | + $h1.111 | +# | | 192.0.2.33/28 | +# | | | +# | + $h1 | +# +---|-------------------+ +--------------------+ +# | | | +# +---|----------------------|--------------------|---------------------------+ +# | + $swp1 $swp3 + + $swp4 | +# | | iPOOL1 iPOOL0 | | iPOOL2 | +# | | ePOOL4 ePOOL5 | | ePOOL4 | +# | | 1Gbps | | 1Gbps | +# | | PFC:enabled=1 | | PFC:enabled=1 | +# | +-|----------------------|-+ +-|------------------------+ | +# | | + $swp1.111 $swp3.111 + | | + $swp4.111 | | +# | | | | | | +# | | BR1 | | BR2 | | +# | | | | | | +# | | | | + $swp2.111 | | +# | +--------------------------+ +---------|----------------+ | +# | | | +# | iPOOL0: 500KB dynamic | | +# | iPOOL1: 10MB static | | +# | iPOOL2: 1MB static + $swp2 | +# | ePOOL4: 500KB dynamic | iPOOL0 | +# | ePOOL5: 10MB static | ePOOL6 | +# | ePOOL6: "infinite" static | 200Mbps shaper | +# +-------------------------------------------------------|-------------------+ +# | +# +---|-------------------+ +# | + $h2 H2 | +# | | | +# | + $h2.111 | +# | 192.0.2.34/28 | +# +-----------------------+ +# +# iPOOL0+ePOOL4 is a helper pool for control traffic etc. +# iPOOL1+ePOOL5 are overflow pools. +# iPOOL2+ePOOL6 are PFC pools. + +ALL_TESTS=" + ping_ipv4 + test_qos_pfc +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=6 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source qos_lib.sh + +_1KB=1000 +_100KB=$((100 * _1KB)) +_500KB=$((500 * _1KB)) +_1MB=$((1000 * _1KB)) +_10MB=$((10 * _1MB)) + +h1_create() +{ + simple_if_init $h1 + mtu_set $h1 10000 + + vlan_create $h1 111 v$h1 192.0.2.33/28 +} + +h1_destroy() +{ + vlan_destroy $h1 111 + + mtu_restore $h1 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + mtu_set $h2 10000 + + vlan_create $h2 111 v$h2 192.0.2.34/28 +} + +h2_destroy() +{ + vlan_destroy $h2 111 + + mtu_restore $h2 + simple_if_fini $h2 +} + +switch_create() +{ + # pools + # ----- + + devlink_pool_size_thtype_save 0 + devlink_pool_size_thtype_save 4 + devlink_pool_size_thtype_save 1 + devlink_pool_size_thtype_save 5 + devlink_pool_size_thtype_save 2 + devlink_pool_size_thtype_save 6 + + devlink_port_pool_th_save $swp1 1 + devlink_port_pool_th_save $swp2 6 + devlink_port_pool_th_save $swp3 5 + devlink_port_pool_th_save $swp4 2 + + devlink_tc_bind_pool_th_save $swp1 1 ingress + devlink_tc_bind_pool_th_save $swp2 1 egress + devlink_tc_bind_pool_th_save $swp3 1 egress + devlink_tc_bind_pool_th_save $swp4 1 ingress + + # Control traffic pools. Just reduce the size. Keep them dynamic so that + # we don't need to change all the uninteresting quotas. + devlink_pool_size_thtype_set 0 dynamic $_500KB + devlink_pool_size_thtype_set 4 dynamic $_500KB + + # Overflow pools. + devlink_pool_size_thtype_set 1 static $_10MB + devlink_pool_size_thtype_set 5 static $_10MB + + # PFC pools. As per the writ, the size of egress PFC pool should be + # infinice, but actually it just needs to be large enough to not matter + # in practice, so reuse the 10MB limit. + devlink_pool_size_thtype_set 2 static $_1MB + devlink_pool_size_thtype_set 6 static $_10MB + + # $swp1 + # ----- + + ip link set dev $swp1 up + mtu_set $swp1 10000 + vlan_create $swp1 111 + ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp1 1 $_10MB + devlink_tc_bind_pool_th_set $swp1 1 ingress 1 $_10MB + + # Configure qdisc so that we can configure PG and therefore pool + # assignment. + tc qdisc replace dev $swp1 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + __mlnx_qos -i $swp1 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null + + # $swp2 + # ----- + + ip link set dev $swp2 up + mtu_set $swp2 10000 + vlan_create $swp2 111 + ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp2 6 $_10MB + devlink_tc_bind_pool_th_set $swp2 1 egress 6 $_10MB + + # prio 0->TC0 (band 7), 1->TC1 (band 6). TC1 is shaped. + tc qdisc replace dev $swp2 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + tc qdisc replace dev $swp2 parent 1:7 handle 17: \ + tbf rate 200Mbit burst 131072 limit 1M + + # $swp3 + # ----- + + ip link set dev $swp3 up + mtu_set $swp3 10000 + vlan_create $swp3 111 + ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp3 5 $_10MB + devlink_tc_bind_pool_th_set $swp3 1 egress 5 $_10MB + + # prio 0->TC0 (band 7), 1->TC1 (band 6) + tc qdisc replace dev $swp3 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + + # Need to enable PFC so that PAUSE takes effect. Therefore need to put + # the lossless prio into a buffer of its own. Don't bother with buffer + # sizes though, there is not going to be any pressure in the "backward" + # direction. + __mlnx_qos -i $swp3 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp3 --pfc=0,1,0,0,0,0,0,0 >/dev/null + + # $swp4 + # ----- + + ip link set dev $swp4 up + mtu_set $swp4 10000 + vlan_create $swp4 111 + ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp4 2 $_1MB + devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $_1MB + + # Configure qdisc so that we can hand-tune headroom. + tc qdisc replace dev $swp4 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + __mlnx_qos -i $swp4 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null + # PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which + # is (-2*MTU) about 80K of delay provision. + __mlnx_qos -i $swp3 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null + + # bridges + # ------- + + ip link add name br1 type bridge vlan_filtering 0 + ip link set dev $swp1.111 master br1 + ip link set dev $swp3.111 master br1 + ip link set dev br1 up + + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev $swp2.111 master br2 + ip link set dev $swp4.111 master br2 + ip link set dev br2 up +} + +switch_destroy() +{ + # Do this first so that we can reset the limits to values that are only + # valid for the original static / dynamic setting. + devlink_pool_size_thtype_restore 6 + devlink_pool_size_thtype_restore 5 + devlink_pool_size_thtype_restore 4 + devlink_pool_size_thtype_restore 2 + devlink_pool_size_thtype_restore 1 + devlink_pool_size_thtype_restore 0 + + # bridges + # ------- + + ip link set dev br2 down + ip link set dev $swp4.111 nomaster + ip link set dev $swp2.111 nomaster + ip link del dev br2 + + ip link set dev br1 down + ip link set dev $swp3.111 nomaster + ip link set dev $swp1.111 nomaster + ip link del dev br1 + + # $swp4 + # ----- + + __mlnx_qos -i $swp4 --buffer_size=0,0,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp4 --pfc=0,0,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp4 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null + tc qdisc del dev $swp4 root + + devlink_tc_bind_pool_th_restore $swp4 1 ingress + devlink_port_pool_th_restore $swp4 2 + + vlan_destroy $swp4 111 + mtu_restore $swp4 + ip link set dev $swp4 down + + # $swp3 + # ----- + + __mlnx_qos -i $swp3 --pfc=0,0,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp3 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null + tc qdisc del dev $swp3 root + + devlink_tc_bind_pool_th_restore $swp3 1 egress + devlink_port_pool_th_restore $swp3 5 + + vlan_destroy $swp3 111 + mtu_restore $swp3 + ip link set dev $swp3 down + + # $swp2 + # ----- + + tc qdisc del dev $swp2 parent 1:7 + tc qdisc del dev $swp2 root + + devlink_tc_bind_pool_th_restore $swp2 1 egress + devlink_port_pool_th_restore $swp2 6 + + vlan_destroy $swp2 111 + mtu_restore $swp2 + ip link set dev $swp2 down + + # $swp1 + # ----- + + __mlnx_qos -i $swp1 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null + tc qdisc del dev $swp1 root + + devlink_tc_bind_pool_th_restore $swp1 1 ingress + devlink_port_pool_th_restore $swp1 1 + + vlan_destroy $swp1 111 + mtu_restore $swp1 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + swp4=${NETIFS[p6]} + + h2mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.34 +} + +test_qos_pfc() +{ + RET=0 + + # 10M pool, each packet is 8K of payload + headers + local pkts=$((_10MB / 8050)) + local size=$((pkts * 8050)) + local in0=$(ethtool_stats_get $swp1 rx_octets_prio_1) + local out0=$(ethtool_stats_get $swp2 tx_octets_prio_1) + + $MZ $h1 -p 8000 -Q 1:111 -A 192.0.2.33 -B 192.0.2.34 \ + -a own -b $h2mac -c $pkts -t udp -q + sleep 2 + + local in1=$(ethtool_stats_get $swp1 rx_octets_prio_1) + local out1=$(ethtool_stats_get $swp2 tx_octets_prio_1) + + local din=$((in1 - in0)) + local dout=$((out1 - out0)) + + local pct_in=$((din * 100 / size)) + + ((pct_in > 95 && pct_in < 105)) + check_err $? "Relative ingress out of expected bounds, $pct_in% should be 100%" + + ((dout == din)) + check_err $? "$((din - dout)) bytes out of $din ingressed got lost" + + log_test "PFC" +} + +trap cleanup EXIT + +bail_on_lldpad +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS -- cgit v1.3.1 From b7cc6d3c5c9151a0475d643e212279e82531e527 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 29 Sep 2020 11:15:56 +0300 Subject: selftests: net: Add drop monitor test Test that drop monitor correctly captures both software and hardware originated packet drops. # ./drop_monitor_tests.sh Software drops test TEST: Capturing active software drops [ OK ] TEST: Capturing inactive software drops [ OK ] Hardware drops test TEST: Capturing active hardware drops [ OK ] TEST: Capturing inactive hardware drops [ OK ] Tests passed: 4 Tests failed: 0 Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/config | 3 + tools/testing/selftests/net/drop_monitor_tests.sh | 215 ++++++++++++++++++++++ 3 files changed, 219 insertions(+) create mode 100755 tools/testing/selftests/net/drop_monitor_tests.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 9491bbaa0831..52923eb08934 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -19,6 +19,7 @@ TEST_PROGS += txtimestamp.sh TEST_PROGS += vrf-xfrm-tests.sh TEST_PROGS += rxtimestamp.sh TEST_PROGS += devlink_port_split.py +TEST_PROGS += drop_monitor_tests.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 5a57ea02802d..43649242adc0 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -30,3 +30,6 @@ CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y CONFIG_TEST_BLACKHOLE_DEV=m CONFIG_KALLSYMS=y +CONFIG_TRACEPOINTS=y +CONFIG_NET_DROP_MONITOR=m +CONFIG_NETDEVSIM=m diff --git a/tools/testing/selftests/net/drop_monitor_tests.sh b/tools/testing/selftests/net/drop_monitor_tests.sh new file mode 100755 index 000000000000..b7650e30d18b --- /dev/null +++ b/tools/testing/selftests/net/drop_monitor_tests.sh @@ -0,0 +1,215 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking drop monitor functionality. + +ret=0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +# all tests in this script. Can be overridden with -t option +TESTS=" + sw_drops + hw_drops +" + +IP="ip -netns ns1" +TC="tc -netns ns1" +DEVLINK="devlink -N ns1" +NS_EXEC="ip netns exec ns1" +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +DEVLINK_DEV=netdevsim/${DEV} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf " TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf " TEST: %-60s [FAIL]\n" "${msg}" + fi +} + +setup() +{ + modprobe netdevsim &> /dev/null + + set -e + ip netns add ns1 + $IP link add dummy10 up type dummy + + $NS_EXEC echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device + udevadm settle + local netdev=$($NS_EXEC ls ${NETDEVSIM_PATH}/devices/${DEV}/net/) + $IP link set dev $netdev up + + set +e +} + +cleanup() +{ + $NS_EXEC echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device + ip netns del ns1 +} + +sw_drops_test() +{ + echo + echo "Software drops test" + + setup + + local dir=$(mktemp -d) + + $TC qdisc add dev dummy10 clsact + $TC filter add dev dummy10 egress pref 1 handle 101 proto ip \ + flower dst_ip 192.0.2.10 action drop + + $NS_EXEC mausezahn dummy10 -a 00:11:22:33:44:55 -b 00:aa:bb:cc:dd:ee \ + -A 192.0.2.1 -B 192.0.2.10 -t udp sp=12345,dp=54321 -c 0 -q \ + -d 100msec & + timeout 5 dwdump -o sw -w ${dir}/packets.pcap + (( $(tshark -r ${dir}/packets.pcap \ + -Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) != 0)) + log_test $? 0 "Capturing active software drops" + + rm ${dir}/packets.pcap + + { kill %% && wait %%; } 2>/dev/null + timeout 5 dwdump -o sw -w ${dir}/packets.pcap + (( $(tshark -r ${dir}/packets.pcap \ + -Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) == 0)) + log_test $? 0 "Capturing inactive software drops" + + rm -r $dir + + cleanup +} + +hw_drops_test() +{ + echo + echo "Hardware drops test" + + setup + + local dir=$(mktemp -d) + + $DEVLINK trap set $DEVLINK_DEV trap blackhole_route action trap + timeout 5 dwdump -o hw -w ${dir}/packets.pcap + (( $(tshark -r ${dir}/packets.pcap \ + -Y 'net_dm.hw_trap_name== blackhole_route' 2> /dev/null \ + | wc -l) != 0)) + log_test $? 0 "Capturing active hardware drops" + + rm ${dir}/packets.pcap + + $DEVLINK trap set $DEVLINK_DEV trap blackhole_route action drop + timeout 5 dwdump -o hw -w ${dir}/packets.pcap + (( $(tshark -r ${dir}/packets.pcap \ + -Y 'net_dm.hw_trap_name== blackhole_route' 2> /dev/null \ + | wc -l) == 0)) + log_test $? 0 "Capturing inactive hardware drops" + + rm -r $dir + + cleanup +} + +################################################################################ +# usage + +usage() +{ + cat < Test(s) to run (default: all) + (options: $TESTS) +EOF +} + +################################################################################ +# main + +while getopts ":t:h" opt; do + case $opt in + t) TESTS=$OPTARG;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v devlink)" ]; then + echo "SKIP: Could not run test without devlink tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v tshark)" ]; then + echo "SKIP: Could not run test without tshark tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v dwdump)" ]; then + echo "SKIP: Could not run test without dwdump tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v udevadm)" ]; then + echo "SKIP: Could not run test without udevadm tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v timeout)" ]; then + echo "SKIP: Could not run test without timeout tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit $ksft_skip +fi + +tshark -G fields 2> /dev/null | grep -q net_dm +if [ $? -ne 0 ]; then + echo "SKIP: tshark too old, missing net_dm dissector" + exit $ksft_skip +fi + +# start clean +cleanup &> /dev/null + +for t in $TESTS +do + case $t in + sw_drops|sw) sw_drops_test;; + hw_drops|hw) hw_drops_test;; + + help) echo "Test names: $TESTS"; exit 0;; + esac +done + +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi + +exit $ret -- cgit v1.3.1 From 3effc06a4dde3e2f6ee3d82e59d97ec357802a4b Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 30 Sep 2020 11:36:01 +0200 Subject: selftests/bpf: Fix alignment of .BTF_ids Fix a build failure on arm64, due to missing alignment information for the .BTF_ids section: resolve_btfids.test.o: in function `test_resolve_btfids': tools/testing/selftests/bpf/prog_tests/resolve_btfids.c:140:(.text+0x29c): relocation truncated to fit: R_AARCH64_LDST32_ABS_LO12_NC against `.BTF_ids' ld: tools/testing/selftests/bpf/prog_tests/resolve_btfids.c:140: warning: one possible cause of this error is that the symbol is being referenced in the indicated code as if it had a larger alignment than was declared where it was defined In vmlinux, the .BTF_ids section is aligned to 4 bytes by vmlinux.lds.h. In test_progs however, .BTF_ids doesn't have alignment constraints. The arm64 linker expects the btf_id_set.cnt symbol, a u32, to be naturally aligned but finds it misaligned and cannot apply the relocation. Enforce alignment of .BTF_ids to 4 bytes. Fixes: cd04b04de119 ("selftests/bpf: Add set test to resolve_btfids") Signed-off-by: Jean-Philippe Brucker Signed-off-by: Alexei Starovoitov Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20200930093559.2120126-1-jean-philippe@linaro.org --- tools/testing/selftests/bpf/prog_tests/resolve_btfids.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c index 8826c652adad..6ace5e9efec1 100644 --- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c +++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c @@ -28,6 +28,12 @@ struct symbol test_symbols[] = { { "func", BTF_KIND_FUNC, -1 }, }; +/* Align the .BTF_ids section to 4 bytes */ +asm ( +".pushsection " BTF_IDS_SECTION " ,\"a\"; \n" +".balign 4, 0; \n" +".popsection; \n"); + BTF_ID_LIST(test_list_local) BTF_ID_UNUSED BTF_ID(typedef, S) -- cgit v1.3.1 From 792caccc4526bb489e054f9ab61d7c024b15dea2 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 30 Sep 2020 15:49:26 -0700 Subject: bpf: Introduce BPF_F_PRESERVE_ELEMS for perf event array Currently, perf event in perf event array is removed from the array when the map fd used to add the event is closed. This behavior makes it difficult to the share perf events with perf event array. Introduce perf event map that keeps the perf event open with a new flag BPF_F_PRESERVE_ELEMS. With this flag set, perf events in the array are not removed when the original map fd is closed. Instead, the perf event will stay in the map until 1) it is explicitly removed from the array; or 2) the array is freed. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200930224927.1936644-2-songliubraving@fb.com --- include/uapi/linux/bpf.h | 3 +++ kernel/bpf/arraymap.c | 19 +++++++++++++++++-- tools/include/uapi/linux/bpf.h | 3 +++ 3 files changed, 23 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1f17c6752deb..4f556cfcbfbe 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -414,6 +414,9 @@ enum { /* Enable memory-mapping BPF map */ BPF_F_MMAPABLE = (1U << 10), + +/* Share perf_event among processes */ + BPF_F_PRESERVE_ELEMS = (1U << 11), }; /* Flags for BPF_PROG_QUERY. */ diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index e5fd31268ae0..bd777dd6f967 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -15,7 +15,8 @@ #include "map_in_map.h" #define ARRAY_CREATE_FLAG_MASK \ - (BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK) + (BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \ + BPF_F_PRESERVE_ELEMS) static void bpf_array_free_percpu(struct bpf_array *array) { @@ -64,6 +65,10 @@ int array_map_alloc_check(union bpf_attr *attr) attr->map_flags & BPF_F_MMAPABLE) return -EINVAL; + if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY && + attr->map_flags & BPF_F_PRESERVE_ELEMS) + return -EINVAL; + if (attr->value_size > KMALLOC_MAX_SIZE) /* if value_size is bigger, the user space won't be able to * access the elements. @@ -1134,6 +1139,9 @@ static void perf_event_fd_array_release(struct bpf_map *map, struct bpf_event_entry *ee; int i; + if (map->map_flags & BPF_F_PRESERVE_ELEMS) + return; + rcu_read_lock(); for (i = 0; i < array->map.max_entries; i++) { ee = READ_ONCE(array->ptrs[i]); @@ -1143,12 +1151,19 @@ static void perf_event_fd_array_release(struct bpf_map *map, rcu_read_unlock(); } +static void perf_event_fd_array_map_free(struct bpf_map *map) +{ + if (map->map_flags & BPF_F_PRESERVE_ELEMS) + bpf_fd_array_map_clear(map); + fd_array_map_free(map); +} + static int perf_event_array_map_btf_id; const struct bpf_map_ops perf_event_array_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_map_alloc, - .map_free = fd_array_map_free, + .map_free = perf_event_fd_array_map_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = fd_array_map_lookup_elem, .map_delete_elem = fd_array_map_delete_elem, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1f17c6752deb..4f556cfcbfbe 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -414,6 +414,9 @@ enum { /* Enable memory-mapping BPF map */ BPF_F_MMAPABLE = (1U << 10), + +/* Share perf_event among processes */ + BPF_F_PRESERVE_ELEMS = (1U << 11), }; /* Flags for BPF_PROG_QUERY. */ -- cgit v1.3.1 From d6b42068412bd17d94b3b7b51d3787120ba8c738 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 30 Sep 2020 15:49:27 -0700 Subject: selftests/bpf: Add tests for BPF_F_PRESERVE_ELEMS Add tests for perf event array with and without BPF_F_PRESERVE_ELEMS. Add a perf event to array via fd mfd. Without BPF_F_PRESERVE_ELEMS, the perf event is removed when mfd is closed. With BPF_F_PRESERVE_ELEMS, the perf event is removed when the map is freed. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200930224927.1936644-3-songliubraving@fb.com --- .../selftests/bpf/prog_tests/pe_preserve_elems.c | 66 ++++++++++++++++++++++ .../selftests/bpf/progs/test_pe_preserve_elems.c | 38 +++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c create mode 100644 tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c b/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c new file mode 100644 index 000000000000..673d38395253 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2019 Facebook */ +#include +#include +#include "test_pe_preserve_elems.skel.h" + +static int duration; + +static void test_one_map(struct bpf_map *map, struct bpf_program *prog, + bool has_share_pe) +{ + int err, key = 0, pfd = -1, mfd = bpf_map__fd(map); + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts); + struct perf_event_attr attr = { + .size = sizeof(struct perf_event_attr), + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_CPU_CLOCK, + }; + + pfd = syscall(__NR_perf_event_open, &attr, 0 /* pid */, + -1 /* cpu 0 */, -1 /* group id */, 0 /* flags */); + if (CHECK(pfd < 0, "perf_event_open", "failed\n")) + return; + + err = bpf_map_update_elem(mfd, &key, &pfd, BPF_ANY); + close(pfd); + if (CHECK(err < 0, "bpf_map_update_elem", "failed\n")) + return; + + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts); + if (CHECK(err < 0, "bpf_prog_test_run_opts", "failed\n")) + return; + if (CHECK(opts.retval != 0, "bpf_perf_event_read_value", + "failed with %d\n", opts.retval)) + return; + + /* closing mfd, prog still holds a reference on map */ + close(mfd); + + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts); + if (CHECK(err < 0, "bpf_prog_test_run_opts", "failed\n")) + return; + + if (has_share_pe) { + CHECK(opts.retval != 0, "bpf_perf_event_read_value", + "failed with %d\n", opts.retval); + } else { + CHECK(opts.retval != -ENOENT, "bpf_perf_event_read_value", + "should have failed with %d, but got %d\n", -ENOENT, + opts.retval); + } +} + +void test_pe_preserve_elems(void) +{ + struct test_pe_preserve_elems *skel; + + skel = test_pe_preserve_elems__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + test_one_map(skel->maps.array_1, skel->progs.read_array_1, false); + test_one_map(skel->maps.array_2, skel->progs.read_array_2, true); + + test_pe_preserve_elems__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c new file mode 100644 index 000000000000..fb22de7c365d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook +#include +#include +#include + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} array_1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __uint(map_flags, BPF_F_PRESERVE_ELEMS); +} array_2 SEC(".maps"); + +SEC("raw_tp/sched_switch") +int BPF_PROG(read_array_1) +{ + struct bpf_perf_event_value val; + + return bpf_perf_event_read_value(&array_1, 0, &val, sizeof(val)); +} + +SEC("raw_tp/task_rename") +int BPF_PROG(read_array_2) +{ + struct bpf_perf_event_value val; + + return bpf_perf_event_read_value(&array_2, 0, &val, sizeof(val)); +} + +char LICENSE[] SEC("license") = "GPL"; -- cgit v1.3.1 From 08fc4762149c964c0fa8682a75591fe6f22dc39e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 1 Oct 2020 11:14:22 -0300 Subject: tools beauty: Add script to generate table of mmap's 'prot' argument Will be wired up in the following csets: $ tools/perf/trace/beauty/mmap_prot.sh static const char *mmap_prot[] = { [ilog2(0x1) + 1] = "READ", #ifndef PROT_READ #define PROT_READ 0x1 #endif [ilog2(0x2) + 1] = "WRITE", #ifndef PROT_WRITE #define PROT_WRITE 0x2 #endif [ilog2(0x4) + 1] = "EXEC", #ifndef PROT_EXEC #define PROT_EXEC 0x4 #endif [ilog2(0x8) + 1] = "SEM", #ifndef PROT_SEM #define PROT_SEM 0x8 #endif [ilog2(0x01000000) + 1] = "GROWSDOWN", #ifndef PROT_GROWSDOWN #define PROT_GROWSDOWN 0x01000000 #endif [ilog2(0x02000000) + 1] = "GROWSUP", #ifndef PROT_GROWSUP #define PROT_GROWSUP 0x02000000 #endif }; $ $ $ $ tools/perf/trace/beauty/mmap_prot.sh alpha static const char *mmap_prot[] = { [ilog2(0x4) + 1] = "EXEC", #ifndef PROT_EXEC #define PROT_EXEC 0x4 #endif [ilog2(0x01000000) + 1] = "GROWSDOWN", #ifndef PROT_GROWSDOWN #define PROT_GROWSDOWN 0x01000000 #endif [ilog2(0x02000000) + 1] = "GROWSUP", #ifndef PROT_GROWSUP #define PROT_GROWSUP 0x02000000 #endif [ilog2(0x1) + 1] = "READ", #ifndef PROT_READ #define PROT_READ 0x1 #endif [ilog2(0x8) + 1] = "SEM", #ifndef PROT_SEM #define PROT_SEM 0x8 #endif [ilog2(0x2) + 1] = "WRITE", #ifndef PROT_WRITE #define PROT_WRITE 0x2 #endif }; $ Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/mmap_prot.sh | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100755 tools/perf/trace/beauty/mmap_prot.sh (limited to 'tools') diff --git a/tools/perf/trace/beauty/mmap_prot.sh b/tools/perf/trace/beauty/mmap_prot.sh new file mode 100755 index 000000000000..28f638f8d216 --- /dev/null +++ b/tools/perf/trace/beauty/mmap_prot.sh @@ -0,0 +1,30 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +if [ $# -ne 2 ] ; then + [ $# -eq 1 ] && hostarch=$1 || hostarch=`uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/` + asm_header_dir=tools/include/uapi/asm-generic + arch_header_dir=tools/arch/${hostarch}/include/uapi/asm +else + asm_header_dir=$1 + arch_header_dir=$2 +fi + +common_mman=${asm_header_dir}/mman-common.h +arch_mman=${arch_header_dir}/mman.h + +prefix="PROT" + +printf "static const char *mmap_prot[] = {\n" +regex=`printf '^[[:space:]]*#[[:space:]]*define[[:space:]]+%s_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' ${prefix}` +([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+ Date: Thu, 1 Oct 2020 11:23:38 -0300 Subject: perf trace: Use the autogenerated mmap 'prot' string/id table No change in behaviour: # perf trace -e mmap sleep 1 0.000 ( 0.009 ms): sleep/751870 mmap(len: 143317, prot: READ, flags: PRIVATE, fd: 3) = 0x7fa96d0f7000 0.028 ( 0.004 ms): sleep/751870 mmap(len: 8192, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS) = 0x7fa96d0f5000 0.037 ( 0.005 ms): sleep/751870 mmap(len: 1872744, prot: READ, flags: PRIVATE|DENYWRITE, fd: 3) = 0x7fa96cf2b000 0.044 ( 0.011 ms): sleep/751870 mmap(addr: 0x7fa96cf50000, len: 1376256, prot: READ|EXEC, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 0x25000) = 0x7fa96cf50000 0.056 ( 0.007 ms): sleep/751870 mmap(addr: 0x7fa96d0a0000, len: 307200, prot: READ, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 0x175000) = 0x7fa96d0a0000 0.064 ( 0.007 ms): sleep/751870 mmap(addr: 0x7fa96d0eb000, len: 24576, prot: READ|WRITE, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 0x1bf000) = 0x7fa96d0eb000 0.075 ( 0.005 ms): sleep/751870 mmap(addr: 0x7fa96d0f1000, len: 13160, prot: READ|WRITE, flags: PRIVATE|FIXED|ANONYMOUS) = 0x7fa96d0f1000 0.253 ( 0.005 ms): sleep/751870 mmap(len: 218049136, prot: READ, flags: PRIVATE, fd: 3) = 0x7fa95ff38000 # # # set -o vi # strace -e mmap sleep 1 mmap(NULL, 143317, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f333bd83000 mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f333bd81000 mmap(NULL, 1872744, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f333bbb7000 mmap(0x7f333bbdc000, 1376256, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x25000) = 0x7f333bbdc000 mmap(0x7f333bd2c000, 307200, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x175000) = 0x7f333bd2c000 mmap(0x7f333bd77000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1bf000) = 0x7f333bd77000 mmap(0x7f333bd7d000, 13160, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f333bd7d000 mmap(NULL, 218049136, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f332ebc4000 +++ exited with 0 +++ # And you can as well tweak 'perf trace's output to more closely match strace's: # perf config trace.show_arg_names=no # perf config trace.show_duration=no # perf config trace.show_prefix=yes # perf config trace.show_timestamp=no # perf config trace.show_zeros=yes # perf config trace.no_inherit=yes # perf trace -e mmap sleep 1 mmap(NULL, 143317, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f0d287ca000 mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS) = 0x7f0d287c8000 mmap(NULL, 1872744, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f0d285fe000 mmap(0x7f0d28623000, 1376256, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x25000) = 0x7f0d28623000 mmap(0x7f0d28773000, 307200, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x175000) = 0x7f0d28773000 mmap(0x7f0d287be000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1bf000) = 0x7f0d287be000 mmap(0x7f0d287c4000, 13160, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS) = 0x7f0d287c4000 mmap(NULL, 218049136, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f0d1b60b000 # # perf config | grep ^trace trace.show_arg_names=no trace.show_duration=no trace.show_prefix=yes trace.show_timestamp=no trace.show_zeros=yes trace.no_inherit=yes # Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 9 +++++++++ tools/perf/trace/beauty/mmap.c | 42 +++++++++++++++--------------------------- 2 files changed, 24 insertions(+), 27 deletions(-) (limited to 'tools') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 038de61aa69f..7ce3f2e8b9c7 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -555,6 +555,13 @@ move_mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/move_mount_flags.sh $(move_mount_flags_array): $(linux_uapi_dir)/fs.h $(move_mount_flags_tbl) $(Q)$(SHELL) '$(move_mount_flags_tbl)' $(linux_uapi_dir) > $@ + +mmap_prot_array := $(beauty_outdir)/mmap_prot_array.c +mmap_prot_tbl := $(srctree)/tools/perf/trace/beauty/mmap_prot.sh + +$(mmap_prot_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(mmap_prot_tbl) + $(Q)$(SHELL) '$(mmap_prot_tbl)' $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@ + prctl_option_array := $(beauty_outdir)/prctl_option_array.c prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/ prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh @@ -716,6 +723,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(vhost_virtio_ioctl_array) \ $(madvise_behavior_array) \ $(mmap_flags_array) \ + $(mmap_prot_array) \ $(mremap_flags_array) \ $(mount_flags_array) \ $(move_mount_flags_array) \ @@ -1015,6 +1023,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)$(fspick_arrays) \ $(OUTPUT)$(madvise_behavior_array) \ $(OUTPUT)$(mmap_flags_array) \ + $(OUTPUT)$(mmap_prot_array) \ $(OUTPUT)$(mremap_flags_array) \ $(OUTPUT)$(mount_flags_array) \ $(OUTPUT)$(move_mount_flags_array) \ diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index 58a70ffa0a88..3c5e97b93dd5 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -1,34 +1,22 @@ // SPDX-License-Identifier: LGPL-2.1 -#include #include -static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, - struct syscall_arg *arg) +#include "trace/beauty/generated/mmap_prot_array.c" +static DEFINE_STRARRAY(mmap_prot, "PROT_"); + +static size_t mmap__scnprintf_prot(unsigned long prot, char *bf, size_t size, bool show_prefix) +{ + return strarray__scnprintf_flags(&strarray__mmap_prot, bf, size, show_prefix, prot); +} + +static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, struct syscall_arg *arg) { - const char *prot_prefix = "PROT_"; - int printed = 0, prot = arg->val; - bool show_prefix = arg->show_string_prefix; - - if (prot == PROT_NONE) - return scnprintf(bf, size, "%sNONE", show_prefix ? prot_prefix : ""); -#define P_MMAP_PROT(n) \ - if (prot & PROT_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prot_prefix :"", #n); \ - prot &= ~PROT_##n; \ - } - - P_MMAP_PROT(READ); - P_MMAP_PROT(WRITE); - P_MMAP_PROT(EXEC); - P_MMAP_PROT(SEM); - P_MMAP_PROT(GROWSDOWN); - P_MMAP_PROT(GROWSUP); -#undef P_MMAP_PROT - - if (prot) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot); - - return printed; + unsigned long prot = arg->val; + + if (prot == 0) + return scnprintf(bf, size, "%sNONE", arg->show_string_prefix ? strarray__mmap_prot.prefix : ""); + + return mmap__scnprintf_prot(prot, bf, size, arg->show_string_prefix); } #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot -- cgit v1.3.1 From 6fcd5ddc3b1467b3586972ef785d0d926ae4cdf4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 28 Sep 2020 22:11:35 +0200 Subject: perf python scripting: Fix printable strings in python3 scripts Hagen reported broken strings in python3 tracepoint scripts: make PYTHON=python3 perf record -e sched:sched_switch -a -- sleep 5 perf script --gen-script py perf script -s ./perf-script.py [..] sched__sched_switch 7 563231.759525792 0 swapper prev_comm=bytearray(b'swapper/7\x00\x00\x00\x00\x00\x00\x00'), prev_pid=0, prev_prio=120, prev_state=, next_comm=bytearray(b'mutex-thread-co\x00'), The problem is in the is_printable_array function that does not take the zero byte into account and claim such string as not printable, so the code will create byte array instead of string. Committer testing: After this fix: sched__sched_switch 3 484522.497072626 1158680 kworker/3:0-eve prev_comm=kworker/3:0, prev_pid=1158680, prev_prio=120, prev_state=I, next_comm=swapper/3, next_pid=0, next_prio=120 Sample: {addr=0, cpu=3, datasrc=84410401, datasrc_decode=N/A|SNP N/A|TLB N/A|LCK N/A, ip=18446744071841817196, period=1, phys_addr=0, pid=1158680, tid=1158680, time=484522497072626, transaction=0, values=[(0, 0)], weight=0} sched__sched_switch 4 484522.497085610 1225814 perf prev_comm=perf, prev_pid=1225814, prev_prio=120, prev_state=, next_comm=migration/4, next_pid=30, next_prio=0 Sample: {addr=0, cpu=4, datasrc=84410401, datasrc_decode=N/A|SNP N/A|TLB N/A|LCK N/A, ip=18446744071841817196, period=1, phys_addr=0, pid=1225814, tid=1225814, time=484522497085610, transaction=0, values=[(0, 0)], weight=0} Fixes: 249de6e07458 ("perf script python: Fix string vs byte array resolving") Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: Hagen Paul Pfeifer Cc: Alexander Shishkin Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20200928201135.3633850-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/print_binary.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/print_binary.c b/tools/perf/util/print_binary.c index 599a1543871d..13fdc51c61d9 100644 --- a/tools/perf/util/print_binary.c +++ b/tools/perf/util/print_binary.c @@ -50,7 +50,7 @@ int is_printable_array(char *p, unsigned int len) len--; - for (i = 0; i < len; i++) { + for (i = 0; i < len && p[i]; i++) { if (!isprint(p[i]) && !isspace(p[i])) return 0; } -- cgit v1.3.1 From b0b8e56b82c06b3bb6e5fb66d0e9c9c3fd3ce555 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 29 Sep 2020 00:49:16 +0200 Subject: objtool: Permit __kasan_check_{read,write} under UACCESS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building linux-next with JUMP_LABEL=n and KASAN=y, I got this objtool warning: arch/x86/lib/copy_mc.o: warning: objtool: copy_mc_to_user()+0x22: call to __kasan_check_read() with UACCESS enabled What happens here is that copy_mc_to_user() branches on a static key in a UACCESS region:         __uaccess_begin();         if (static_branch_unlikely(©_mc_fragile_key))                 ret = copy_mc_fragile(to, from, len);         ret = copy_mc_generic(to, from, len);         __uaccess_end(); and the !CONFIG_JUMP_LABEL version of static_branch_unlikely() uses static_key_enabled(), which uses static_key_count(), which uses atomic_read(), which calls instrument_atomic_read(), which uses kasan_check_read(), which is __kasan_check_read(). Let's permit these KASAN helpers in UACCESS regions - static keys should probably work under UACCESS, I think. PeterZ adds: It's not a matter of permitting, it's a matter of being safe and correct. In this case it is, because it's a thin wrapper around check_memory_region() which was already marked safe. check_memory_region() is correct because the only thing it ends up calling is kasa_report() and that is also marked safe because that is annotated with user_access_save/restore() before it does anything else. On top of that, all of KASAN is noinstr, so nothing in here will end up in tracing and/or call schedule() before the user_access_save(). Signed-off-by: Jann Horn Acked-by: Peter Zijlstra (Intel) Signed-off-by: Josh Poimboeuf --- tools/objtool/check.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 2df9f769412e..3d14134c4e97 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -583,6 +583,8 @@ static const char *uaccess_safe_builtin[] = { "__asan_store4_noabort", "__asan_store8_noabort", "__asan_store16_noabort", + "__kasan_check_read", + "__kasan_check_write", /* KASAN in-line */ "__asan_report_load_n_noabort", "__asan_report_load1_noabort", -- cgit v1.3.1 From cffcdbff70a3cf3cc4f42fa2e57012411b1c78a6 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 1 Oct 2020 15:54:40 -0700 Subject: selftests/bpf: Initialize duration in xdp_noinline.c Fixes clang error: tools/testing/selftests/bpf/prog_tests/xdp_noinline.c:35:6: error: variable 'duration' is uninitialized when used here [-Werror,-Wuninitialized] if (CHECK(!skel, "skel_open_and_load", "failed\n")) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201001225440.1373233-1-sdf@google.com --- tools/testing/selftests/bpf/prog_tests/xdp_noinline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c index a1f06424cf83..0281095de266 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c @@ -25,7 +25,7 @@ void test_xdp_noinline(void) __u8 flags; } real_def = {.dst = MAGIC_VAL}; __u32 ch_key = 11, real_num = 3; - __u32 duration, retval, size; + __u32 duration = 0, retval, size; int err, i; __u64 bytes = 0, pkts = 0; char buf[128]; -- cgit v1.3.1 From 48ca6243c6adf1a3d7e7a0fd519ba918eb7b00ca Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 1 Oct 2020 17:04:51 -0700 Subject: selftests/bpf: Properly initialize linfo in sockmap_basic When using -Werror=missing-braces, compiler complains about missing braces. Let's use use ={} initialization which should do the job: tools/testing/selftests/bpf/prog_tests/sockmap_basic.c: In function 'test_sockmap_iter': tools/testing/selftests/bpf/prog_tests/sockmap_basic.c:181:8: error: missing braces around initializer [-Werror=missing-braces] union bpf_iter_link_info linfo = {0}; ^ tools/testing/selftests/bpf/prog_tests/sockmap_basic.c:181:8: error: (near initialization for 'linfo.map') [-Werror=missing-braces] tools/testing/selftests/bpf/prog_tests/sockmap_basic.c: At top level: Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201002000451.1794044-1-sdf@google.com --- tools/testing/selftests/bpf/prog_tests/sockmap_basic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 4c4224e3e10a..85f73261fab0 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -198,7 +198,7 @@ static void test_sockmap_copy(enum bpf_map_type map_type) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); int err, len, src_fd, iter_fd, duration = 0; - union bpf_iter_link_info linfo = {0}; + union bpf_iter_link_info linfo = {}; __u32 i, num_sockets, num_elems; struct bpf_iter_sockmap *skel; __s64 *sock_fd = NULL; -- cgit v1.3.1 From 82206a0c06ccbd4a80cd623b9d52fcda130d6c7d Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Mon, 28 Sep 2020 13:02:27 -0700 Subject: kunit: tool: handle when .kunit exists but .kunitconfig does not Right now .kunitconfig and the build dir are automatically created if the build dir does not exists; however, if the build dir is present and .kunitconfig is not, kunit_tool will crash. Fix this by checking for both the build dir as well as the .kunitconfig. NOTE: This depends on commit 5578d008d9e0 ("kunit: tool: fix running kunit_tool from outside kernel tree") Link: https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git/commit/?id=5578d008d9e06bb531fb3e62dd17096d9fd9c853 Signed-off-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 3c95a0eb0d04..ebf5f5763dee 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -261,6 +261,8 @@ def main(argv, linux=None): if cli_args.subcommand == 'run': if not os.path.exists(cli_args.build_dir): os.mkdir(cli_args.build_dir) + + if not os.path.exists(kunit_kernel.kunitconfig_path): create_default_kunitconfig() if not linux: @@ -277,10 +279,12 @@ def main(argv, linux=None): if result.status != KunitStatus.SUCCESS: sys.exit(1) elif cli_args.subcommand == 'config': - if cli_args.build_dir: - if not os.path.exists(cli_args.build_dir): - os.mkdir(cli_args.build_dir) - create_default_kunitconfig() + if cli_args.build_dir and ( + not os.path.exists(cli_args.build_dir)): + os.mkdir(cli_args.build_dir) + + if not os.path.exists(kunit_kernel.kunitconfig_path): + create_default_kunitconfig() if not linux: linux = kunit_kernel.LinuxSourceTree() -- cgit v1.3.1 From aa803771a80aa2aa2d5cdd38434b369066fbb8fc Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 24 Aug 2020 21:56:26 -0700 Subject: tools: Avoid comma separated statements Use semicolons and braces. Signed-off-by: Joe Perches Signed-off-by: Shuah Khan --- tools/lib/subcmd/help.c | 10 +- tools/power/cpupower/utils/cpufreq-set.c | 14 +- tools/testing/selftests/vm/gup_benchmark.c | 18 +- tools/testing/selftests/vm/userfaultfd.c | 296 ++++++++++++++++++----------- 4 files changed, 210 insertions(+), 128 deletions(-) (limited to 'tools') diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c index 2859f107abc8..bf02d62a3b2b 100644 --- a/tools/lib/subcmd/help.c +++ b/tools/lib/subcmd/help.c @@ -65,12 +65,14 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) ci = cj = ei = 0; while (ci < cmds->cnt && ei < excludes->cnt) { cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name); - if (cmp < 0) + if (cmp < 0) { cmds->names[cj++] = cmds->names[ci++]; - else if (cmp == 0) - ci++, ei++; - else if (cmp > 0) + } else if (cmp == 0) { + ci++; ei++; + } else if (cmp > 0) { + ei++; + } } while (ci < cmds->cnt) diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c index 6ed82fba5aaa..7b2164e07057 100644 --- a/tools/power/cpupower/utils/cpufreq-set.c +++ b/tools/power/cpupower/utils/cpufreq-set.c @@ -99,13 +99,17 @@ static unsigned long string_to_frequency(const char *str) continue; if (str[cp] == '.') { - while (power > -1 && isdigit(str[cp+1])) - cp++, power--; + while (power > -1 && isdigit(str[cp+1])) { + cp++; + power--; + } } - if (power >= -1) /* not enough => pad */ + if (power >= -1) { /* not enough => pad */ pad = power + 1; - else /* to much => strip */ - pad = 0, cp += power + 1; + } else { /* too much => strip */ + pad = 0; + cp += power + 1; + } /* check bounds */ if (cp <= 0 || cp + pad > NORM_FREQ_LEN - 1) return 0; diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c index 43b4dfe161a2..d69f0eb0d8c0 100644 --- a/tools/testing/selftests/vm/gup_benchmark.c +++ b/tools/testing/selftests/vm/gup_benchmark.c @@ -105,12 +105,16 @@ int main(int argc, char **argv) gup.flags |= FOLL_WRITE; fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR); - if (fd == -1) - perror("open"), exit(1); + if (fd == -1) { + perror("open"); + exit(1); + } p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0); - if (p == MAP_FAILED) - perror("mmap"), exit(1); + if (p == MAP_FAILED) { + perror("mmap"); + exit(1); + } gup.addr = (unsigned long)p; if (thp == 1) @@ -123,8 +127,10 @@ int main(int argc, char **argv) for (i = 0; i < repeats; i++) { gup.size = size; - if (ioctl(fd, cmd, &gup)) - perror("ioctl"), exit(1); + if (ioctl(fd, cmd, &gup)) { + perror("ioctl"); + exit(1); + } printf("Time: get:%lld put:%lld us", gup.get_delta_usec, gup.put_delta_usec); diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 61e5cfeb1350..9b0912a01777 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -227,8 +227,10 @@ static void hugetlb_allocate_area(void **alloc_area) huge_fd, *alloc_area == area_src ? 0 : nr_pages * page_size); if (area_alias == MAP_FAILED) { - if (munmap(*alloc_area, nr_pages * page_size) < 0) - perror("hugetlb munmap"), exit(1); + if (munmap(*alloc_area, nr_pages * page_size) < 0) { + perror("hugetlb munmap"); + exit(1); + } *alloc_area = NULL; return; } @@ -337,9 +339,10 @@ static void wp_range(int ufd, __u64 start, __u64 len, bool wp) /* Undo write-protect, do wakeup after that */ prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0; - if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) - fprintf(stderr, "clear WP failed for address 0x%Lx\n", - start), exit(1); + if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) { + fprintf(stderr, "clear WP failed for address 0x%Lx\n", start); + exit(1); + } } static void *locking_thread(void *arg) @@ -359,8 +362,10 @@ static void *locking_thread(void *arg) seed += cpu; bzero(&rand, sizeof(rand)); bzero(&randstate, sizeof(randstate)); - if (initstate_r(seed, randstate, sizeof(randstate), &rand)) - fprintf(stderr, "srandom_r error\n"), exit(1); + if (initstate_r(seed, randstate, sizeof(randstate), &rand)) { + fprintf(stderr, "srandom_r error\n"); + exit(1); + } } else { page_nr = -bounces; if (!(bounces & BOUNCE_RACINGFAULTS)) @@ -369,12 +374,16 @@ static void *locking_thread(void *arg) while (!finished) { if (bounces & BOUNCE_RANDOM) { - if (random_r(&rand, &rand_nr)) - fprintf(stderr, "random_r 1 error\n"), exit(1); + if (random_r(&rand, &rand_nr)) { + fprintf(stderr, "random_r 1 error\n"); + exit(1); + } page_nr = rand_nr; if (sizeof(page_nr) > sizeof(rand_nr)) { - if (random_r(&rand, &rand_nr)) - fprintf(stderr, "random_r 2 error\n"), exit(1); + if (random_r(&rand, &rand_nr)) { + fprintf(stderr, "random_r 2 error\n"); + exit(1); + } page_nr |= (((unsigned long) rand_nr) << 16) << 16; } @@ -385,11 +394,13 @@ static void *locking_thread(void *arg) start = time(NULL); if (bounces & BOUNCE_VERIFY) { count = *area_count(area_dst, page_nr); - if (!count) + if (!count) { fprintf(stderr, "page_nr %lu wrong count %Lu %Lu\n", page_nr, count, - count_verify[page_nr]), exit(1); + count_verify[page_nr]); + exit(1); + } /* @@ -401,11 +412,12 @@ static void *locking_thread(void *arg) */ #if 1 if (!my_bcmp(area_dst + page_nr * page_size, zeropage, - page_size)) + page_size)) { fprintf(stderr, "my_bcmp page_nr %lu wrong count %Lu %Lu\n", - page_nr, count, - count_verify[page_nr]), exit(1); + page_nr, count, count_verify[page_nr]); + exit(1); + } #else unsigned long loops; @@ -437,7 +449,7 @@ static void *locking_thread(void *arg) fprintf(stderr, "page_nr %lu memory corruption %Lu %Lu\n", page_nr, count, - count_verify[page_nr]), exit(1); + count_verify[page_nr]); exit(1); } count++; *area_count(area_dst, page_nr) = count_verify[page_nr] = count; @@ -461,12 +473,14 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy, offset); if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) { /* real retval in ufdio_copy.copy */ - if (uffdio_copy->copy != -EEXIST) + if (uffdio_copy->copy != -EEXIST) { fprintf(stderr, "UFFDIO_COPY retry error %Ld\n", - uffdio_copy->copy), exit(1); + uffdio_copy->copy); + exit(1); + } } else { fprintf(stderr, "UFFDIO_COPY retry unexpected %Ld\n", - uffdio_copy->copy), exit(1); + uffdio_copy->copy); exit(1); } } @@ -474,9 +488,10 @@ static int __copy_page(int ufd, unsigned long offset, bool retry) { struct uffdio_copy uffdio_copy; - if (offset >= nr_pages * page_size) - fprintf(stderr, "unexpected offset %lu\n", - offset), exit(1); + if (offset >= nr_pages * page_size) { + fprintf(stderr, "unexpected offset %lu\n", offset); + exit(1); + } uffdio_copy.dst = (unsigned long) area_dst + offset; uffdio_copy.src = (unsigned long) area_src + offset; uffdio_copy.len = page_size; @@ -487,12 +502,14 @@ static int __copy_page(int ufd, unsigned long offset, bool retry) uffdio_copy.copy = 0; if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) { /* real retval in ufdio_copy.copy */ - if (uffdio_copy.copy != -EEXIST) + if (uffdio_copy.copy != -EEXIST) { fprintf(stderr, "UFFDIO_COPY error %Ld\n", - uffdio_copy.copy), exit(1); + uffdio_copy.copy); + exit(1); + } } else if (uffdio_copy.copy != page_size) { fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n", - uffdio_copy.copy), exit(1); + uffdio_copy.copy); exit(1); } else { if (test_uffdio_copy_eexist && retry) { test_uffdio_copy_eexist = false; @@ -521,11 +538,11 @@ static int uffd_read_msg(int ufd, struct uffd_msg *msg) if (ret < 0) { if (errno == EAGAIN) return 1; - else - perror("blocking read error"), exit(1); + perror("blocking read error"); } else { - fprintf(stderr, "short read\n"), exit(1); + fprintf(stderr, "short read\n"); } + exit(1); } return 0; @@ -536,9 +553,10 @@ static void uffd_handle_page_fault(struct uffd_msg *msg, { unsigned long offset; - if (msg->event != UFFD_EVENT_PAGEFAULT) - fprintf(stderr, "unexpected msg event %u\n", - msg->event), exit(1); + if (msg->event != UFFD_EVENT_PAGEFAULT) { + fprintf(stderr, "unexpected msg event %u\n", msg->event); + exit(1); + } if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) { wp_range(uffd, msg->arg.pagefault.address, page_size, false); @@ -546,8 +564,10 @@ static void uffd_handle_page_fault(struct uffd_msg *msg, } else { /* Missing page faults */ if (bounces & BOUNCE_VERIFY && - msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) - fprintf(stderr, "unexpected write fault\n"), exit(1); + msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) { + fprintf(stderr, "unexpected write fault\n"); + exit(1); + } offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; offset &= ~(page_size-1); @@ -574,25 +594,32 @@ static void *uffd_poll_thread(void *arg) for (;;) { ret = poll(pollfd, 2, -1); - if (!ret) - fprintf(stderr, "poll error %d\n", ret), exit(1); - if (ret < 0) - perror("poll"), exit(1); + if (!ret) { + fprintf(stderr, "poll error %d\n", ret); + exit(1); + } + if (ret < 0) { + perror("poll"); + exit(1); + } if (pollfd[1].revents & POLLIN) { - if (read(pollfd[1].fd, &tmp_chr, 1) != 1) - fprintf(stderr, "read pipefd error\n"), - exit(1); + if (read(pollfd[1].fd, &tmp_chr, 1) != 1) { + fprintf(stderr, "read pipefd error\n"); + exit(1); + } break; } - if (!(pollfd[0].revents & POLLIN)) + if (!(pollfd[0].revents & POLLIN)) { fprintf(stderr, "pollfd[0].revents %d\n", - pollfd[0].revents), exit(1); + pollfd[0].revents); + exit(1); + } if (uffd_read_msg(uffd, &msg)) continue; switch (msg.event) { default: fprintf(stderr, "unexpected msg event %u\n", - msg.event), exit(1); + msg.event); exit(1); break; case UFFD_EVENT_PAGEFAULT: uffd_handle_page_fault(&msg, stats); @@ -606,8 +633,10 @@ static void *uffd_poll_thread(void *arg) uffd_reg.range.start = msg.arg.remove.start; uffd_reg.range.len = msg.arg.remove.end - msg.arg.remove.start; - if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) - fprintf(stderr, "remove failure\n"), exit(1); + if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) { + fprintf(stderr, "remove failure\n"); + exit(1); + } break; case UFFD_EVENT_REMAP: area_dst = (char *)(unsigned long)msg.arg.remap.to; @@ -879,8 +908,10 @@ static int faulting_process(int signal_test) area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size, MREMAP_MAYMOVE | MREMAP_FIXED, area_src); - if (area_dst == MAP_FAILED) - perror("mremap"), exit(1); + if (area_dst == MAP_FAILED) { + perror("mremap"); + exit(1); + } for (; nr < nr_pages; nr++) { count = *area_count(area_dst, nr); @@ -888,7 +919,7 @@ static int faulting_process(int signal_test) fprintf(stderr, "nr %lu memory corruption %Lu %Lu\n", nr, count, - count_verify[nr]), exit(1); + count_verify[nr]); exit(1); } /* * Trigger write protection if there is by writting @@ -901,8 +932,10 @@ static int faulting_process(int signal_test) return 1; for (nr = 0; nr < nr_pages; nr++) { - if (my_bcmp(area_dst + nr * page_size, zeropage, page_size)) - fprintf(stderr, "nr %lu is not zero\n", nr), exit(1); + if (my_bcmp(area_dst + nr * page_size, zeropage, page_size)) { + fprintf(stderr, "nr %lu is not zero\n", nr); + exit(1); + } } return 0; @@ -916,12 +949,14 @@ static void retry_uffdio_zeropage(int ufd, uffdio_zeropage->range.len, offset); if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) { - if (uffdio_zeropage->zeropage != -EEXIST) + if (uffdio_zeropage->zeropage != -EEXIST) { fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n", - uffdio_zeropage->zeropage), exit(1); + uffdio_zeropage->zeropage); + exit(1); + } } else { fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n", - uffdio_zeropage->zeropage), exit(1); + uffdio_zeropage->zeropage); exit(1); } } @@ -933,9 +968,10 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry) has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE); - if (offset >= nr_pages * page_size) - fprintf(stderr, "unexpected offset %lu\n", - offset), exit(1); + if (offset >= nr_pages * page_size) { + fprintf(stderr, "unexpected offset %lu\n", offset); + exit(1); + } uffdio_zeropage.range.start = (unsigned long) area_dst + offset; uffdio_zeropage.range.len = page_size; uffdio_zeropage.mode = 0; @@ -943,22 +979,26 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry) if (ret) { /* real retval in ufdio_zeropage.zeropage */ if (has_zeropage) { - if (uffdio_zeropage.zeropage == -EEXIST) - fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"), - exit(1); - else + if (uffdio_zeropage.zeropage == -EEXIST) { + fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"); + exit(1); + } else { fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n", - uffdio_zeropage.zeropage), exit(1); + uffdio_zeropage.zeropage); + exit(1); + } } else { - if (uffdio_zeropage.zeropage != -EINVAL) + if (uffdio_zeropage.zeropage != -EINVAL) { fprintf(stderr, "UFFDIO_ZEROPAGE not -EINVAL %Ld\n", - uffdio_zeropage.zeropage), exit(1); + uffdio_zeropage.zeropage); + exit(1); + } } } else if (has_zeropage) { if (uffdio_zeropage.zeropage != page_size) { fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n", - uffdio_zeropage.zeropage), exit(1); + uffdio_zeropage.zeropage); exit(1); } else { if (test_uffdio_zeropage_eexist && retry) { test_uffdio_zeropage_eexist = false; @@ -970,7 +1010,7 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry) } else { fprintf(stderr, "UFFDIO_ZEROPAGE succeeded %Ld\n", - uffdio_zeropage.zeropage), exit(1); + uffdio_zeropage.zeropage); exit(1); } return 0; @@ -1000,19 +1040,24 @@ static int userfaultfd_zeropage_test(void) uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; if (test_uffdio_wp) uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP; - if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) - fprintf(stderr, "register failure\n"), exit(1); + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) { + fprintf(stderr, "register failure\n"); + exit(1); + } expected_ioctls = uffd_test_ops->expected_ioctls; if ((uffdio_register.ioctls & expected_ioctls) != - expected_ioctls) + expected_ioctls) { fprintf(stderr, - "unexpected missing ioctl for anon memory\n"), - exit(1); + "unexpected missing ioctl for anon memory\n"); + exit(1); + } if (uffdio_zeropage(uffd, 0)) { - if (my_bcmp(area_dst, zeropage, page_size)) - fprintf(stderr, "zeropage is not zero\n"), exit(1); + if (my_bcmp(area_dst, zeropage, page_size)) { + fprintf(stderr, "zeropage is not zero\n"); + exit(1); + } } close(uffd); @@ -1047,32 +1092,41 @@ static int userfaultfd_events_test(void) uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; if (test_uffdio_wp) uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP; - if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) - fprintf(stderr, "register failure\n"), exit(1); + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) { + fprintf(stderr, "register failure\n"); + exit(1); + } expected_ioctls = uffd_test_ops->expected_ioctls; - if ((uffdio_register.ioctls & expected_ioctls) != - expected_ioctls) - fprintf(stderr, - "unexpected missing ioctl for anon memory\n"), - exit(1); + if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) { + fprintf(stderr, "unexpected missing ioctl for anon memory\n"); + exit(1); + } - if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) - perror("uffd_poll_thread create"), exit(1); + if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) { + perror("uffd_poll_thread create"); + exit(1); + } pid = fork(); - if (pid < 0) - perror("fork"), exit(1); + if (pid < 0) { + perror("fork"); + exit(1); + } if (!pid) return faulting_process(0); waitpid(pid, &err, 0); - if (err) - fprintf(stderr, "faulting process failed\n"), exit(1); + if (err) { + fprintf(stderr, "faulting process failed\n"); + exit(1); + } - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) - perror("pipe write"), exit(1); + if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) { + perror("pipe write"); + exit(1); + } if (pthread_join(uffd_mon, NULL)) return 1; @@ -1110,38 +1164,49 @@ static int userfaultfd_sig_test(void) uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; if (test_uffdio_wp) uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP; - if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) - fprintf(stderr, "register failure\n"), exit(1); + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) { + fprintf(stderr, "register failure\n"); + exit(1); + } expected_ioctls = uffd_test_ops->expected_ioctls; - if ((uffdio_register.ioctls & expected_ioctls) != - expected_ioctls) - fprintf(stderr, - "unexpected missing ioctl for anon memory\n"), - exit(1); + if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) { + fprintf(stderr, "unexpected missing ioctl for anon memory\n"); + exit(1); + } - if (faulting_process(1)) - fprintf(stderr, "faulting process failed\n"), exit(1); + if (faulting_process(1)) { + fprintf(stderr, "faulting process failed\n"); + exit(1); + } if (uffd_test_ops->release_pages(area_dst)) return 1; - if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) - perror("uffd_poll_thread create"), exit(1); + if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) { + perror("uffd_poll_thread create"); + exit(1); + } pid = fork(); - if (pid < 0) - perror("fork"), exit(1); + if (pid < 0) { + perror("fork"); + exit(1); + } if (!pid) exit(faulting_process(2)); waitpid(pid, &err, 0); - if (err) - fprintf(stderr, "faulting process failed\n"), exit(1); + if (err) { + fprintf(stderr, "faulting process failed\n"); + exit(1); + } - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) - perror("pipe write"), exit(1); + if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) { + perror("pipe write"); + exit(1); + } if (pthread_join(uffd_mon, (void **)&userfaults)) return 1; @@ -1395,7 +1460,7 @@ static void set_test_type(const char *type) test_type = TEST_SHMEM; uffd_test_ops = &shmem_uffd_test_ops; } else { - fprintf(stderr, "Unknown test type: %s\n", type), exit(1); + fprintf(stderr, "Unknown test type: %s\n", type); exit(1); } if (test_type == TEST_HUGETLB) @@ -1403,12 +1468,15 @@ static void set_test_type(const char *type) else page_size = sysconf(_SC_PAGE_SIZE); - if (!page_size) - fprintf(stderr, "Unable to determine page size\n"), - exit(2); + if (!page_size) { + fprintf(stderr, "Unable to determine page size\n"); + exit(2); + } if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2 - > page_size) - fprintf(stderr, "Impossible to run this test\n"), exit(2); + > page_size) { + fprintf(stderr, "Impossible to run this test\n"); + exit(2); + } } static void sigalrm(int sig) @@ -1425,8 +1493,10 @@ int main(int argc, char **argv) if (argc < 4) usage(); - if (signal(SIGALRM, sigalrm) == SIG_ERR) - fprintf(stderr, "failed to arm SIGALRM"), exit(1); + if (signal(SIGALRM, sigalrm) == SIG_ERR) { + fprintf(stderr, "failed to arm SIGALRM"); + exit(1); + } alarm(ALARM_INTERVAL_SECS); set_test_type(argv[1]); -- cgit v1.3.1 From 05c36e5adf2beae64d118f49fcaafbe8ce373db1 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 25 Sep 2020 17:21:04 +0200 Subject: tools/power/acpi: Serialize Makefile Before this patch you get tools/power/acpi/Makefile.rules included in parallel trying to copy KERNEL_INCLUDE multiple times: make -j20 acpi DESCEND power/acpi DESCEND tools/acpidbg DESCEND tools/acpidump DESCEND tools/ec MKDIR include MKDIR include MKDIR include CP include CP include cp: cannot create directory '/home/abuild/rpmbuild/BUILD/linux-5.7.7+git20200917.10b82d517648/tools/power/acpi/include/acpi': File exists make[2]: *** [../../Makefile.rules:20: /home/abuild/rpmbuild/BUILD/linux-5.7.7+git20200917.10b82d517648/tools/power/acpi/include] Error 1 make[1]: *** [Makefile:16: acpidbg] Error 2 make[1]: *** Waiting for unfinished jobs.... with this patch each subdirectory will be processed serialized: DESCEND power/acpi DESCEND tools/acpidbg MKDIR include CP include CC tools/acpidbg/acpidbg.o LD acpidbg STRIP acpidbg DESCEND tools/acpidump CC tools/acpidump/apdump.o ... LD acpidump STRIP acpidump DESCEND tools/ec CC tools/ec/ec_access.o LD ec STRIP ec Signed-off-by: Thomas Renninger Signed-off-by: Rafael J. Wysocki --- tools/power/acpi/Makefile | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/power/acpi/Makefile b/tools/power/acpi/Makefile index ebd3e1a1c28e..a249c50ebf55 100644 --- a/tools/power/acpi/Makefile +++ b/tools/power/acpi/Makefile @@ -7,6 +7,8 @@ include ../../scripts/Makefile.include +.NOTPARALLEL: + all: acpidbg acpidump ec clean: acpidbg_clean acpidump_clean ec_clean install: acpidbg_install acpidump_install ec_install -- cgit v1.3.1 From 96d46c508506136ed35c4b02d74ce38e91d88421 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 1 Oct 2020 18:34:54 -0700 Subject: bpf: selftest: Ensure the child sk inherited all bpf_sock_ops_cb_flags This patch adds a test to ensure the child sk inherited everything from the bpf_sock_ops_cb_flags of the listen sk: 1. Sets one more cb_flags (BPF_SOCK_OPS_STATE_CB_FLAG) to the listen sk in test_tcp_hdr_options.c 2. Saves the skops->bpf_sock_ops_cb_flags when handling the newly established passive connection 3. CHECK() it is the same as the listen sk This also covers the fastopen case as the existing test_tcp_hdr_options.c does. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201002013454.2542367-1-kafai@fb.com --- tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c | 12 ++++++++++++ .../testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c | 4 ++-- tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c | 7 +++++-- tools/testing/selftests/bpf/test_tcp_hdr_options.h | 5 +++-- 4 files changed, 22 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index 24ba0d21b641..c86e67214a9e 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -264,9 +264,19 @@ static int check_error_linum(const struct sk_fds *sk_fds) static void check_hdr_and_close_fds(struct sk_fds *sk_fds) { + const __u32 expected_inherit_cb_flags = + BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG | + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG | + BPF_SOCK_OPS_STATE_CB_FLAG; + if (sk_fds_shutdown(sk_fds)) goto check_linum; + if (CHECK(expected_inherit_cb_flags != skel->bss->inherit_cb_flags, + "Unexpected inherit_cb_flags", "0x%x != 0x%x\n", + skel->bss->inherit_cb_flags, expected_inherit_cb_flags)) + goto check_linum; + if (check_hdr_stg(&exp_passive_hdr_stg, sk_fds->passive_fd, "passive_hdr_stg")) goto check_linum; @@ -321,6 +331,8 @@ static void reset_test(void) memset(&skel->bss->active_estab_in, 0, optsize); memset(&skel->bss->active_fin_in, 0, optsize); + skel->bss->inherit_cb_flags = 0; + skel->data->test_kind = TCPOPT_EXP; skel->data->test_magic = 0xeB9F; diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c index 3a216d1d0226..72ec0178f653 100644 --- a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c @@ -304,10 +304,10 @@ int misc_estab(struct bpf_sock_ops *skops) passive_lport_n = __bpf_htons(passive_lport_h); bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN, &true_val, sizeof(true_val)); - set_hdr_cb_flags(skops); + set_hdr_cb_flags(skops, 0); break; case BPF_SOCK_OPS_TCP_CONNECT_CB: - set_hdr_cb_flags(skops); + set_hdr_cb_flags(skops, 0); break; case BPF_SOCK_OPS_PARSE_HDR_OPT_CB: return handle_parse_hdr(skops); diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c index 9197a23df3da..678bd0fad29e 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c @@ -21,6 +21,7 @@ __u8 test_kind = TCPOPT_EXP; __u16 test_magic = 0xeB9F; +__u32 inherit_cb_flags = 0; struct bpf_test_option passive_synack_out = {}; struct bpf_test_option passive_fin_out = {}; @@ -467,6 +468,8 @@ static int handle_passive_estab(struct bpf_sock_ops *skops) struct tcphdr *th; int err; + inherit_cb_flags = skops->bpf_sock_ops_cb_flags; + err = load_option(skops, &passive_estab_in, true); if (err == -ENOENT) { /* saved_syn is not found. It was in syncookie mode. @@ -600,10 +603,10 @@ int estab(struct bpf_sock_ops *skops) case BPF_SOCK_OPS_TCP_LISTEN_CB: bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN, &true_val, sizeof(true_val)); - set_hdr_cb_flags(skops); + set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG); break; case BPF_SOCK_OPS_TCP_CONNECT_CB: - set_hdr_cb_flags(skops); + set_hdr_cb_flags(skops, 0); break; case BPF_SOCK_OPS_PARSE_HDR_OPT_CB: return handle_parse_hdr(skops); diff --git a/tools/testing/selftests/bpf/test_tcp_hdr_options.h b/tools/testing/selftests/bpf/test_tcp_hdr_options.h index 78a8cf9eab42..6118e3ab61fc 100644 --- a/tools/testing/selftests/bpf/test_tcp_hdr_options.h +++ b/tools/testing/selftests/bpf/test_tcp_hdr_options.h @@ -110,12 +110,13 @@ static inline void clear_hdr_cb_flags(struct bpf_sock_ops *skops) BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG)); } -static inline void set_hdr_cb_flags(struct bpf_sock_ops *skops) +static inline void set_hdr_cb_flags(struct bpf_sock_ops *skops, __u32 extra) { bpf_sock_ops_cb_flags_set(skops, skops->bpf_sock_ops_cb_flags | BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG | - BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG); + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG | + extra); } static inline void clear_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops) -- cgit v1.3.1 From 4976b718c3551faba2c0616ef55ebeb74db1c5ca Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Tue, 29 Sep 2020 16:50:44 -0700 Subject: bpf: Introduce pseudo_btf_id Pseudo_btf_id is a type of ld_imm insn that associates a btf_id to a ksym so that further dereferences on the ksym can use the BTF info to validate accesses. Internally, when seeing a pseudo_btf_id ld insn, the verifier reads the btf_id stored in the insn[0]'s imm field and marks the dst_reg as PTR_TO_BTF_ID. The btf_id points to a VAR_KIND, which is encoded in btf_vminux by pahole. If the VAR is not of a struct type, the dst reg will be marked as PTR_TO_MEM instead of PTR_TO_BTF_ID and the mem_size is resolved to the size of the VAR's type. >From the VAR btf_id, the verifier can also read the address of the ksym's corresponding kernel var from kallsyms and use that to fill dst_reg. Therefore, the proper functionality of pseudo_btf_id depends on (1) kallsyms and (2) the encoding of kernel global VARs in pahole, which should be available since pahole v1.18. Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200929235049.2533242-2-haoluo@google.com --- include/linux/bpf_verifier.h | 7 +++ include/linux/btf.h | 15 +++++ include/uapi/linux/bpf.h | 36 +++++++++--- kernel/bpf/btf.c | 15 ----- kernel/bpf/verifier.c | 125 ++++++++++++++++++++++++++++++++++++----- tools/include/uapi/linux/bpf.h | 36 +++++++++--- 6 files changed, 188 insertions(+), 46 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 363b4f1c562a..e83ef6f6bf43 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -308,6 +308,13 @@ struct bpf_insn_aux_data { u32 map_index; /* index into used_maps[] */ u32 map_off; /* offset from value base address */ }; + struct { + enum bpf_reg_type reg_type; /* type of pseudo_btf_id */ + union { + u32 btf_id; /* btf_id for struct typed var */ + u32 mem_size; /* mem_size for non-struct typed var */ + }; + } btf_var; }; u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ diff --git a/include/linux/btf.h b/include/linux/btf.h index 024e16ff7dcc..af1244180588 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -145,6 +145,21 @@ static inline bool btf_type_is_func_proto(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO; } +static inline bool btf_type_is_var(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_VAR; +} + +/* union is only a special case of struct: + * all its offsetof(member) == 0 + */ +static inline bool btf_type_is_struct(const struct btf_type *t) +{ + u8 kind = BTF_INFO_KIND(t->info); + + return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; +} + static inline u16 btf_type_vlen(const struct btf_type *t) { return BTF_INFO_VLEN(t->info); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4f556cfcbfbe..2aa156af24d6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -356,18 +356,36 @@ enum bpf_link_type { #define BPF_F_SLEEPABLE (1U << 4) /* When BPF ldimm64's insn[0].src_reg != 0 then this can have - * two extensions: - * - * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE - * insn[0].imm: map fd map fd - * insn[1].imm: 0 offset into value - * insn[0].off: 0 0 - * insn[1].off: 0 0 - * ldimm64 rewrite: address of map address of map[0]+offset - * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE + * the following extensions: + * + * insn[0].src_reg: BPF_PSEUDO_MAP_FD + * insn[0].imm: map fd + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of map + * verifier type: CONST_PTR_TO_MAP */ #define BPF_PSEUDO_MAP_FD 1 +/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE + * insn[0].imm: map fd + * insn[1].imm: offset into value + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of map[0]+offset + * verifier type: PTR_TO_MAP_VALUE + */ #define BPF_PSEUDO_MAP_VALUE 2 +/* insn[0].src_reg: BPF_PSEUDO_BTF_ID + * insn[0].imm: kernel btd id of VAR + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of the kernel variable + * verifier type: PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var + * is struct/union. + */ +#define BPF_PSEUDO_BTF_ID 3 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative * offset to another bpf function diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 4d0ee7839fdb..00569afe3d0d 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -440,16 +440,6 @@ static bool btf_type_nosize_or_null(const struct btf_type *t) return !t || btf_type_nosize(t); } -/* union is only a special case of struct: - * all its offsetof(member) == 0 - */ -static bool btf_type_is_struct(const struct btf_type *t) -{ - u8 kind = BTF_INFO_KIND(t->info); - - return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; -} - static bool __btf_type_is_struct(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT; @@ -460,11 +450,6 @@ static bool btf_type_is_array(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY; } -static bool btf_type_is_var(const struct btf_type *t) -{ - return BTF_INFO_KIND(t->info) == BTF_KIND_VAR; -} - static bool btf_type_is_datasec(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 015a1c074b6b..fe4965079773 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7488,6 +7488,7 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) { struct bpf_insn_aux_data *aux = cur_aux(env); struct bpf_reg_state *regs = cur_regs(env); + struct bpf_reg_state *dst_reg; struct bpf_map *map; int err; @@ -7504,25 +7505,44 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) if (err) return err; + dst_reg = ®s[insn->dst_reg]; if (insn->src_reg == 0) { u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; - regs[insn->dst_reg].type = SCALAR_VALUE; + dst_reg->type = SCALAR_VALUE; __mark_reg_known(®s[insn->dst_reg], imm); return 0; } + if (insn->src_reg == BPF_PSEUDO_BTF_ID) { + mark_reg_known_zero(env, regs, insn->dst_reg); + + dst_reg->type = aux->btf_var.reg_type; + switch (dst_reg->type) { + case PTR_TO_MEM: + dst_reg->mem_size = aux->btf_var.mem_size; + break; + case PTR_TO_BTF_ID: + dst_reg->btf_id = aux->btf_var.btf_id; + break; + default: + verbose(env, "bpf verifier is misconfigured\n"); + return -EFAULT; + } + return 0; + } + map = env->used_maps[aux->map_index]; mark_reg_known_zero(env, regs, insn->dst_reg); - regs[insn->dst_reg].map_ptr = map; + dst_reg->map_ptr = map; if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) { - regs[insn->dst_reg].type = PTR_TO_MAP_VALUE; - regs[insn->dst_reg].off = aux->map_off; + dst_reg->type = PTR_TO_MAP_VALUE; + dst_reg->off = aux->map_off; if (map_value_has_spin_lock(map)) - regs[insn->dst_reg].id = ++env->id_gen; + dst_reg->id = ++env->id_gen; } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) { - regs[insn->dst_reg].type = CONST_PTR_TO_MAP; + dst_reg->type = CONST_PTR_TO_MAP; } else { verbose(env, "bpf verifier is misconfigured\n"); return -EINVAL; @@ -9424,6 +9444,73 @@ process_bpf_exit: return 0; } +/* replace pseudo btf_id with kernel symbol address */ +static int check_pseudo_btf_id(struct bpf_verifier_env *env, + struct bpf_insn *insn, + struct bpf_insn_aux_data *aux) +{ + u32 type, id = insn->imm; + const struct btf_type *t; + const char *sym_name; + u64 addr; + + if (!btf_vmlinux) { + verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n"); + return -EINVAL; + } + + if (insn[1].imm != 0) { + verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n"); + return -EINVAL; + } + + t = btf_type_by_id(btf_vmlinux, id); + if (!t) { + verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id); + return -ENOENT; + } + + if (!btf_type_is_var(t)) { + verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", + id); + return -EINVAL; + } + + sym_name = btf_name_by_offset(btf_vmlinux, t->name_off); + addr = kallsyms_lookup_name(sym_name); + if (!addr) { + verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n", + sym_name); + return -ENOENT; + } + + insn[0].imm = (u32)addr; + insn[1].imm = addr >> 32; + + type = t->type; + t = btf_type_skip_modifiers(btf_vmlinux, type, NULL); + if (!btf_type_is_struct(t)) { + const struct btf_type *ret; + const char *tname; + u32 tsize; + + /* resolve the type size of ksym. */ + ret = btf_resolve_size(btf_vmlinux, t, &tsize); + if (IS_ERR(ret)) { + tname = btf_name_by_offset(btf_vmlinux, t->name_off); + verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n", + tname, PTR_ERR(ret)); + return -EINVAL; + } + aux->btf_var.reg_type = PTR_TO_MEM; + aux->btf_var.mem_size = tsize; + } else { + aux->btf_var.reg_type = PTR_TO_BTF_ID; + aux->btf_var.btf_id = type; + } + return 0; +} + static int check_map_prealloc(struct bpf_map *map) { return (map->map_type != BPF_MAP_TYPE_HASH && @@ -9534,10 +9621,14 @@ static bool bpf_map_is_cgroup_storage(struct bpf_map *map) map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); } -/* look for pseudo eBPF instructions that access map FDs and - * replace them with actual map pointers +/* find and rewrite pseudo imm in ld_imm64 instructions: + * + * 1. if it accesses map FD, replace it with actual map pointer. + * 2. if it accesses btf_id of a VAR, replace it with pointer to the var. + * + * NOTE: btf_vmlinux is required for converting pseudo btf_id. */ -static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) +static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) { struct bpf_insn *insn = env->prog->insnsi; int insn_cnt = env->prog->len; @@ -9578,6 +9669,14 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) /* valid generic load 64-bit imm */ goto next_insn; + if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) { + aux = &env->insn_aux_data[i]; + err = check_pseudo_btf_id(env, insn, aux); + if (err) + return err; + goto next_insn; + } + /* In final convert_pseudo_ld_imm64() step, this is * converted into regular 64-bit imm load insn. */ @@ -11633,10 +11732,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, if (is_priv) env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; - ret = replace_map_fd_with_map_ptr(env); - if (ret < 0) - goto skip_full_check; - if (bpf_prog_is_dev_bound(env->prog->aux)) { ret = bpf_prog_offload_verifier_prep(env->prog); if (ret) @@ -11662,6 +11757,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, if (ret) goto skip_full_check; + ret = resolve_pseudo_ldimm64(env); + if (ret < 0) + goto skip_full_check; + ret = check_cfg(env); if (ret < 0) goto skip_full_check; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4f556cfcbfbe..2aa156af24d6 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -356,18 +356,36 @@ enum bpf_link_type { #define BPF_F_SLEEPABLE (1U << 4) /* When BPF ldimm64's insn[0].src_reg != 0 then this can have - * two extensions: - * - * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE - * insn[0].imm: map fd map fd - * insn[1].imm: 0 offset into value - * insn[0].off: 0 0 - * insn[1].off: 0 0 - * ldimm64 rewrite: address of map address of map[0]+offset - * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE + * the following extensions: + * + * insn[0].src_reg: BPF_PSEUDO_MAP_FD + * insn[0].imm: map fd + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of map + * verifier type: CONST_PTR_TO_MAP */ #define BPF_PSEUDO_MAP_FD 1 +/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE + * insn[0].imm: map fd + * insn[1].imm: offset into value + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of map[0]+offset + * verifier type: PTR_TO_MAP_VALUE + */ #define BPF_PSEUDO_MAP_VALUE 2 +/* insn[0].src_reg: BPF_PSEUDO_BTF_ID + * insn[0].imm: kernel btd id of VAR + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of the kernel variable + * verifier type: PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var + * is struct/union. + */ +#define BPF_PSEUDO_BTF_ID 3 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative * offset to another bpf function -- cgit v1.3.1 From d370bbe1214381f444cbd9cdf3b8647223abb536 Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Tue, 29 Sep 2020 16:50:45 -0700 Subject: bpf/libbpf: BTF support for typed ksyms If a ksym is defined with a type, libbpf will try to find the ksym's btf information from kernel btf. If a valid btf entry for the ksym is found, libbpf can pass in the found btf id to the verifier, which validates the ksym's type and value. Typeless ksyms (i.e. those defined as 'void') will not have such btf_id, but it has the symbol's address (read from kallsyms) and its value is treated as a raw pointer. Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200929235049.2533242-3-haoluo@google.com --- tools/lib/bpf/libbpf.c | 112 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 99 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a4f55f8a460d..9b36c52b8511 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -390,6 +390,12 @@ struct extern_desc { } kcfg; struct { unsigned long long addr; + + /* target btf_id of the corresponding kernel var. */ + int vmlinux_btf_id; + + /* local btf_id of the ksym extern's type. */ + __u32 type_id; } ksym; }; }; @@ -2522,12 +2528,23 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj) { bool need_vmlinux_btf = false; struct bpf_program *prog; - int err; + int i, err; /* CO-RE relocations need kernel BTF */ if (obj->btf_ext && obj->btf_ext->core_relo_info.len) need_vmlinux_btf = true; + /* Support for typed ksyms needs kernel BTF */ + for (i = 0; i < obj->nr_extern; i++) { + const struct extern_desc *ext; + + ext = &obj->externs[i]; + if (ext->type == EXT_KSYM && ext->ksym.type_id) { + need_vmlinux_btf = true; + break; + } + } + bpf_object__for_each_program(prog, obj) { if (!prog->load) continue; @@ -3156,16 +3173,10 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return -ENOTSUP; } } else if (strcmp(sec_name, KSYMS_SEC) == 0) { - const struct btf_type *vt; - ksym_sec = sec; ext->type = EXT_KSYM; - - vt = skip_mods_and_typedefs(obj->btf, t->type, NULL); - if (!btf_is_void(vt)) { - pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name); - return -ENOTSUP; - } + skip_mods_and_typedefs(obj->btf, t->type, + &ext->ksym.type_id); } else { pr_warn("unrecognized extern section '%s'\n", sec_name); return -ENOTSUP; @@ -5800,8 +5811,13 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; insn[1].imm = ext->kcfg.data_off; } else /* EXT_KSYM */ { - insn[0].imm = (__u32)ext->ksym.addr; - insn[1].imm = ext->ksym.addr >> 32; + if (ext->ksym.type_id) { /* typed ksyms */ + insn[0].src_reg = BPF_PSEUDO_BTF_ID; + insn[0].imm = ext->ksym.vmlinux_btf_id; + } else { /* typeless ksyms */ + insn[0].imm = (__u32)ext->ksym.addr; + insn[1].imm = ext->ksym.addr >> 32; + } } relo->processed = true; break; @@ -6933,10 +6949,72 @@ out: return err; } +static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) +{ + struct extern_desc *ext; + int i, id; + + for (i = 0; i < obj->nr_extern; i++) { + const struct btf_type *targ_var, *targ_type; + __u32 targ_type_id, local_type_id; + const char *targ_var_name; + int ret; + + ext = &obj->externs[i]; + if (ext->type != EXT_KSYM || !ext->ksym.type_id) + continue; + + id = btf__find_by_name_kind(obj->btf_vmlinux, ext->name, + BTF_KIND_VAR); + if (id <= 0) { + pr_warn("extern (ksym) '%s': failed to find BTF ID in vmlinux BTF.\n", + ext->name); + return -ESRCH; + } + + /* find local type_id */ + local_type_id = ext->ksym.type_id; + + /* find target type_id */ + targ_var = btf__type_by_id(obj->btf_vmlinux, id); + targ_var_name = btf__name_by_offset(obj->btf_vmlinux, + targ_var->name_off); + targ_type = skip_mods_and_typedefs(obj->btf_vmlinux, + targ_var->type, + &targ_type_id); + + ret = bpf_core_types_are_compat(obj->btf, local_type_id, + obj->btf_vmlinux, targ_type_id); + if (ret <= 0) { + const struct btf_type *local_type; + const char *targ_name, *local_name; + + local_type = btf__type_by_id(obj->btf, local_type_id); + local_name = btf__name_by_offset(obj->btf, + local_type->name_off); + targ_name = btf__name_by_offset(obj->btf_vmlinux, + targ_type->name_off); + + pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n", + ext->name, local_type_id, + btf_kind_str(local_type), local_name, targ_type_id, + btf_kind_str(targ_type), targ_name); + return -EINVAL; + } + + ext->is_set = true; + ext->ksym.vmlinux_btf_id = id; + pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n", + ext->name, id, btf_kind_str(targ_var), targ_var_name); + } + return 0; +} + static int bpf_object__resolve_externs(struct bpf_object *obj, const char *extra_kconfig) { bool need_config = false, need_kallsyms = false; + bool need_vmlinux_btf = false; struct extern_desc *ext; void *kcfg_data = NULL; int err, i; @@ -6967,7 +7045,10 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, strncmp(ext->name, "CONFIG_", 7) == 0) { need_config = true; } else if (ext->type == EXT_KSYM) { - need_kallsyms = true; + if (ext->ksym.type_id) + need_vmlinux_btf = true; + else + need_kallsyms = true; } else { pr_warn("unrecognized extern '%s'\n", ext->name); return -EINVAL; @@ -6996,6 +7077,11 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, if (err) return -EINVAL; } + if (need_vmlinux_btf) { + err = bpf_object__resolve_ksyms_btf_id(obj); + if (err) + return -EINVAL; + } for (i = 0; i < obj->nr_extern; i++) { ext = &obj->externs[i]; @@ -7028,10 +7114,10 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) } err = bpf_object__probe_loading(obj); + err = err ? : bpf_object__load_vmlinux_btf(obj); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); err = err ? : bpf_object__sanitize_and_load_btf(obj); err = err ? : bpf_object__sanitize_maps(obj); - err = err ? : bpf_object__load_vmlinux_btf(obj); err = err ? : bpf_object__init_kern_struct_ops_maps(obj); err = err ? : bpf_object__create_maps(obj); err = err ? : bpf_object__relocate(obj, attr->target_btf_path); -- cgit v1.3.1 From 2c2f6abeff1322a61f5ad7ce0ecccabf53e4fad5 Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Tue, 29 Sep 2020 16:50:46 -0700 Subject: selftests/bpf: Ksyms_btf to test typed ksyms Selftests for typed ksyms. Tests two types of ksyms: one is a struct, the other is a plain int. This tests two paths in the kernel. Struct ksyms will be converted into PTR_TO_BTF_ID by the verifier while int typed ksyms will be converted into PTR_TO_MEM. Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200929235049.2533242-4-haoluo@google.com --- tools/testing/selftests/bpf/prog_tests/ksyms.c | 38 ++++-------- tools/testing/selftests/bpf/prog_tests/ksyms_btf.c | 70 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/test_ksyms_btf.c | 23 +++++++ tools/testing/selftests/bpf/trace_helpers.c | 27 +++++++++ tools/testing/selftests/bpf/trace_helpers.h | 4 ++ 5 files changed, 137 insertions(+), 25 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/ksyms_btf.c create mode 100644 tools/testing/selftests/bpf/progs/test_ksyms_btf.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c index b771804b2342..b295969b263b 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c @@ -7,40 +7,28 @@ static int duration; -static __u64 kallsyms_find(const char *sym) -{ - char type, name[500]; - __u64 addr, res = 0; - FILE *f; - - f = fopen("/proc/kallsyms", "r"); - if (CHECK(!f, "kallsyms_fopen", "failed to open: %d\n", errno)) - return 0; - - while (fscanf(f, "%llx %c %499s%*[^\n]\n", &addr, &type, name) > 0) { - if (strcmp(name, sym) == 0) { - res = addr; - goto out; - } - } - - CHECK(false, "not_found", "symbol %s not found\n", sym); -out: - fclose(f); - return res; -} - void test_ksyms(void) { - __u64 per_cpu_start_addr = kallsyms_find("__per_cpu_start"); - __u64 link_fops_addr = kallsyms_find("bpf_link_fops"); const char *btf_path = "/sys/kernel/btf/vmlinux"; struct test_ksyms *skel; struct test_ksyms__data *data; + __u64 link_fops_addr, per_cpu_start_addr; struct stat st; __u64 btf_size; int err; + err = kallsyms_find("bpf_link_fops", &link_fops_addr); + if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno)) + return; + if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_link_fops' not found\n")) + return; + + err = kallsyms_find("__per_cpu_start", &per_cpu_start_addr); + if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno)) + return; + if (CHECK(err == -ENOENT, "ksym_find", "symbol 'per_cpu_start' not found\n")) + return; + if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno)) return; btf_size = st.st_size; diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c new file mode 100644 index 000000000000..c6ef06c0629a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Google */ + +#include +#include +#include +#include "test_ksyms_btf.skel.h" + +static int duration; + +void test_ksyms_btf(void) +{ + __u64 runqueues_addr, bpf_prog_active_addr; + struct test_ksyms_btf *skel = NULL; + struct test_ksyms_btf__data *data; + struct btf *btf; + int percpu_datasec; + int err; + + err = kallsyms_find("runqueues", &runqueues_addr); + if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno)) + return; + if (CHECK(err == -ENOENT, "ksym_find", "symbol 'runqueues' not found\n")) + return; + + err = kallsyms_find("bpf_prog_active", &bpf_prog_active_addr); + if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno)) + return; + if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_prog_active' not found\n")) + return; + + btf = libbpf_find_kernel_btf(); + if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n", + PTR_ERR(btf))) + return; + + percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu", + BTF_KIND_DATASEC); + if (percpu_datasec < 0) { + printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n", + __func__); + test__skip(); + goto cleanup; + } + + skel = test_ksyms_btf__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n")) + goto cleanup; + + err = test_ksyms_btf__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* trigger tracepoint */ + usleep(1); + + data = skel->data; + CHECK(data->out__runqueues_addr != runqueues_addr, "runqueues_addr", + "got %llu, exp %llu\n", + (unsigned long long)data->out__runqueues_addr, + (unsigned long long)runqueues_addr); + CHECK(data->out__bpf_prog_active_addr != bpf_prog_active_addr, "bpf_prog_active_addr", + "got %llu, exp %llu\n", + (unsigned long long)data->out__bpf_prog_active_addr, + (unsigned long long)bpf_prog_active_addr); + +cleanup: + btf__free(btf); + test_ksyms_btf__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf.c new file mode 100644 index 000000000000..7dde2082131d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Google */ + +#include "vmlinux.h" + +#include + +__u64 out__runqueues_addr = -1; +__u64 out__bpf_prog_active_addr = -1; + +extern const struct rq runqueues __ksym; /* struct type global var. */ +extern const int bpf_prog_active __ksym; /* int type global var. */ + +SEC("raw_tp/sys_enter") +int handler(const void *ctx) +{ + out__runqueues_addr = (__u64)&runqueues; + out__bpf_prog_active_addr = (__u64)&bpf_prog_active; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 4d0e913bbb22..1bbd1d9830c8 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -90,6 +90,33 @@ long ksym_get_addr(const char *name) return 0; } +/* open kallsyms and read symbol addresses on the fly. Without caching all symbols, + * this is faster than load + find. + */ +int kallsyms_find(const char *sym, unsigned long long *addr) +{ + char type, name[500]; + unsigned long long value; + int err = 0; + FILE *f; + + f = fopen("/proc/kallsyms", "r"); + if (!f) + return -EINVAL; + + while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) { + if (strcmp(name, sym) == 0) { + *addr = value; + goto out; + } + } + err = -ENOENT; + +out: + fclose(f); + return err; +} + void read_trace_pipe(void) { int trace_fd; diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index 25ef597dd03f..f62fdef9e589 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -12,6 +12,10 @@ struct ksym { int load_kallsyms(void); struct ksym *ksym_search(long key); long ksym_get_addr(const char *name); + +/* open kallsyms and find addresses on the fly, faster than load + search. */ +int kallsyms_find(const char *sym, unsigned long long *addr); + void read_trace_pipe(void); #endif -- cgit v1.3.1 From eaa6bcb71ef6ed3dc18fc525ee7e293b06b4882b Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Tue, 29 Sep 2020 16:50:47 -0700 Subject: bpf: Introduce bpf_per_cpu_ptr() Add bpf_per_cpu_ptr() to help bpf programs access percpu vars. bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the kernel except that it may return NULL. This happens when the cpu parameter is out of range. So the caller must check the returned value. Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200929235049.2533242-5-haoluo@google.com --- include/linux/bpf.h | 4 +++ include/linux/btf.h | 11 ++++++++ include/uapi/linux/bpf.h | 18 ++++++++++++ kernel/bpf/btf.c | 10 ------- kernel/bpf/helpers.c | 18 ++++++++++++ kernel/bpf/verifier.c | 64 ++++++++++++++++++++++++++++++++++++++++-- kernel/trace/bpf_trace.c | 2 ++ tools/include/uapi/linux/bpf.h | 18 ++++++++++++ 8 files changed, 132 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 50e5c4b52bd1..9dde15b2479d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -293,6 +293,7 @@ enum bpf_arg_type { ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */ ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ + ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ __BPF_ARG_TYPE_MAX, }; @@ -307,6 +308,7 @@ enum bpf_return_type { RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ + RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@ -405,6 +407,7 @@ enum bpf_reg_type { PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */ PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ + PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ }; /* The information passed from prog-specific *_is_valid_access @@ -1828,6 +1831,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; +extern const struct bpf_func_proto bpf_per_cpu_ptr_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/linux/btf.h b/include/linux/btf.h index af1244180588..2bf641829664 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -110,6 +110,11 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type, i < btf_type_vlen(struct_type); \ i++, member++) +#define for_each_vsi(i, datasec_type, member) \ + for (i = 0, member = btf_type_var_secinfo(datasec_type); \ + i < btf_type_vlen(datasec_type); \ + i++, member++) + static inline bool btf_type_is_ptr(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_PTR; @@ -194,6 +199,12 @@ static inline const struct btf_member *btf_type_member(const struct btf_type *t) return (const struct btf_member *)(t + 1); } +static inline const struct btf_var_secinfo *btf_type_var_secinfo( + const struct btf_type *t) +{ + return (const struct btf_var_secinfo *)(t + 1); +} + #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2aa156af24d6..f3c1b637ab39 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3686,6 +3686,23 @@ union bpf_attr { * Return * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. + * + * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu) + * Description + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a + * pointer to the percpu kernel variable on *cpu*. A ksym is an + * extern variable decorated with '__ksym'. For ksym, there is a + * global var (either static or global) defined of the same name + * in the kernel. The ksym is percpu if the global var is percpu. + * The returned pointer points to the global percpu var on *cpu*. + * + * bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the + * kernel, except that bpf_per_cpu_ptr() may return NULL. This + * happens if *cpu* is larger than nr_cpu_ids. The caller of + * bpf_per_cpu_ptr() must check the returned value. + * Return + * A pointer pointing to the kernel percpu variable on *cpu*, or + * NULL, if *cpu* is invalid. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3841,6 +3858,7 @@ union bpf_attr { FN(seq_printf_btf), \ FN(skb_cgroup_classid), \ FN(redirect_neigh), \ + FN(bpf_per_cpu_ptr), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 00569afe3d0d..ed7d02e8bc93 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -188,11 +188,6 @@ i < btf_type_vlen(struct_type); \ i++, member++) -#define for_each_vsi(i, struct_type, member) \ - for (i = 0, member = btf_type_var_secinfo(struct_type); \ - i < btf_type_vlen(struct_type); \ - i++, member++) - #define for_each_vsi_from(i, from, struct_type, member) \ for (i = from, member = btf_type_var_secinfo(struct_type) + from; \ i < btf_type_vlen(struct_type); \ @@ -598,11 +593,6 @@ static const struct btf_var *btf_type_var(const struct btf_type *t) return (const struct btf_var *)(t + 1); } -static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t) -{ - return (const struct btf_var_secinfo *)(t + 1); -} - static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) { return kind_ops[BTF_INFO_KIND(t->info)]; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index e825441781ab..14fe3f64fd82 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -623,6 +623,22 @@ const struct bpf_func_proto bpf_copy_from_user_proto = { .arg3_type = ARG_ANYTHING, }; +BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu) +{ + if (cpu >= nr_cpu_ids) + return (unsigned long)NULL; + + return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu); +} + +const struct bpf_func_proto bpf_per_cpu_ptr_proto = { + .func = bpf_per_cpu_ptr, + .gpl_only = false, + .ret_type = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, + .arg2_type = ARG_ANYTHING, +}; + const struct bpf_func_proto bpf_get_current_task_proto __weak; const struct bpf_func_proto bpf_probe_read_user_proto __weak; const struct bpf_func_proto bpf_probe_read_user_str_proto __weak; @@ -689,6 +705,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_snprintf_btf_proto; case BPF_FUNC_jiffies64: return &bpf_jiffies64_proto; + case BPF_FUNC_bpf_per_cpu_ptr: + return &bpf_per_cpu_ptr_proto; default: break; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index fe4965079773..216b8ece23ce 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -238,6 +238,8 @@ struct bpf_call_arg_meta { u64 msize_max_value; int ref_obj_id; int func_id; + u32 btf_id; + u32 ret_btf_id; }; struct btf *btf_vmlinux; @@ -517,6 +519,7 @@ static const char * const reg_type_str[] = { [PTR_TO_XDP_SOCK] = "xdp_sock", [PTR_TO_BTF_ID] = "ptr_", [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_", + [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_", [PTR_TO_MEM] = "mem", [PTR_TO_MEM_OR_NULL] = "mem_or_null", [PTR_TO_RDONLY_BUF] = "rdonly_buf", @@ -583,7 +586,9 @@ static void print_verifier_state(struct bpf_verifier_env *env, /* reg->off should be 0 for SCALAR_VALUE */ verbose(env, "%lld", reg->var_off.value + reg->off); } else { - if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL) + if (t == PTR_TO_BTF_ID || + t == PTR_TO_BTF_ID_OR_NULL || + t == PTR_TO_PERCPU_BTF_ID) verbose(env, "%s", kernel_type_name(reg->btf_id)); verbose(env, "(id=%d", reg->id); if (reg_type_may_be_refcounted_or_null(t)) @@ -2204,6 +2209,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_RDONLY_BUF_OR_NULL: case PTR_TO_RDWR_BUF: case PTR_TO_RDWR_BUF_OR_NULL: + case PTR_TO_PERCPU_BTF_ID: return true; default: return false; @@ -4017,6 +4023,7 @@ static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } }; static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } }; static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } }; static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } }; +static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } }; static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, @@ -4042,6 +4049,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types, [ARG_PTR_TO_INT] = &int_ptr_types, [ARG_PTR_TO_LONG] = &int_ptr_types, + [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, }; static int check_reg_type(struct bpf_verifier_env *env, u32 regno, @@ -4205,6 +4213,12 @@ skip_type_check: err = check_helper_mem_access(env, regno, meta->map_ptr->value_size, false, meta); + } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) { + if (!reg->btf_id) { + verbose(env, "Helper has invalid btf_id in R%d\n", regno); + return -EACCES; + } + meta->ret_btf_id = reg->btf_id; } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { if (meta->func_id == BPF_FUNC_spin_lock) { if (process_spin_lock(env, regno, true)) @@ -5114,6 +5128,30 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; regs[BPF_REG_0].id = ++env->id_gen; regs[BPF_REG_0].mem_size = meta.mem_size; + } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL) { + const struct btf_type *t; + + mark_reg_known_zero(env, regs, BPF_REG_0); + t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL); + if (!btf_type_is_struct(t)) { + u32 tsize; + const struct btf_type *ret; + const char *tname; + + /* resolve the type size of ksym. */ + ret = btf_resolve_size(btf_vmlinux, t, &tsize); + if (IS_ERR(ret)) { + tname = btf_name_by_offset(btf_vmlinux, t->name_off); + verbose(env, "unable to resolve the size of type '%s': %ld\n", + tname, PTR_ERR(ret)); + return -EINVAL; + } + regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; + regs[BPF_REG_0].mem_size = tsize; + } else { + regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL; + regs[BPF_REG_0].btf_id = meta.ret_btf_id; + } } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) { int ret_btf_id; @@ -7523,6 +7561,7 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) dst_reg->mem_size = aux->btf_var.mem_size; break; case PTR_TO_BTF_ID: + case PTR_TO_PERCPU_BTF_ID: dst_reg->btf_id = aux->btf_var.btf_id; break; default: @@ -9449,10 +9488,14 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, struct bpf_insn *insn, struct bpf_insn_aux_data *aux) { - u32 type, id = insn->imm; + u32 datasec_id, type, id = insn->imm; + const struct btf_var_secinfo *vsi; + const struct btf_type *datasec; const struct btf_type *t; const char *sym_name; + bool percpu = false; u64 addr; + int i; if (!btf_vmlinux) { verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n"); @@ -9484,12 +9527,27 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, return -ENOENT; } + datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu", + BTF_KIND_DATASEC); + if (datasec_id > 0) { + datasec = btf_type_by_id(btf_vmlinux, datasec_id); + for_each_vsi(i, datasec, vsi) { + if (vsi->type == id) { + percpu = true; + break; + } + } + } + insn[0].imm = (u32)addr; insn[1].imm = addr >> 32; type = t->type; t = btf_type_skip_modifiers(btf_vmlinux, type, NULL); - if (!btf_type_is_struct(t)) { + if (percpu) { + aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID; + aux->btf_var.btf_id = type; + } else if (!btf_type_is_struct(t)) { const struct btf_type *ret; const char *tname; u32 tsize; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e118a83439c3..364a322e2898 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1327,6 +1327,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL; case BPF_FUNC_snprintf_btf: return &bpf_snprintf_btf_proto; + case BPF_FUNC_bpf_per_cpu_ptr: + return &bpf_per_cpu_ptr_proto; default: return NULL; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 2aa156af24d6..f3c1b637ab39 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3686,6 +3686,23 @@ union bpf_attr { * Return * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. + * + * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu) + * Description + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a + * pointer to the percpu kernel variable on *cpu*. A ksym is an + * extern variable decorated with '__ksym'. For ksym, there is a + * global var (either static or global) defined of the same name + * in the kernel. The ksym is percpu if the global var is percpu. + * The returned pointer points to the global percpu var on *cpu*. + * + * bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the + * kernel, except that bpf_per_cpu_ptr() may return NULL. This + * happens if *cpu* is larger than nr_cpu_ids. The caller of + * bpf_per_cpu_ptr() must check the returned value. + * Return + * A pointer pointing to the kernel percpu variable on *cpu*, or + * NULL, if *cpu* is invalid. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3841,6 +3858,7 @@ union bpf_attr { FN(seq_printf_btf), \ FN(skb_cgroup_classid), \ FN(redirect_neigh), \ + FN(bpf_per_cpu_ptr), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.3.1 From 63d9b80dcf2c67bc5ade61cbbaa09d7af21f43f1 Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Tue, 29 Sep 2020 16:50:48 -0700 Subject: bpf: Introducte bpf_this_cpu_ptr() Add bpf_this_cpu_ptr() to help access percpu var on this cpu. This helper always returns a valid pointer, therefore no need to check returned value for NULL. Also note that all programs run with preemption disabled, which means that the returned pointer is stable during all the execution of the program. Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200929235049.2533242-6-haoluo@google.com --- include/linux/bpf.h | 2 ++ include/uapi/linux/bpf.h | 13 +++++++++++++ kernel/bpf/helpers.c | 14 ++++++++++++++ kernel/bpf/verifier.c | 11 ++++++++--- kernel/trace/bpf_trace.c | 2 ++ tools/include/uapi/linux/bpf.h | 13 +++++++++++++ 6 files changed, 52 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9dde15b2479d..dc63eeed4fd9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -309,6 +309,7 @@ enum bpf_return_type { RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ + RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@ -1832,6 +1833,7 @@ extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; extern const struct bpf_func_proto bpf_per_cpu_ptr_proto; +extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f3c1b637ab39..c446394135be 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3703,6 +3703,18 @@ union bpf_attr { * Return * A pointer pointing to the kernel percpu variable on *cpu*, or * NULL, if *cpu* is invalid. + * + * void *bpf_this_cpu_ptr(const void *percpu_ptr) + * Description + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a + * pointer to the percpu kernel variable on this cpu. See the + * description of 'ksym' in **bpf_per_cpu_ptr**\ (). + * + * bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in + * the kernel. Different from **bpf_per_cpu_ptr**\ (), it would + * never return NULL. + * Return + * A pointer pointing to the kernel percpu variable on this cpu. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3859,6 +3871,7 @@ union bpf_attr { FN(skb_cgroup_classid), \ FN(redirect_neigh), \ FN(bpf_per_cpu_ptr), \ + FN(bpf_this_cpu_ptr), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 14fe3f64fd82..25520f5eeaf6 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -639,6 +639,18 @@ const struct bpf_func_proto bpf_per_cpu_ptr_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr) +{ + return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr); +} + +const struct bpf_func_proto bpf_this_cpu_ptr_proto = { + .func = bpf_this_cpu_ptr, + .gpl_only = false, + .ret_type = RET_PTR_TO_MEM_OR_BTF_ID, + .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, +}; + const struct bpf_func_proto bpf_get_current_task_proto __weak; const struct bpf_func_proto bpf_probe_read_user_proto __weak; const struct bpf_func_proto bpf_probe_read_user_str_proto __weak; @@ -707,6 +719,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_jiffies64_proto; case BPF_FUNC_bpf_per_cpu_ptr: return &bpf_per_cpu_ptr_proto; + case BPF_FUNC_bpf_this_cpu_ptr: + return &bpf_this_cpu_ptr_proto; default: break; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 216b8ece23ce..d9dbf271ebab 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5128,7 +5128,8 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; regs[BPF_REG_0].id = ++env->id_gen; regs[BPF_REG_0].mem_size = meta.mem_size; - } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL) { + } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL || + fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) { const struct btf_type *t; mark_reg_known_zero(env, regs, BPF_REG_0); @@ -5146,10 +5147,14 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn tname, PTR_ERR(ret)); return -EINVAL; } - regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; + regs[BPF_REG_0].type = + fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ? + PTR_TO_MEM : PTR_TO_MEM_OR_NULL; regs[BPF_REG_0].mem_size = tsize; } else { - regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL; + regs[BPF_REG_0].type = + fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ? + PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL; regs[BPF_REG_0].btf_id = meta.ret_btf_id; } } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 364a322e2898..a136a6a63a71 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1329,6 +1329,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_snprintf_btf_proto; case BPF_FUNC_bpf_per_cpu_ptr: return &bpf_per_cpu_ptr_proto; + case BPF_FUNC_bpf_this_cpu_ptr: + return &bpf_this_cpu_ptr_proto; default: return NULL; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f3c1b637ab39..c446394135be 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3703,6 +3703,18 @@ union bpf_attr { * Return * A pointer pointing to the kernel percpu variable on *cpu*, or * NULL, if *cpu* is invalid. + * + * void *bpf_this_cpu_ptr(const void *percpu_ptr) + * Description + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a + * pointer to the percpu kernel variable on this cpu. See the + * description of 'ksym' in **bpf_per_cpu_ptr**\ (). + * + * bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in + * the kernel. Different from **bpf_per_cpu_ptr**\ (), it would + * never return NULL. + * Return + * A pointer pointing to the kernel percpu variable on this cpu. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3859,6 +3871,7 @@ union bpf_attr { FN(skb_cgroup_classid), \ FN(redirect_neigh), \ FN(bpf_per_cpu_ptr), \ + FN(bpf_this_cpu_ptr), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.3.1 From 00dc73e44a846fc5310df0e1415a90af76cc135e Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Tue, 29 Sep 2020 16:50:49 -0700 Subject: bpf/selftests: Test for bpf_per_cpu_ptr() and bpf_this_cpu_ptr() Test bpf_per_cpu_ptr() and bpf_this_cpu_ptr(). Test two paths in the kernel. If the base pointer points to a struct, the returned reg is of type PTR_TO_BTF_ID. Direct pointer dereference can be applied on the returned variable. If the base pointer isn't a struct, the returned reg is of type PTR_TO_MEM, which also supports direct pointer dereference. Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200929235049.2533242-7-haoluo@google.com --- tools/testing/selftests/bpf/prog_tests/ksyms_btf.c | 18 ++++++++++++ tools/testing/selftests/bpf/progs/test_ksyms_btf.c | 32 ++++++++++++++++++++++ 2 files changed, 50 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c index c6ef06c0629a..28e26bd3e0ca 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c @@ -11,6 +11,8 @@ static int duration; void test_ksyms_btf(void) { __u64 runqueues_addr, bpf_prog_active_addr; + __u32 this_rq_cpu; + int this_bpf_prog_active; struct test_ksyms_btf *skel = NULL; struct test_ksyms_btf__data *data; struct btf *btf; @@ -64,6 +66,22 @@ void test_ksyms_btf(void) (unsigned long long)data->out__bpf_prog_active_addr, (unsigned long long)bpf_prog_active_addr); + CHECK(data->out__rq_cpu == -1, "rq_cpu", + "got %u, exp != -1\n", data->out__rq_cpu); + CHECK(data->out__bpf_prog_active < 0, "bpf_prog_active", + "got %d, exp >= 0\n", data->out__bpf_prog_active); + CHECK(data->out__cpu_0_rq_cpu != 0, "cpu_rq(0)->cpu", + "got %u, exp 0\n", data->out__cpu_0_rq_cpu); + + this_rq_cpu = data->out__this_rq_cpu; + CHECK(this_rq_cpu != data->out__rq_cpu, "this_rq_cpu", + "got %u, exp %u\n", this_rq_cpu, data->out__rq_cpu); + + this_bpf_prog_active = data->out__this_bpf_prog_active; + CHECK(this_bpf_prog_active != data->out__bpf_prog_active, "this_bpf_prog_active", + "got %d, exp %d\n", this_bpf_prog_active, + data->out__bpf_prog_active); + cleanup: btf__free(btf); test_ksyms_btf__destroy(skel); diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf.c index 7dde2082131d..bb8ea9270f29 100644 --- a/tools/testing/selftests/bpf/progs/test_ksyms_btf.c +++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf.c @@ -8,15 +8,47 @@ __u64 out__runqueues_addr = -1; __u64 out__bpf_prog_active_addr = -1; +__u32 out__rq_cpu = -1; /* percpu struct fields */ +int out__bpf_prog_active = -1; /* percpu int */ + +__u32 out__this_rq_cpu = -1; +int out__this_bpf_prog_active = -1; + +__u32 out__cpu_0_rq_cpu = -1; /* cpu_rq(0)->cpu */ + extern const struct rq runqueues __ksym; /* struct type global var. */ extern const int bpf_prog_active __ksym; /* int type global var. */ SEC("raw_tp/sys_enter") int handler(const void *ctx) { + struct rq *rq; + int *active; + __u32 cpu; + out__runqueues_addr = (__u64)&runqueues; out__bpf_prog_active_addr = (__u64)&bpf_prog_active; + cpu = bpf_get_smp_processor_id(); + + /* test bpf_per_cpu_ptr() */ + rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, cpu); + if (rq) + out__rq_cpu = rq->cpu; + active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu); + if (active) + out__bpf_prog_active = *active; + + rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0); + if (rq) /* should always be valid, but we can't spare the check. */ + out__cpu_0_rq_cpu = rq->cpu; + + /* test bpf_this_cpu_ptr */ + rq = (struct rq *)bpf_this_cpu_ptr(&runqueues); + out__this_rq_cpu = rq->cpu; + active = (int *)bpf_this_cpu_ptr(&bpf_prog_active); + out__this_bpf_prog_active = *active; + return 0; } -- cgit v1.3.1 From 91274ca535185813cd5eebfe5a4d3344750c74e3 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 1 Oct 2020 18:10:09 -0700 Subject: bpf, sockmap: Update selftests to use skb_adjust_room Instead of working around TLS headers in sockmap selftests use the new skb_adjust_room helper. This allows us to avoid special casing the receive side to skip headers. Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/160160100932.7052.3646935243867660528.stgit@john-Precision-5820-Tower --- .../selftests/bpf/progs/test_sockmap_kern.h | 34 ++++++++++++++++------ tools/testing/selftests/bpf/test_sockmap.c | 27 ++++------------- 2 files changed, 31 insertions(+), 30 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h index 3dca4c2e2418..1858435de7aa 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h @@ -131,39 +131,55 @@ int bpf_prog2(struct __sk_buff *skb) } -SEC("sk_skb3") -int bpf_prog3(struct __sk_buff *skb) +static inline void bpf_write_pass(struct __sk_buff *skb, int offset) { - const int one = 1; - int err, *f, ret = SK_PASS; + int err = bpf_skb_pull_data(skb, 6 + offset); void *data_end; char *c; - err = bpf_skb_pull_data(skb, 19); if (err) - goto tls_out; + return; c = (char *)(long)skb->data; data_end = (void *)(long)skb->data_end; - if (c + 18 < data_end) - memcpy(&c[13], "PASS", 4); + if (c + 5 + offset < data_end) + memcpy(c + offset, "PASS", 4); +} + +SEC("sk_skb3") +int bpf_prog3(struct __sk_buff *skb) +{ + int err, *f, ret = SK_PASS; + const int one = 1; + f = bpf_map_lookup_elem(&sock_skb_opts, &one); if (f && *f) { __u64 flags = 0; ret = 0; flags = *f; + + err = bpf_skb_adjust_room(skb, -13, 0, 0); + if (err) + return SK_DROP; + err = bpf_skb_adjust_room(skb, 4, 0, 0); + if (err) + return SK_DROP; + bpf_write_pass(skb, 0); #ifdef SOCKMAP return bpf_sk_redirect_map(skb, &tls_sock_map, ret, flags); #else return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags); #endif } - f = bpf_map_lookup_elem(&sock_skb_opts, &one); if (f && *f) ret = SK_DROP; + err = bpf_skb_adjust_room(skb, 4, 0, 0); + if (err) + return SK_DROP; + bpf_write_pass(skb, 13); tls_out: return ret; } diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 9b6fb00dc7a0..5cf45455de42 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -518,28 +518,13 @@ static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz) if (i == 0 && txmsg_ktls_skb) { if (msg->msg_iov[i].iov_len < 4) return -EIO; - if (txmsg_ktls_skb_redir) { - if (memcmp(&d[13], "PASS", 4) != 0) { - fprintf(stderr, - "detected redirect ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[13], d[14], d[15], d[16]); - return -EIO; - } - d[13] = 0; - d[14] = 1; - d[15] = 2; - d[16] = 3; - j = 13; - } else if (txmsg_ktls_skb) { - if (memcmp(d, "PASS", 4) != 0) { - fprintf(stderr, - "detected ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[0], d[1], d[2], d[3]); - return -EIO; - } - d[0] = 0; - d[1] = 1; - d[2] = 2; - d[3] = 3; + if (memcmp(d, "PASS", 4) != 0) { + fprintf(stderr, + "detected skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", + i, 0, d[0], d[1], d[2], d[3]); + return -EIO; } + j = 4; /* advance index past PASS header */ } for (; j < msg->msg_iov[i].iov_len && size; j++) { -- cgit v1.3.1 From 8cd6b020b644d1f1fc2a8768032b8cb8472ab352 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 2 Oct 2020 15:02:28 +0300 Subject: selftests: ocelot: add some example VCAP IS1, IS2 and ES0 tc offloads Provide an example script which can be used as a skeleton for offloading TCAM rules in the Ocelot switches. Not all actions are demoed, mostly because of difficulty to automate this from a single board. For example, policing. We can set up an iperf3 UDP server and client and measure throughput at destination. But at least with DSA setups, network namespacing the individual ports is not possible because all switch ports are handled by the same DSA master. And we cannot assume that the target platform (an embedded board) has 2 other non-switch generator ports, we need to work with the generator ports as switch ports (this is the reason why mausezahn is used, and not IP traffic like ping). When somebody has an idea how to test policing, that can be added to this test. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- MAINTAINERS | 1 + .../drivers/net/ocelot/tc_flower_chains.sh | 273 +++++++++++++++++++++ tools/testing/selftests/net/forwarding/lib.sh | 43 ++++ 3 files changed, 317 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh (limited to 'tools') diff --git a/MAINTAINERS b/MAINTAINERS index 8f1c9eb6a869..c2038d47e47f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12542,6 +12542,7 @@ F: drivers/net/dsa/ocelot/* F: drivers/net/ethernet/mscc/ F: include/soc/mscc/ocelot* F: net/dsa/tag_ocelot.c +F: tools/testing/selftests/drivers/net/ocelot/* OCXL (Open Coherent Accelerator Processor Interface OpenCAPI) DRIVER M: Frederic Barrat diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh new file mode 100755 index 000000000000..71a538add08a --- /dev/null +++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh @@ -0,0 +1,273 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright 2020 NXP Semiconductors + +WAIT_TIME=1 +NUM_NETIFS=4 +lib_dir=$(dirname $0)/../../../net/forwarding +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +require_command tcpdump + +# +# +---------------------------------------------+ +# | DUT ports Generator ports | +# | +--------+ +--------+ +--------+ +--------+ | +# | | | | | | | | | | +# | | eth0 | | eth1 | | eth2 | | eth3 | | +# | | | | | | | | | | +# +-+--------+-+--------+-+--------+-+--------+-+ +# | | | | +# | | | | +# | +-----------+ | +# | | +# +--------------------------------+ + +eth0=${NETIFS[p1]} +eth1=${NETIFS[p2]} +eth2=${NETIFS[p3]} +eth3=${NETIFS[p4]} + +eth0_mac="de:ad:be:ef:00:00" +eth1_mac="de:ad:be:ef:00:01" +eth2_mac="de:ad:be:ef:00:02" +eth3_mac="de:ad:be:ef:00:03" + +# Helpers to map a VCAP IS1 and VCAP IS2 lookup and policy to a chain number +# used by the kernel driver. The numbers are: +# VCAP IS1 lookup 0: 10000 +# VCAP IS1 lookup 1: 11000 +# VCAP IS1 lookup 2: 12000 +# VCAP IS2 lookup 0 policy 0: 20000 +# VCAP IS2 lookup 0 policy 1: 20001 +# VCAP IS2 lookup 0 policy 255: 20255 +# VCAP IS2 lookup 1 policy 0: 21000 +# VCAP IS2 lookup 1 policy 1: 21001 +# VCAP IS2 lookup 1 policy 255: 21255 +IS1() +{ + local lookup=$1 + + echo $((10000 + 1000 * lookup)) +} + +IS2() +{ + local lookup=$1 + local pag=$2 + + echo $((20000 + 1000 * lookup + pag)) +} + +ES0() +{ + echo 0 +} + +# The Ocelot switches have a fixed ingress pipeline composed of: +# +# +----------------------------------------------+ +-----------------------------------------+ +# | VCAP IS1 | | VCAP IS2 | +# | | | | +# | +----------+ +----------+ +----------+ | | +----------+ +----------+ | +# | | Lookup 0 | | Lookup 1 | | Lookup 2 | | --+------> PAG 0: | Lookup 0 | -> | Lookup 1 | | +# | +----------+ -> +----------+ -> +----------+ | | | +----------+ +----------+ | +# | |key&action| |key&action| |key&action| | | | |key&action| |key&action| | +# | |key&action| |key&action| |key&action| | | | | .. | | .. | | +# | | .. | | .. | | .. | | | | +----------+ +----------+ | +# | +----------+ +----------+ +----------+ | | | | +# | selects PAG | | | +----------+ +----------+ | +# +----------------------------------------------+ +------> PAG 1: | Lookup 0 | -> | Lookup 1 | | +# | | +----------+ +----------+ | +# | | |key&action| |key&action| | +# | | | .. | | .. | | +# | | +----------+ +----------+ | +# | | ... | +# | | | +# | | +----------+ +----------+ | +# +----> PAG 254: | Lookup 0 | -> | Lookup 1 | | +# | | +----------+ +----------+ | +# | | |key&action| |key&action| | +# | | | .. | | .. | | +# | | +----------+ +----------+ | +# | | | +# | | +----------+ +----------+ | +# +----> PAG 255: | Lookup 0 | -> | Lookup 1 | | +# | +----------+ +----------+ | +# | |key&action| |key&action| | +# | | .. | | .. | | +# | +----------+ +----------+ | +# +-----------------------------------------+ +# +# Both the VCAP IS1 (Ingress Stage 1) and IS2 (Ingress Stage 2) are indexed +# (looked up) multiple times: IS1 3 times, and IS2 2 times. Each filter +# (key and action pair) can be configured to only match during the first, or +# second, etc, lookup. +# +# During one TCAM lookup, the filter processing stops at the first entry that +# matches, then the pipeline jumps to the next lookup. +# The driver maps each individual lookup of each individual ingress TCAM to a +# separate chain number. For correct rule offloading, it is mandatory that each +# filter installed in one TCAM is terminated by a non-optional GOTO action to +# the next lookup from the fixed pipeline. +# +# A chain can only be used if there is a GOTO action correctly set up from the +# prior lookup in the processing pipeline. Setting up all chains is not +# mandatory. + +# NOTE: VCAP IS1 currently uses only S1_NORMAL half keys and VCAP IS2 +# dynamically chooses between MAC_ETYPE, ARP, IP4_TCP_UDP, IP4_OTHER, which are +# all half keys as well. + +create_tcam_skeleton() +{ + local eth=$1 + + tc qdisc add dev $eth clsact + + # VCAP IS1 is the Ingress Classification TCAM and can offload the + # following actions: + # - skbedit priority + # - vlan pop + # - vlan modify + # - goto (only in lookup 2, the last IS1 lookup) + tc filter add dev $eth ingress chain 0 pref 49152 flower \ + skip_sw action goto chain $(IS1 0) + tc filter add dev $eth ingress chain $(IS1 0) pref 49152 \ + flower skip_sw action goto chain $(IS1 1) + tc filter add dev $eth ingress chain $(IS1 1) pref 49152 \ + flower skip_sw action goto chain $(IS1 2) + tc filter add dev $eth ingress chain $(IS1 2) pref 49152 \ + flower skip_sw action goto chain $(IS2 0 0) + + # VCAP IS2 is the Security Enforcement ingress TCAM and can offload the + # following actions: + # - trap + # - drop + # - police + # The two VCAP IS2 lookups can be segmented into up to 256 groups of + # rules, called Policies. A Policy is selected through the Policy + # Association Group (PAG) action of VCAP IS1 (which is the + # GOTO offload). + tc filter add dev $eth ingress chain $(IS2 0 0) pref 49152 \ + flower skip_sw action goto chain $(IS2 1 0) +} + +setup_prepare() +{ + create_tcam_skeleton $eth0 + + ip link add br0 type bridge + ip link set $eth0 master br0 + ip link set $eth1 master br0 + ip link set br0 up + + ip link add link $eth3 name $eth3.100 type vlan id 100 + ip link set $eth3.100 up + + tc filter add dev $eth0 ingress chain $(IS1 1) pref 1 \ + protocol 802.1Q flower skip_sw vlan_id 100 \ + action vlan pop \ + action goto chain $(IS1 2) + + tc filter add dev $eth0 egress chain $(ES0) pref 1 \ + flower skip_sw indev $eth1 \ + action vlan push protocol 802.1Q id 100 + + tc filter add dev $eth0 ingress chain $(IS1 0) \ + protocol ipv4 flower skip_sw src_ip 10.1.1.2 \ + action skbedit priority 7 \ + action goto chain $(IS1 1) + + tc filter add dev $eth0 ingress chain $(IS2 0 0) \ + protocol ipv4 flower skip_sw ip_proto udp dst_port 5201 \ + action police rate 50mbit burst 64k \ + action goto chain $(IS2 1 0) +} + +cleanup() +{ + ip link del $eth3.100 + tc qdisc del dev $eth0 clsact + ip link del br0 +} + +test_vlan_pop() +{ + printf "Testing VLAN pop.. " + + tcpdump_start $eth2 + + # Work around Mausezahn VLAN builder bug + # (https://github.com/netsniff-ng/netsniff-ng/issues/225) by using + # an 8021q upper + $MZ $eth3.100 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip + + sleep 1 + + tcpdump_stop + + if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, ethertype IPv4"; then + echo "OK" + else + echo "FAIL" + fi + + tcpdump_cleanup +} + +test_vlan_push() +{ + printf "Testing VLAN push.. " + + tcpdump_start $eth3.100 + + $MZ $eth2 -q -c 1 -p 64 -a $eth2_mac -b $eth3_mac -t ip + + sleep 1 + + tcpdump_stop + + if tcpdump_show | grep -q "$eth2_mac > $eth3_mac"; then + echo "OK" + else + echo "FAIL" + fi + + tcpdump_cleanup +} + +test_skbedit_priority() +{ + local num_pkts=100 + + printf "Testing frame prioritization.. " + + before=$(ethtool_stats_get $eth0 'rx_green_prio_7') + + $MZ $eth3 -q -c $num_pkts -p 64 -a $eth3_mac -b $eth2_mac -t ip -A 10.1.1.2 + + after=$(ethtool_stats_get $eth0 'rx_green_prio_7') + + if [ $((after - before)) = $num_pkts ]; then + echo "OK" + else + echo "FAIL" + fi +} + +trap cleanup EXIT + +ALL_TESTS=" + test_vlan_pop + test_vlan_push + test_skbedit_priority +" + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 977fc2b326a2..927f9ba49e08 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1227,3 +1227,46 @@ stop_traffic() # Suppress noise from killing mausezahn. { kill %% && wait %%; } 2>/dev/null } + +tcpdump_start() +{ + local if_name=$1; shift + local ns=$1; shift + + capfile=$(mktemp) + capout=$(mktemp) + + if [ -z $ns ]; then + ns_cmd="" + else + ns_cmd="ip netns exec ${ns}" + fi + + if [ -z $SUDO_USER ] ; then + capuser="" + else + capuser="-Z $SUDO_USER" + fi + + $ns_cmd tcpdump -e -n -Q in -i $if_name \ + -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & + cappid=$! + + sleep 1 +} + +tcpdump_stop() +{ + $ns_cmd kill $cappid + sleep 1 +} + +tcpdump_cleanup() +{ + rm $capfile $capout +} + +tcpdump_show() +{ + tcpdump -e -n -r $capfile 2>&1 +} -- cgit v1.3.1 From 5f764d624a89d4d00d282157077878d4e7c69869 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Sep 2020 06:51:43 +0200 Subject: fs: remove the compat readv/writev syscalls Now that import_iovec handles compat iovecs, the native readv and writev syscalls can be used for the compat case as well. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- arch/arm64/include/asm/unistd32.h | 4 ++-- arch/mips/kernel/syscalls/syscall_n32.tbl | 4 ++-- arch/mips/kernel/syscalls/syscall_o32.tbl | 4 ++-- arch/parisc/kernel/syscalls/syscall.tbl | 4 ++-- arch/powerpc/kernel/syscalls/syscall.tbl | 4 ++-- arch/s390/kernel/syscalls/syscall.tbl | 4 ++-- arch/sparc/kernel/syscalls/syscall.tbl | 4 ++-- arch/x86/entry/syscall_x32.c | 2 ++ arch/x86/entry/syscalls/syscall_32.tbl | 4 ++-- arch/x86/entry/syscalls/syscall_64.tbl | 4 ++-- fs/read_write.c | 14 -------------- include/linux/compat.h | 4 ---- include/uapi/asm-generic/unistd.h | 4 ++-- tools/include/uapi/asm-generic/unistd.h | 4 ++-- tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 4 ++-- tools/perf/arch/s390/entry/syscalls/syscall.tbl | 4 ++-- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 4 ++-- 17 files changed, 30 insertions(+), 46 deletions(-) (limited to 'tools') diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 734860ac7cf9..4a236493dca5 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -301,9 +301,9 @@ __SYSCALL(__NR_flock, sys_flock) #define __NR_msync 144 __SYSCALL(__NR_msync, sys_msync) #define __NR_readv 145 -__SYSCALL(__NR_readv, compat_sys_readv) +__SYSCALL(__NR_readv, sys_readv) #define __NR_writev 146 -__SYSCALL(__NR_writev, compat_sys_writev) +__SYSCALL(__NR_writev, sys_writev) #define __NR_getsid 147 __SYSCALL(__NR_getsid, sys_getsid) #define __NR_fdatasync 148 diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index f9df9edb67a4..c99a92646f8e 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -25,8 +25,8 @@ 15 n32 ioctl compat_sys_ioctl 16 n32 pread64 sys_pread64 17 n32 pwrite64 sys_pwrite64 -18 n32 readv compat_sys_readv -19 n32 writev compat_sys_writev +18 n32 readv sys_readv +19 n32 writev sys_writev 20 n32 access sys_access 21 n32 pipe sysm_pipe 22 n32 _newselect compat_sys_select diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 195b43cf27c8..075064d10661 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -156,8 +156,8 @@ 142 o32 _newselect sys_select compat_sys_select 143 o32 flock sys_flock 144 o32 msync sys_msync -145 o32 readv sys_readv compat_sys_readv -146 o32 writev sys_writev compat_sys_writev +145 o32 readv sys_readv +146 o32 writev sys_writev 147 o32 cacheflush sys_cacheflush 148 o32 cachectl sys_cachectl 149 o32 sysmips __sys_sysmips diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index def64d221cd4..192abde0001d 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -159,8 +159,8 @@ 142 common _newselect sys_select compat_sys_select 143 common flock sys_flock 144 common msync sys_msync -145 common readv sys_readv compat_sys_readv -146 common writev sys_writev compat_sys_writev +145 common readv sys_readv +146 common writev sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync 149 common _sysctl sys_ni_syscall diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index c2d737ff2e7b..6f1e2ecf0eda 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -193,8 +193,8 @@ 142 common _newselect sys_select compat_sys_select 143 common flock sys_flock 144 common msync sys_msync -145 common readv sys_readv compat_sys_readv -146 common writev sys_writev compat_sys_writev +145 common readv sys_readv +146 common writev sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync 149 nospu _sysctl sys_ni_syscall diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 10456bc936fb..6101cf2e004c 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -134,8 +134,8 @@ 142 64 select sys_select - 143 common flock sys_flock sys_flock 144 common msync sys_msync sys_msync -145 common readv sys_readv compat_sys_readv -146 common writev sys_writev compat_sys_writev +145 common readv sys_readv sys_readv +146 common writev sys_writev sys_writev 147 common getsid sys_getsid sys_getsid 148 common fdatasync sys_fdatasync sys_fdatasync 149 common _sysctl - - diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 4af114e84f20..a87ddb282ab1 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -149,8 +149,8 @@ 117 common getrusage sys_getrusage compat_sys_getrusage 118 common getsockopt sys_getsockopt sys_getsockopt 119 common getcwd sys_getcwd -120 common readv sys_readv compat_sys_readv -121 common writev sys_writev compat_sys_writev +120 common readv sys_readv +121 common writev sys_writev 122 common settimeofday sys_settimeofday compat_sys_settimeofday 123 32 fchown sys_fchown16 123 64 fchown sys_fchown diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c index 1583831f61a9..aa321444a41f 100644 --- a/arch/x86/entry/syscall_x32.c +++ b/arch/x86/entry/syscall_x32.c @@ -12,6 +12,8 @@ * Reuse the 64-bit entry points for the x32 versions that occupy different * slots in the syscall table. */ +#define __x32_sys_readv __x64_sys_readv +#define __x32_sys_writev __x64_sys_writev #define __x32_sys_getsockopt __x64_sys_getsockopt #define __x32_sys_setsockopt __x64_sys_setsockopt diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 9d1102873666..54ab4beb517f 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -156,8 +156,8 @@ 142 i386 _newselect sys_select compat_sys_select 143 i386 flock sys_flock 144 i386 msync sys_msync -145 i386 readv sys_readv compat_sys_readv -146 i386 writev sys_writev compat_sys_writev +145 i386 readv sys_readv +146 i386 writev sys_writev 147 i386 getsid sys_getsid 148 i386 fdatasync sys_fdatasync 149 i386 _sysctl sys_ni_syscall diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index f30d6ae9a688..b1e59957c5c5 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -371,8 +371,8 @@ 512 x32 rt_sigaction compat_sys_rt_sigaction 513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn 514 x32 ioctl compat_sys_ioctl -515 x32 readv compat_sys_readv -516 x32 writev compat_sys_writev +515 x32 readv sys_readv +516 x32 writev sys_writev 517 x32 recvfrom compat_sys_recvfrom 518 x32 sendmsg compat_sys_sendmsg 519 x32 recvmsg compat_sys_recvmsg diff --git a/fs/read_write.c b/fs/read_write.c index eab427b7cc0a..6c13f744c34a 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1074,13 +1074,6 @@ SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec, * in_compat_syscall(). */ #ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, - const struct iovec __user *, vec, - compat_ulong_t, vlen) -{ - return do_readv(fd, vec, vlen, 0); -} - #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, const struct iovec __user *, vec, @@ -1122,13 +1115,6 @@ COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd, return do_preadv(fd, vec, vlen, pos, flags); } -COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, - const struct iovec __user *, vec, - compat_ulong_t, vlen) -{ - return do_writev(fd, vec, vlen, 0); -} - #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, const struct iovec __user *, vec, diff --git a/include/linux/compat.h b/include/linux/compat.h index 36b5842162c7..07268fc8082b 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -545,10 +545,6 @@ asmlinkage long compat_sys_getdents(unsigned int fd, /* fs/read_write.c */ asmlinkage long compat_sys_lseek(unsigned int, compat_off_t, unsigned int); -asmlinkage ssize_t compat_sys_readv(compat_ulong_t fd, - const struct iovec __user *vec, compat_ulong_t vlen); -asmlinkage ssize_t compat_sys_writev(compat_ulong_t fd, - const struct iovec __user *vec, compat_ulong_t vlen); /* No generic prototype for pread64 and pwrite64 */ asmlinkage ssize_t compat_sys_preadv(compat_ulong_t fd, const struct iovec __user *vec, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 995b36c2ea7d..211c9eacbda6 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -207,9 +207,9 @@ __SYSCALL(__NR_read, sys_read) #define __NR_write 64 __SYSCALL(__NR_write, sys_write) #define __NR_readv 65 -__SC_COMP(__NR_readv, sys_readv, compat_sys_readv) +__SC_COMP(__NR_readv, sys_readv, sys_readv) #define __NR_writev 66 -__SC_COMP(__NR_writev, sys_writev, compat_sys_writev) +__SC_COMP(__NR_writev, sys_writev, sys_writev) #define __NR_pread64 67 __SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64) #define __NR_pwrite64 68 diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 995b36c2ea7d..211c9eacbda6 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -207,9 +207,9 @@ __SYSCALL(__NR_read, sys_read) #define __NR_write 64 __SYSCALL(__NR_write, sys_write) #define __NR_readv 65 -__SC_COMP(__NR_readv, sys_readv, compat_sys_readv) +__SC_COMP(__NR_readv, sys_readv, sys_readv) #define __NR_writev 66 -__SC_COMP(__NR_writev, sys_writev, compat_sys_writev) +__SC_COMP(__NR_writev, sys_writev, sys_writev) #define __NR_pread64 67 __SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64) #define __NR_pwrite64 68 diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 3ca6fe057a0b..46be68029587 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -189,8 +189,8 @@ 142 common _newselect sys_select compat_sys_select 143 common flock sys_flock 144 common msync sys_msync -145 common readv sys_readv compat_sys_readv -146 common writev sys_writev compat_sys_writev +145 common readv sys_readv +146 common writev sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync 149 nospu _sysctl sys_ni_syscall diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 6a0bbea225db..fb5e61ce9d58 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -134,8 +134,8 @@ 142 64 select sys_select - 143 common flock sys_flock sys_flock 144 common msync sys_msync compat_sys_msync -145 common readv sys_readv compat_sys_readv -146 common writev sys_writev compat_sys_writev +145 common readv sys_readv +146 common writev sys_writev 147 common getsid sys_getsid sys_getsid 148 common fdatasync sys_fdatasync sys_fdatasync 149 common _sysctl - - diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index f30d6ae9a688..b1e59957c5c5 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -371,8 +371,8 @@ 512 x32 rt_sigaction compat_sys_rt_sigaction 513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn 514 x32 ioctl compat_sys_ioctl -515 x32 readv compat_sys_readv -516 x32 writev compat_sys_writev +515 x32 readv sys_readv +516 x32 writev sys_writev 517 x32 recvfrom compat_sys_recvfrom 518 x32 sendmsg compat_sys_sendmsg 519 x32 recvmsg compat_sys_recvmsg -- cgit v1.3.1 From 598b3cec831fd6ccb3cbe4919a722e868c6364a8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Sep 2020 06:51:44 +0200 Subject: fs: remove compat_sys_vmsplice Now that import_iovec handles compat iovecs, the native vmsplice syscall can be used for the compat case as well. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- arch/arm64/include/asm/unistd32.h | 2 +- arch/mips/kernel/syscalls/syscall_n32.tbl | 2 +- arch/mips/kernel/syscalls/syscall_o32.tbl | 2 +- arch/parisc/kernel/syscalls/syscall.tbl | 2 +- arch/powerpc/kernel/syscalls/syscall.tbl | 2 +- arch/s390/kernel/syscalls/syscall.tbl | 2 +- arch/sparc/kernel/syscalls/syscall.tbl | 2 +- arch/x86/entry/syscall_x32.c | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 2 +- arch/x86/entry/syscalls/syscall_64.tbl | 2 +- fs/splice.c | 57 +++++----------------- include/linux/compat.h | 4 -- include/uapi/asm-generic/unistd.h | 2 +- tools/include/uapi/asm-generic/unistd.h | 2 +- tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 2 +- tools/perf/arch/s390/entry/syscalls/syscall.tbl | 2 +- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 2 +- 17 files changed, 28 insertions(+), 62 deletions(-) (limited to 'tools') diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 4a236493dca5..11dfae3a8563 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -697,7 +697,7 @@ __SYSCALL(__NR_sync_file_range2, compat_sys_aarch32_sync_file_range2) #define __NR_tee 342 __SYSCALL(__NR_tee, sys_tee) #define __NR_vmsplice 343 -__SYSCALL(__NR_vmsplice, compat_sys_vmsplice) +__SYSCALL(__NR_vmsplice, sys_vmsplice) #define __NR_move_pages 344 __SYSCALL(__NR_move_pages, compat_sys_move_pages) #define __NR_getcpu 345 diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index c99a92646f8e..5a39d4de0ac8 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -278,7 +278,7 @@ 267 n32 splice sys_splice 268 n32 sync_file_range sys_sync_file_range 269 n32 tee sys_tee -270 n32 vmsplice compat_sys_vmsplice +270 n32 vmsplice sys_vmsplice 271 n32 move_pages compat_sys_move_pages 272 n32 set_robust_list compat_sys_set_robust_list 273 n32 get_robust_list compat_sys_get_robust_list diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 075064d10661..136efc6b8c54 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -318,7 +318,7 @@ 304 o32 splice sys_splice 305 o32 sync_file_range sys_sync_file_range sys32_sync_file_range 306 o32 tee sys_tee -307 o32 vmsplice sys_vmsplice compat_sys_vmsplice +307 o32 vmsplice sys_vmsplice 308 o32 move_pages sys_move_pages compat_sys_move_pages 309 o32 set_robust_list sys_set_robust_list compat_sys_set_robust_list 310 o32 get_robust_list sys_get_robust_list compat_sys_get_robust_list diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 192abde0001d..a9e184192cae 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -330,7 +330,7 @@ 292 32 sync_file_range parisc_sync_file_range 292 64 sync_file_range sys_sync_file_range 293 common tee sys_tee -294 common vmsplice sys_vmsplice compat_sys_vmsplice +294 common vmsplice sys_vmsplice 295 common move_pages sys_move_pages compat_sys_move_pages 296 common getcpu sys_getcpu 297 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 6f1e2ecf0eda..0d4985919ca3 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -369,7 +369,7 @@ 282 common unshare sys_unshare 283 common splice sys_splice 284 common tee sys_tee -285 common vmsplice sys_vmsplice compat_sys_vmsplice +285 common vmsplice sys_vmsplice 286 common openat sys_openat compat_sys_openat 287 common mkdirat sys_mkdirat 288 common mknodat sys_mknodat diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 6101cf2e004c..b5495a42814b 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -316,7 +316,7 @@ 306 common splice sys_splice sys_splice 307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range 308 common tee sys_tee sys_tee -309 common vmsplice sys_vmsplice compat_sys_vmsplice +309 common vmsplice sys_vmsplice sys_vmsplice 310 common move_pages sys_move_pages compat_sys_move_pages 311 common getcpu sys_getcpu sys_getcpu 312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index a87ddb282ab1..f1810c1a35ca 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -38,7 +38,7 @@ 23 64 setuid sys_setuid 24 32 getuid sys_getuid16 24 64 getuid sys_getuid -25 common vmsplice sys_vmsplice compat_sys_vmsplice +25 common vmsplice sys_vmsplice 26 common ptrace sys_ptrace compat_sys_ptrace 27 common alarm sys_alarm 28 common sigaltstack sys_sigaltstack compat_sys_sigaltstack diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c index aa321444a41f..a4840b9d50ad 100644 --- a/arch/x86/entry/syscall_x32.c +++ b/arch/x86/entry/syscall_x32.c @@ -16,6 +16,7 @@ #define __x32_sys_writev __x64_sys_writev #define __x32_sys_getsockopt __x64_sys_getsockopt #define __x32_sys_setsockopt __x64_sys_setsockopt +#define __x32_sys_vmsplice __x64_sys_vmsplice #define __SYSCALL_64(nr, sym) diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 54ab4beb517f..0fb2f172581e 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -327,7 +327,7 @@ 313 i386 splice sys_splice 314 i386 sync_file_range sys_ia32_sync_file_range 315 i386 tee sys_tee -316 i386 vmsplice sys_vmsplice compat_sys_vmsplice +316 i386 vmsplice sys_vmsplice 317 i386 move_pages sys_move_pages compat_sys_move_pages 318 i386 getcpu sys_getcpu 319 i386 epoll_pwait sys_epoll_pwait diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index b1e59957c5c5..642af919183d 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -388,7 +388,7 @@ 529 x32 waitid compat_sys_waitid 530 x32 set_robust_list compat_sys_set_robust_list 531 x32 get_robust_list compat_sys_get_robust_list -532 x32 vmsplice compat_sys_vmsplice +532 x32 vmsplice sys_vmsplice 533 x32 move_pages compat_sys_move_pages 534 x32 preadv compat_sys_preadv64 535 x32 pwritev compat_sys_pwritev64 diff --git a/fs/splice.c b/fs/splice.c index 132d42b9871f..18d84544030b 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include "internal.h" @@ -1332,20 +1331,6 @@ static int vmsplice_type(struct fd f, int *type) * Currently we punt and implement it as a normal copy, see pipe_to_user(). * */ -static long do_vmsplice(struct file *f, struct iov_iter *iter, unsigned int flags) -{ - if (unlikely(flags & ~SPLICE_F_ALL)) - return -EINVAL; - - if (!iov_iter_count(iter)) - return 0; - - if (iov_iter_rw(iter) == WRITE) - return vmsplice_to_pipe(f, iter, flags); - else - return vmsplice_to_user(f, iter, flags); -} - SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, unsigned long, nr_segs, unsigned int, flags) { @@ -1356,6 +1341,9 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, struct fd f; int type; + if (unlikely(flags & ~SPLICE_F_ALL)) + return -EINVAL; + f = fdget(fd); error = vmsplice_type(f, &type); if (error) @@ -1363,40 +1351,21 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, error = import_iovec(type, uiov, nr_segs, ARRAY_SIZE(iovstack), &iov, &iter); - if (error >= 0) { - error = do_vmsplice(f.file, &iter, flags); - kfree(iov); - } - fdput(f); - return error; -} + if (error < 0) + goto out_fdput; -#ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, iov32, - unsigned int, nr_segs, unsigned int, flags) -{ - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov = iovstack; - struct iov_iter iter; - ssize_t error; - struct fd f; - int type; - - f = fdget(fd); - error = vmsplice_type(f, &type); - if (error) - return error; + if (!iov_iter_count(&iter)) + error = 0; + else if (iov_iter_rw(&iter) == WRITE) + error = vmsplice_to_pipe(f.file, &iter, flags); + else + error = vmsplice_to_user(f.file, &iter, flags); - error = import_iovec(type, (struct iovec __user *)iov32, nr_segs, - ARRAY_SIZE(iovstack), &iov, &iter); - if (error >= 0) { - error = do_vmsplice(f.file, &iter, flags); - kfree(iov); - } + kfree(iov); +out_fdput: fdput(f); return error; } -#endif SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, diff --git a/include/linux/compat.h b/include/linux/compat.h index 07268fc8082b..7c3e876703cf 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -597,10 +597,6 @@ asmlinkage long compat_sys_signalfd4(int ufd, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize, int flags); -/* fs/splice.c */ -asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *, - unsigned int nr_segs, unsigned int flags); - /* fs/stat.c */ asmlinkage long compat_sys_newfstatat(unsigned int dfd, const char __user *filename, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 211c9eacbda6..f2dcb0d57030 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -237,7 +237,7 @@ __SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4) /* fs/splice.c */ #define __NR_vmsplice 75 -__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice) +__SYSCALL(__NR_vmsplice, sys_vmsplice) #define __NR_splice 76 __SYSCALL(__NR_splice, sys_splice) #define __NR_tee 77 diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 211c9eacbda6..f2dcb0d57030 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -237,7 +237,7 @@ __SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4) /* fs/splice.c */ #define __NR_vmsplice 75 -__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice) +__SYSCALL(__NR_vmsplice, sys_vmsplice) #define __NR_splice 76 __SYSCALL(__NR_splice, sys_splice) #define __NR_tee 77 diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 46be68029587..26f0347c1511 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -363,7 +363,7 @@ 282 common unshare sys_unshare 283 common splice sys_splice 284 common tee sys_tee -285 common vmsplice sys_vmsplice compat_sys_vmsplice +285 common vmsplice sys_vmsplice 286 common openat sys_openat compat_sys_openat 287 common mkdirat sys_mkdirat 288 common mknodat sys_mknodat diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index fb5e61ce9d58..02ad81f69bb7 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -316,7 +316,7 @@ 306 common splice sys_splice compat_sys_splice 307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range 308 common tee sys_tee compat_sys_tee -309 common vmsplice sys_vmsplice compat_sys_vmsplice +309 common vmsplice sys_vmsplice sys_vmsplice 310 common move_pages sys_move_pages compat_sys_move_pages 311 common getcpu sys_getcpu compat_sys_getcpu 312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index b1e59957c5c5..642af919183d 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -388,7 +388,7 @@ 529 x32 waitid compat_sys_waitid 530 x32 set_robust_list compat_sys_set_robust_list 531 x32 get_robust_list compat_sys_get_robust_list -532 x32 vmsplice compat_sys_vmsplice +532 x32 vmsplice sys_vmsplice 533 x32 move_pages compat_sys_move_pages 534 x32 preadv compat_sys_preadv64 535 x32 pwritev compat_sys_pwritev64 -- cgit v1.3.1 From c3973b401ef2b0b8005f8074a10e96e3ea093823 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Sep 2020 06:51:45 +0200 Subject: mm: remove compat_process_vm_{readv,writev} Now that import_iovec handles compat iovecs, the native syscalls can be used for the compat case as well. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- arch/arm64/include/asm/unistd32.h | 4 +- arch/mips/kernel/syscalls/syscall_n32.tbl | 4 +- arch/mips/kernel/syscalls/syscall_o32.tbl | 4 +- arch/parisc/kernel/syscalls/syscall.tbl | 4 +- arch/powerpc/kernel/syscalls/syscall.tbl | 4 +- arch/s390/kernel/syscalls/syscall.tbl | 4 +- arch/sparc/kernel/syscalls/syscall.tbl | 4 +- arch/x86/entry/syscall_x32.c | 2 + arch/x86/entry/syscalls/syscall_32.tbl | 4 +- arch/x86/entry/syscalls/syscall_64.tbl | 4 +- include/linux/compat.h | 8 --- include/uapi/asm-generic/unistd.h | 6 +- mm/process_vm_access.c | 69 ---------------------- tools/include/uapi/asm-generic/unistd.h | 6 +- tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 4 +- tools/perf/arch/s390/entry/syscalls/syscall.tbl | 4 +- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 4 +- 17 files changed, 30 insertions(+), 109 deletions(-) (limited to 'tools') diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 11dfae3a8563..0c280a05f699 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -763,9 +763,9 @@ __SYSCALL(__NR_sendmmsg, compat_sys_sendmmsg) #define __NR_setns 375 __SYSCALL(__NR_setns, sys_setns) #define __NR_process_vm_readv 376 -__SYSCALL(__NR_process_vm_readv, compat_sys_process_vm_readv) +__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv) #define __NR_process_vm_writev 377 -__SYSCALL(__NR_process_vm_writev, compat_sys_process_vm_writev) +__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev) #define __NR_kcmp 378 __SYSCALL(__NR_kcmp, sys_kcmp) #define __NR_finit_module 379 diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 5a39d4de0ac8..0bc2e0fcf1ee 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -317,8 +317,8 @@ 306 n32 syncfs sys_syncfs 307 n32 sendmmsg compat_sys_sendmmsg 308 n32 setns sys_setns -309 n32 process_vm_readv compat_sys_process_vm_readv -310 n32 process_vm_writev compat_sys_process_vm_writev +309 n32 process_vm_readv sys_process_vm_readv +310 n32 process_vm_writev sys_process_vm_writev 311 n32 kcmp sys_kcmp 312 n32 finit_module sys_finit_module 313 n32 sched_setattr sys_sched_setattr diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 136efc6b8c54..b408c13b9342 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -356,8 +356,8 @@ 342 o32 syncfs sys_syncfs 343 o32 sendmmsg sys_sendmmsg compat_sys_sendmmsg 344 o32 setns sys_setns -345 o32 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv -346 o32 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev +345 o32 process_vm_readv sys_process_vm_readv +346 o32 process_vm_writev sys_process_vm_writev 347 o32 kcmp sys_kcmp 348 o32 finit_module sys_finit_module 349 o32 sched_setattr sys_sched_setattr diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index a9e184192cae..2015a5124b78 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -372,8 +372,8 @@ 327 common syncfs sys_syncfs 328 common setns sys_setns 329 common sendmmsg sys_sendmmsg compat_sys_sendmmsg -330 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv -331 common process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev +330 common process_vm_readv sys_process_vm_readv +331 common process_vm_writev sys_process_vm_writev 332 common kcmp sys_kcmp 333 common finit_module sys_finit_module 334 common sched_setattr sys_sched_setattr diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 0d4985919ca3..66a472aa635d 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -449,8 +449,8 @@ 348 common syncfs sys_syncfs 349 common sendmmsg sys_sendmmsg compat_sys_sendmmsg 350 common setns sys_setns -351 nospu process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv -352 nospu process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev +351 nospu process_vm_readv sys_process_vm_readv +352 nospu process_vm_writev sys_process_vm_writev 353 nospu finit_module sys_finit_module 354 nospu kcmp sys_kcmp 355 common sched_setattr sys_sched_setattr diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index b5495a42814b..7485867a490b 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -347,8 +347,8 @@ 337 common clock_adjtime sys_clock_adjtime sys_clock_adjtime32 338 common syncfs sys_syncfs sys_syncfs 339 common setns sys_setns sys_setns -340 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv -341 common process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev +340 common process_vm_readv sys_process_vm_readv sys_process_vm_readv +341 common process_vm_writev sys_process_vm_writev sys_process_vm_writev 342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr 343 common kcmp sys_kcmp sys_kcmp 344 common finit_module sys_finit_module sys_finit_module diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index f1810c1a35ca..4a9365b2e340 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -406,8 +406,8 @@ 335 common syncfs sys_syncfs 336 common sendmmsg sys_sendmmsg compat_sys_sendmmsg 337 common setns sys_setns -338 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv -339 common process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev +338 common process_vm_readv sys_process_vm_readv +339 common process_vm_writev sys_process_vm_writev 340 32 kern_features sys_ni_syscall sys_kern_features 340 64 kern_features sys_kern_features 341 common kcmp sys_kcmp diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c index a4840b9d50ad..f2fe0a33bcfd 100644 --- a/arch/x86/entry/syscall_x32.c +++ b/arch/x86/entry/syscall_x32.c @@ -17,6 +17,8 @@ #define __x32_sys_getsockopt __x64_sys_getsockopt #define __x32_sys_setsockopt __x64_sys_setsockopt #define __x32_sys_vmsplice __x64_sys_vmsplice +#define __x32_sys_process_vm_readv __x64_sys_process_vm_readv +#define __x32_sys_process_vm_writev __x64_sys_process_vm_writev #define __SYSCALL_64(nr, sym) diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 0fb2f172581e..5fbe10ad8a23 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -358,8 +358,8 @@ 344 i386 syncfs sys_syncfs 345 i386 sendmmsg sys_sendmmsg compat_sys_sendmmsg 346 i386 setns sys_setns -347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv -348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev +347 i386 process_vm_readv sys_process_vm_readv +348 i386 process_vm_writev sys_process_vm_writev 349 i386 kcmp sys_kcmp 350 i386 finit_module sys_finit_module 351 i386 sched_setattr sys_sched_setattr diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 642af919183d..347809649ba2 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -395,8 +395,8 @@ 536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo 537 x32 recvmmsg compat_sys_recvmmsg_time64 538 x32 sendmmsg compat_sys_sendmmsg -539 x32 process_vm_readv compat_sys_process_vm_readv -540 x32 process_vm_writev compat_sys_process_vm_writev +539 x32 process_vm_readv sys_process_vm_readv +540 x32 process_vm_writev sys_process_vm_writev 541 x32 setsockopt sys_setsockopt 542 x32 getsockopt sys_getsockopt 543 x32 io_setup compat_sys_io_setup diff --git a/include/linux/compat.h b/include/linux/compat.h index 7c3e876703cf..3e3d2beafed3 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -780,14 +780,6 @@ asmlinkage long compat_sys_open_by_handle_at(int mountdirfd, int flags); asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, unsigned vlen, unsigned int flags); -asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid, - const struct compat_iovec __user *lvec, - compat_ulong_t liovcnt, const struct compat_iovec __user *rvec, - compat_ulong_t riovcnt, compat_ulong_t flags); -asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid, - const struct compat_iovec __user *lvec, - compat_ulong_t liovcnt, const struct compat_iovec __user *rvec, - compat_ulong_t riovcnt, compat_ulong_t flags); asmlinkage long compat_sys_execveat(int dfd, const char __user *filename, const compat_uptr_t __user *argv, const compat_uptr_t __user *envp, int flags); diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index f2dcb0d57030..c1dfe99c9c3f 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -727,11 +727,9 @@ __SYSCALL(__NR_setns, sys_setns) #define __NR_sendmmsg 269 __SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg) #define __NR_process_vm_readv 270 -__SC_COMP(__NR_process_vm_readv, sys_process_vm_readv, \ - compat_sys_process_vm_readv) +__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv) #define __NR_process_vm_writev 271 -__SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \ - compat_sys_process_vm_writev) +__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev) #define __NR_kcmp 272 __SYSCALL(__NR_kcmp, sys_kcmp) #define __NR_finit_module 273 diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index 3f2156aab442..fd12da80b6f2 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -14,10 +14,6 @@ #include #include -#ifdef CONFIG_COMPAT -#include -#endif - /** * process_vm_rw_pages - read/write pages from task specified * @pages: array of pointers to pages we want to copy @@ -304,68 +300,3 @@ SYSCALL_DEFINE6(process_vm_writev, pid_t, pid, { return process_vm_rw(pid, lvec, liovcnt, rvec, riovcnt, flags, 1); } - -#ifdef CONFIG_COMPAT - -static ssize_t -compat_process_vm_rw(compat_pid_t pid, - const struct compat_iovec __user *lvec, - unsigned long liovcnt, - const struct compat_iovec __user *rvec, - unsigned long riovcnt, - unsigned long flags, int vm_write) -{ - struct iovec iovstack_l[UIO_FASTIOV]; - struct iovec iovstack_r[UIO_FASTIOV]; - struct iovec *iov_l = iovstack_l; - struct iovec *iov_r = iovstack_r; - struct iov_iter iter; - ssize_t rc = -EFAULT; - int dir = vm_write ? WRITE : READ; - - if (flags != 0) - return -EINVAL; - - rc = import_iovec(dir, (const struct iovec __user *)lvec, liovcnt, - UIO_FASTIOV, &iov_l, &iter); - if (rc < 0) - return rc; - if (!iov_iter_count(&iter)) - goto free_iov_l; - iov_r = iovec_from_user((const struct iovec __user *)rvec, riovcnt, - UIO_FASTIOV, iovstack_r, true); - if (IS_ERR(iov_r)) { - rc = PTR_ERR(iov_r); - goto free_iov_l; - } - rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write); - if (iov_r != iovstack_r) - kfree(iov_r); -free_iov_l: - kfree(iov_l); - return rc; -} - -COMPAT_SYSCALL_DEFINE6(process_vm_readv, compat_pid_t, pid, - const struct compat_iovec __user *, lvec, - compat_ulong_t, liovcnt, - const struct compat_iovec __user *, rvec, - compat_ulong_t, riovcnt, - compat_ulong_t, flags) -{ - return compat_process_vm_rw(pid, lvec, liovcnt, rvec, - riovcnt, flags, 0); -} - -COMPAT_SYSCALL_DEFINE6(process_vm_writev, compat_pid_t, pid, - const struct compat_iovec __user *, lvec, - compat_ulong_t, liovcnt, - const struct compat_iovec __user *, rvec, - compat_ulong_t, riovcnt, - compat_ulong_t, flags) -{ - return compat_process_vm_rw(pid, lvec, liovcnt, rvec, - riovcnt, flags, 1); -} - -#endif diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index f2dcb0d57030..c1dfe99c9c3f 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -727,11 +727,9 @@ __SYSCALL(__NR_setns, sys_setns) #define __NR_sendmmsg 269 __SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg) #define __NR_process_vm_readv 270 -__SC_COMP(__NR_process_vm_readv, sys_process_vm_readv, \ - compat_sys_process_vm_readv) +__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv) #define __NR_process_vm_writev 271 -__SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \ - compat_sys_process_vm_writev) +__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev) #define __NR_kcmp 272 __SYSCALL(__NR_kcmp, sys_kcmp) #define __NR_finit_module 273 diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 26f0347c1511..a188f053cbf9 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -443,8 +443,8 @@ 348 common syncfs sys_syncfs 349 common sendmmsg sys_sendmmsg compat_sys_sendmmsg 350 common setns sys_setns -351 nospu process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv -352 nospu process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev +351 nospu process_vm_readv sys_process_vm_readv +352 nospu process_vm_writev sys_process_vm_writev 353 nospu finit_module sys_finit_module 354 nospu kcmp sys_kcmp 355 common sched_setattr sys_sched_setattr diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 02ad81f69bb7..c44c83032c3a 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -347,8 +347,8 @@ 337 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime 338 common syncfs sys_syncfs sys_syncfs 339 common setns sys_setns sys_setns -340 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv -341 common process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev +340 common process_vm_readv sys_process_vm_readv sys_process_vm_readv +341 common process_vm_writev sys_process_vm_writev sys_process_vm_writev 342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr 343 common kcmp sys_kcmp compat_sys_kcmp 344 common finit_module sys_finit_module compat_sys_finit_module diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 642af919183d..347809649ba2 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -395,8 +395,8 @@ 536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo 537 x32 recvmmsg compat_sys_recvmmsg_time64 538 x32 sendmmsg compat_sys_sendmmsg -539 x32 process_vm_readv compat_sys_process_vm_readv -540 x32 process_vm_writev compat_sys_process_vm_writev +539 x32 process_vm_readv sys_process_vm_readv +540 x32 process_vm_writev sys_process_vm_writev 541 x32 setsockopt sys_setsockopt 542 x32 getsockopt sys_getsockopt 543 x32 io_setup compat_sys_io_setup -- cgit v1.3.1 From 5d90e05c0e83ee5c67fdba1fa56e590103aac9fd Mon Sep 17 00:00:00 2001 From: Scott Branden Date: Fri, 2 Oct 2020 10:38:28 -0700 Subject: test_firmware: Test partial read support Add additional hooks to test_firmware to pass in support for partial file read using request_firmware_into_buf(): buf_size: size of buffer to request firmware into partial: indicates that a partial file request is being made file_offset: to indicate offset into file to request Also update firmware selftests to use the new partial read test API. Signed-off-by: Scott Branden Co-developed-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201002173828.2099543-17-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- lib/test_firmware.c | 154 ++++++++++++++++++++-- tools/testing/selftests/firmware/fw_filesystem.sh | 91 +++++++++++++ 2 files changed, 233 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 06c955057756..2baa275a6ddf 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -52,6 +52,9 @@ struct test_batched_req { * @name: the name of the firmware file to look for * @into_buf: when the into_buf is used if this is true * request_firmware_into_buf() will be used instead. + * @buf_size: size of buf to allocate when into_buf is true + * @file_offset: file offset to request when calling request_firmware_into_buf + * @partial: partial read opt when calling request_firmware_into_buf * @sync_direct: when the sync trigger is used if this is true * request_firmware_direct() will be used instead. * @send_uevent: whether or not to send a uevent for async requests @@ -91,6 +94,9 @@ struct test_batched_req { struct test_config { char *name; bool into_buf; + size_t buf_size; + size_t file_offset; + bool partial; bool sync_direct; bool send_uevent; u8 num_requests; @@ -185,6 +191,9 @@ static int __test_firmware_config_init(void) test_fw_config->num_requests = TEST_FIRMWARE_NUM_REQS; test_fw_config->send_uevent = true; test_fw_config->into_buf = false; + test_fw_config->buf_size = TEST_FIRMWARE_BUF_SIZE; + test_fw_config->file_offset = 0; + test_fw_config->partial = false; test_fw_config->sync_direct = false; test_fw_config->req_firmware = request_firmware; test_fw_config->test_result = 0; @@ -238,28 +247,35 @@ static ssize_t config_show(struct device *dev, dev_name(dev)); if (test_fw_config->name) - len += scnprintf(buf+len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "name:\t%s\n", test_fw_config->name); else - len += scnprintf(buf+len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "name:\tEMTPY\n"); - len += scnprintf(buf+len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "num_requests:\t%u\n", test_fw_config->num_requests); - len += scnprintf(buf+len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "send_uevent:\t\t%s\n", test_fw_config->send_uevent ? "FW_ACTION_HOTPLUG" : "FW_ACTION_NOHOTPLUG"); - len += scnprintf(buf+len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "into_buf:\t\t%s\n", test_fw_config->into_buf ? "true" : "false"); - len += scnprintf(buf+len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, + "buf_size:\t%zu\n", test_fw_config->buf_size); + len += scnprintf(buf + len, PAGE_SIZE - len, + "file_offset:\t%zu\n", test_fw_config->file_offset); + len += scnprintf(buf + len, PAGE_SIZE - len, + "partial:\t\t%s\n", + test_fw_config->partial ? "true" : "false"); + len += scnprintf(buf + len, PAGE_SIZE - len, "sync_direct:\t\t%s\n", test_fw_config->sync_direct ? "true" : "false"); - len += scnprintf(buf+len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "read_fw_idx:\t%u\n", test_fw_config->read_fw_idx); mutex_unlock(&test_fw_mutex); @@ -317,6 +333,30 @@ static ssize_t test_dev_config_show_bool(char *buf, bool val) return snprintf(buf, PAGE_SIZE, "%d\n", val); } +static int test_dev_config_update_size_t(const char *buf, + size_t size, + size_t *cfg) +{ + int ret; + long new; + + ret = kstrtol(buf, 10, &new); + if (ret) + return ret; + + mutex_lock(&test_fw_mutex); + *(size_t *)cfg = new; + mutex_unlock(&test_fw_mutex); + + /* Always return full write size even if we didn't consume all */ + return size; +} + +static ssize_t test_dev_config_show_size_t(char *buf, size_t val) +{ + return snprintf(buf, PAGE_SIZE, "%zu\n", val); +} + static ssize_t test_dev_config_show_int(char *buf, int val) { return snprintf(buf, PAGE_SIZE, "%d\n", val); @@ -402,6 +442,83 @@ static ssize_t config_into_buf_show(struct device *dev, } static DEVICE_ATTR_RW(config_into_buf); +static ssize_t config_buf_size_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int rc; + + mutex_lock(&test_fw_mutex); + if (test_fw_config->reqs) { + pr_err("Must call release_all_firmware prior to changing config\n"); + rc = -EINVAL; + mutex_unlock(&test_fw_mutex); + goto out; + } + mutex_unlock(&test_fw_mutex); + + rc = test_dev_config_update_size_t(buf, count, + &test_fw_config->buf_size); + +out: + return rc; +} + +static ssize_t config_buf_size_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return test_dev_config_show_size_t(buf, test_fw_config->buf_size); +} +static DEVICE_ATTR_RW(config_buf_size); + +static ssize_t config_file_offset_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int rc; + + mutex_lock(&test_fw_mutex); + if (test_fw_config->reqs) { + pr_err("Must call release_all_firmware prior to changing config\n"); + rc = -EINVAL; + mutex_unlock(&test_fw_mutex); + goto out; + } + mutex_unlock(&test_fw_mutex); + + rc = test_dev_config_update_size_t(buf, count, + &test_fw_config->file_offset); + +out: + return rc; +} + +static ssize_t config_file_offset_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return test_dev_config_show_size_t(buf, test_fw_config->file_offset); +} +static DEVICE_ATTR_RW(config_file_offset); + +static ssize_t config_partial_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return test_dev_config_update_bool(buf, + count, + &test_fw_config->partial); +} + +static ssize_t config_partial_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return test_dev_config_show_bool(buf, test_fw_config->partial); +} +static DEVICE_ATTR_RW(config_partial); + static ssize_t config_sync_direct_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -659,11 +776,21 @@ static int test_fw_run_batch_request(void *data) if (!test_buf) return -ENOSPC; - req->rc = request_firmware_into_buf(&req->fw, - req->name, - req->dev, - test_buf, - TEST_FIRMWARE_BUF_SIZE); + if (test_fw_config->partial) + req->rc = request_partial_firmware_into_buf + (&req->fw, + req->name, + req->dev, + test_buf, + test_fw_config->buf_size, + test_fw_config->file_offset); + else + req->rc = request_firmware_into_buf + (&req->fw, + req->name, + req->dev, + test_buf, + test_fw_config->buf_size); if (!req->fw) kfree(test_buf); } else { @@ -936,6 +1063,9 @@ static struct attribute *test_dev_attrs[] = { TEST_FW_DEV_ATTR(config_name), TEST_FW_DEV_ATTR(config_num_requests), TEST_FW_DEV_ATTR(config_into_buf), + TEST_FW_DEV_ATTR(config_buf_size), + TEST_FW_DEV_ATTR(config_file_offset), + TEST_FW_DEV_ATTR(config_partial), TEST_FW_DEV_ATTR(config_sync_direct), TEST_FW_DEV_ATTR(config_send_uevent), TEST_FW_DEV_ATTR(config_read_fw_idx), diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh index fcc281373b4d..c2a2a100114b 100755 --- a/tools/testing/selftests/firmware/fw_filesystem.sh +++ b/tools/testing/selftests/firmware/fw_filesystem.sh @@ -149,6 +149,26 @@ config_unset_into_buf() echo 0 > $DIR/config_into_buf } +config_set_buf_size() +{ + echo $1 > $DIR/config_buf_size +} + +config_set_file_offset() +{ + echo $1 > $DIR/config_file_offset +} + +config_set_partial() +{ + echo 1 > $DIR/config_partial +} + +config_unset_partial() +{ + echo 0 > $DIR/config_partial +} + config_set_sync_direct() { echo 1 > $DIR/config_sync_direct @@ -207,6 +227,35 @@ read_firmwares() done } +read_partial_firmwares() +{ + if [ "$(cat $DIR/config_into_buf)" == "1" ]; then + fwfile="${FW_INTO_BUF}" + else + fwfile="${FW}" + fi + + if [ "$1" = "xzonly" ]; then + fwfile="${fwfile}-orig" + fi + + # Strip fwfile down to match partial offset and length + partial_data="$(cat $fwfile)" + partial_data="${partial_data:$2:$3}" + + for i in $(seq 0 3); do + config_set_read_fw_idx $i + + read_firmware="$(cat $DIR/read_firmware)" + + # Verify the contents are what we expect. + if [ $read_firmware != $partial_data ]; then + echo "request #$i: partial firmware was not loaded" >&2 + exit 1 + fi + done +} + read_firmwares_expect_nofile() { for i in $(seq 0 3); do @@ -242,6 +291,21 @@ test_batched_request_firmware_into_buf_nofile() echo "OK" } +test_request_partial_firmware_into_buf_nofile() +{ + echo -n "Test request_partial_firmware_into_buf() off=$1 size=$2 nofile: " + config_reset + config_set_name nope-test-firmware.bin + config_set_into_buf + config_set_partial + config_set_buf_size $2 + config_set_file_offset $1 + config_trigger_sync + read_firmwares_expect_nofile + release_all_firmware + echo "OK" +} + test_batched_request_firmware_direct_nofile() { echo -n "Batched request_firmware_direct() nofile try #$1: " @@ -356,6 +420,21 @@ test_request_firmware_nowait_custom() echo "OK" } +test_request_partial_firmware_into_buf() +{ + echo -n "Test request_partial_firmware_into_buf() off=$1 size=$2: " + config_reset + config_set_name $TEST_FIRMWARE_INTO_BUF_FILENAME + config_set_into_buf + config_set_partial + config_set_buf_size $2 + config_set_file_offset $1 + config_trigger_sync + read_partial_firmwares normal $1 $2 + release_all_firmware + echo "OK" +} + # Only continue if batched request triggers are present on the # test-firmware driver test_config_present @@ -383,6 +462,12 @@ for i in $(seq 1 5); do test_request_firmware_nowait_custom $i normal done +# Partial loads cannot use fallback, so do not repeat tests. +test_request_partial_firmware_into_buf 0 10 +test_request_partial_firmware_into_buf 0 5 +test_request_partial_firmware_into_buf 1 6 +test_request_partial_firmware_into_buf 2 10 + # Test for file not found, errors are expected, the failure would be # a hung task, which would require a hard reset. echo @@ -407,6 +492,12 @@ for i in $(seq 1 5); do test_request_firmware_nowait_custom_nofile $i done +# Partial loads cannot use fallback, so do not repeat tests. +test_request_partial_firmware_into_buf_nofile 0 10 +test_request_partial_firmware_into_buf_nofile 0 5 +test_request_partial_firmware_into_buf_nofile 1 6 +test_request_partial_firmware_into_buf_nofile 2 10 + test "$HAS_FW_LOADER_COMPRESS" != "yes" && exit 0 # test with both files present -- cgit v1.3.1 From e9b60476bea058d85f8029e6701d9476f7fdb92f Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Fri, 2 Oct 2020 17:26:25 +0530 Subject: kselftest/arm64: Add utilities and a test to validate mte memory This test checks that the memory tag is present after mte allocation and the memory is accessible with those tags. This testcase verifies all sync, async and none mte error reporting mode. The allocated mte buffers are verified for Allocated range (no error expected while accessing buffer), Underflow range, and Overflow range. Different test scenarios covered here are, * Verify that mte memory are accessible at byte/block level. * Force underflow and overflow to occur and check the data consistency. * Check to/from between tagged and untagged memory. * Check that initial allocated memory to have 0 tag. This change also creates the necessary infrastructure to add mte test cases. MTE kselftests can use the several utility functions provided here to add wide variety of mte test scenarios. GCC compiler need flag '-march=armv8.5-a+memtag' so those flags are verified before compilation. The mte testcases can be launched with kselftest framework as, make TARGETS=arm64 ARM64_SUBTARGETS=mte kselftest or compiled as, make -C tools/testing/selftests TARGETS=arm64 ARM64_SUBTARGETS=mte CC='compiler' Co-developed-by: Gabor Kertesz Signed-off-by: Gabor Kertesz Signed-off-by: Amit Daniel Kachhap Tested-by: Catalin Marinas Acked-by: Catalin Marinas Cc: Shuah Khan Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20201002115630.24683-2-amit.kachhap@arm.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/Makefile | 2 +- tools/testing/selftests/arm64/mte/.gitignore | 1 + tools/testing/selftests/arm64/mte/Makefile | 29 ++ .../selftests/arm64/mte/check_buffer_fill.c | 475 +++++++++++++++++++++ .../testing/selftests/arm64/mte/mte_common_util.c | 341 +++++++++++++++ .../testing/selftests/arm64/mte/mte_common_util.h | 117 +++++ tools/testing/selftests/arm64/mte/mte_def.h | 60 +++ tools/testing/selftests/arm64/mte/mte_helper.S | 114 +++++ 8 files changed, 1138 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/arm64/mte/.gitignore create mode 100644 tools/testing/selftests/arm64/mte/Makefile create mode 100644 tools/testing/selftests/arm64/mte/check_buffer_fill.c create mode 100644 tools/testing/selftests/arm64/mte/mte_common_util.c create mode 100644 tools/testing/selftests/arm64/mte/mte_common_util.h create mode 100644 tools/testing/selftests/arm64/mte/mte_def.h create mode 100644 tools/testing/selftests/arm64/mte/mte_helper.S (limited to 'tools') diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index 93b567d23c8b..3723d9dea11a 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -4,7 +4,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -ARM64_SUBTARGETS ?= tags signal +ARM64_SUBTARGETS ?= tags signal mte else ARM64_SUBTARGETS := endif diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore new file mode 100644 index 000000000000..3f8c1f6c82b9 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/.gitignore @@ -0,0 +1 @@ +check_buffer_fill diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile new file mode 100644 index 000000000000..2480226dfe57 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/Makefile @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020 ARM Limited + +CFLAGS += -std=gnu99 -I. +SRCS := $(filter-out mte_common_util.c,$(wildcard *.c)) +PROGS := $(patsubst %.c,%,$(SRCS)) + +#Add mte compiler option +ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep gcc),) +CFLAGS += -march=armv8.5-a+memtag +endif + +#check if the compiler works well +mte_cc_support := $(shell if ($(CC) $(CFLAGS) -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi) + +ifeq ($(mte_cc_support),1) +# Generated binaries to be installed by top KSFT script +TEST_GEN_PROGS := $(PROGS) + +# Get Kernel headers installed and use them. +KSFT_KHDR_INSTALL := 1 +endif + +# Include KSFT lib.mk. +include ../../lib.mk + +ifeq ($(mte_cc_support),1) +$(TEST_GEN_PROGS): mte_common_util.c mte_common_util.h mte_helper.S +endif diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c new file mode 100644 index 000000000000..242635d79035 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c @@ -0,0 +1,475 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include +#include +#include + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define OVERFLOW_RANGE MT_GRANULE_SIZE + +static int sizes[] = { + 1, 555, 1033, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE, + /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0 +}; + +enum mte_block_test_alloc { + UNTAGGED_TAGGED, + TAGGED_UNTAGGED, + TAGGED_TAGGED, + BLOCK_ALLOC_MAX, +}; + +static int check_buffer_by_byte(int mem_type, int mode) +{ + char *ptr; + int i, j, item; + bool err; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + item = sizeof(sizes)/sizeof(int); + + for (i = 0; i < item; i++) { + ptr = (char *)mte_allocate_memory(sizes[i], mem_type, 0, true); + if (check_allocated_memory(ptr, sizes[i], mem_type, true) != KSFT_PASS) + return KSFT_FAIL; + mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i]); + /* Set some value in tagged memory */ + for (j = 0; j < sizes[i]; j++) + ptr[j] = '1'; + mte_wait_after_trig(); + err = cur_mte_cxt.fault_valid; + /* Check the buffer whether it is filled. */ + for (j = 0; j < sizes[i] && !err; j++) { + if (ptr[j] != '1') + err = true; + } + mte_free_memory((void *)ptr, sizes[i], mem_type, true); + + if (err) + break; + } + if (!err) + return KSFT_PASS; + else + return KSFT_FAIL; +} + +static int check_buffer_underflow_by_byte(int mem_type, int mode, + int underflow_range) +{ + char *ptr; + int i, j, item, last_index; + bool err; + char *und_ptr = NULL; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + item = sizeof(sizes)/sizeof(int); + for (i = 0; i < item; i++) { + ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0, + underflow_range, 0); + if (check_allocated_memory_range(ptr, sizes[i], mem_type, + underflow_range, 0) != KSFT_PASS) + return KSFT_FAIL; + + mte_initialize_current_context(mode, (uintptr_t)ptr, -underflow_range); + last_index = 0; + /* Set some value in tagged memory and make the buffer underflow */ + for (j = sizes[i] - 1; (j >= -underflow_range) && + (cur_mte_cxt.fault_valid == false); j--) { + ptr[j] = '1'; + last_index = j; + } + mte_wait_after_trig(); + err = false; + /* Check whether the buffer is filled */ + for (j = 0; j < sizes[i]; j++) { + if (ptr[j] != '1') { + err = true; + ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n", + j, ptr); + break; + } + } + if (err) + goto check_buffer_underflow_by_byte_err; + + switch (mode) { + case MTE_NONE_ERR: + if (cur_mte_cxt.fault_valid == true || last_index != -underflow_range) { + err = true; + break; + } + /* There were no fault so the underflow area should be filled */ + und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr - underflow_range); + for (j = 0 ; j < underflow_range; j++) { + if (und_ptr[j] != '1') { + err = true; + break; + } + } + break; + case MTE_ASYNC_ERR: + /* Imprecise fault should occur otherwise return error */ + if (cur_mte_cxt.fault_valid == false) { + err = true; + break; + } + /* + * The imprecise fault is checked after the write to the buffer, + * so the underflow area before the fault should be filled. + */ + und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr); + for (j = last_index ; j < 0 ; j++) { + if (und_ptr[j] != '1') { + err = true; + break; + } + } + break; + case MTE_SYNC_ERR: + /* Precise fault should occur otherwise return error */ + if (!cur_mte_cxt.fault_valid || (last_index != (-1))) { + err = true; + break; + } + /* Underflow area should not be filled */ + und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr); + if (und_ptr[-1] == '1') + err = true; + break; + default: + err = true; + break; + } +check_buffer_underflow_by_byte_err: + mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, underflow_range, 0); + if (err) + break; + } + return (err ? KSFT_FAIL : KSFT_PASS); +} + +static int check_buffer_overflow_by_byte(int mem_type, int mode, + int overflow_range) +{ + char *ptr; + int i, j, item, last_index; + bool err; + size_t tagged_size, overflow_size; + char *over_ptr = NULL; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + item = sizeof(sizes)/sizeof(int); + for (i = 0; i < item; i++) { + ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0, + 0, overflow_range); + if (check_allocated_memory_range(ptr, sizes[i], mem_type, + 0, overflow_range) != KSFT_PASS) + return KSFT_FAIL; + + tagged_size = MT_ALIGN_UP(sizes[i]); + + mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i] + overflow_range); + + /* Set some value in tagged memory and make the buffer underflow */ + for (j = 0, last_index = 0 ; (j < (sizes[i] + overflow_range)) && + (cur_mte_cxt.fault_valid == false); j++) { + ptr[j] = '1'; + last_index = j; + } + mte_wait_after_trig(); + err = false; + /* Check whether the buffer is filled */ + for (j = 0; j < sizes[i]; j++) { + if (ptr[j] != '1') { + err = true; + ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n", + j, ptr); + break; + } + } + if (err) + goto check_buffer_overflow_by_byte_err; + + overflow_size = overflow_range - (tagged_size - sizes[i]); + + switch (mode) { + case MTE_NONE_ERR: + if ((cur_mte_cxt.fault_valid == true) || + (last_index != (sizes[i] + overflow_range - 1))) { + err = true; + break; + } + /* There were no fault so the overflow area should be filled */ + over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size); + for (j = 0 ; j < overflow_size; j++) { + if (over_ptr[j] != '1') { + err = true; + break; + } + } + break; + case MTE_ASYNC_ERR: + /* Imprecise fault should occur otherwise return error */ + if (cur_mte_cxt.fault_valid == false) { + err = true; + break; + } + /* + * The imprecise fault is checked after the write to the buffer, + * so the overflow area should be filled before the fault. + */ + over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr); + for (j = tagged_size ; j < last_index; j++) { + if (over_ptr[j] != '1') { + err = true; + break; + } + } + break; + case MTE_SYNC_ERR: + /* Precise fault should occur otherwise return error */ + if (!cur_mte_cxt.fault_valid || (last_index != tagged_size)) { + err = true; + break; + } + /* Underflow area should not be filled */ + over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size); + for (j = 0 ; j < overflow_size; j++) { + if (over_ptr[j] == '1') + err = true; + } + break; + default: + err = true; + break; + } +check_buffer_overflow_by_byte_err: + mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, 0, overflow_range); + if (err) + break; + } + return (err ? KSFT_FAIL : KSFT_PASS); +} + +static int check_buffer_by_block_iterate(int mem_type, int mode, size_t size) +{ + char *src, *dst; + int j, result = KSFT_PASS; + enum mte_block_test_alloc alloc_type = UNTAGGED_TAGGED; + + for (alloc_type = UNTAGGED_TAGGED; alloc_type < (int) BLOCK_ALLOC_MAX; alloc_type++) { + switch (alloc_type) { + case UNTAGGED_TAGGED: + src = (char *)mte_allocate_memory(size, mem_type, 0, false); + if (check_allocated_memory(src, size, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + dst = (char *)mte_allocate_memory(size, mem_type, 0, true); + if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) { + mte_free_memory((void *)src, size, mem_type, false); + return KSFT_FAIL; + } + + break; + case TAGGED_UNTAGGED: + dst = (char *)mte_allocate_memory(size, mem_type, 0, false); + if (check_allocated_memory(dst, size, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + src = (char *)mte_allocate_memory(size, mem_type, 0, true); + if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS) { + mte_free_memory((void *)dst, size, mem_type, false); + return KSFT_FAIL; + } + break; + case TAGGED_TAGGED: + src = (char *)mte_allocate_memory(size, mem_type, 0, true); + if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS) + return KSFT_FAIL; + + dst = (char *)mte_allocate_memory(size, mem_type, 0, true); + if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) { + mte_free_memory((void *)src, size, mem_type, true); + return KSFT_FAIL; + } + break; + default: + return KSFT_FAIL; + } + + cur_mte_cxt.fault_valid = false; + result = KSFT_PASS; + mte_initialize_current_context(mode, (uintptr_t)dst, size); + /* Set some value in memory and copy*/ + memset((void *)src, (int)'1', size); + memcpy((void *)dst, (void *)src, size); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid) { + result = KSFT_FAIL; + goto check_buffer_by_block_err; + } + /* Check the buffer whether it is filled. */ + for (j = 0; j < size; j++) { + if (src[j] != dst[j] || src[j] != '1') { + result = KSFT_FAIL; + break; + } + } +check_buffer_by_block_err: + mte_free_memory((void *)src, size, mem_type, + MT_FETCH_TAG((uintptr_t)src) ? true : false); + mte_free_memory((void *)dst, size, mem_type, + MT_FETCH_TAG((uintptr_t)dst) ? true : false); + if (result != KSFT_PASS) + return result; + } + return result; +} + +static int check_buffer_by_block(int mem_type, int mode) +{ + int i, item, result = KSFT_PASS; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + item = sizeof(sizes)/sizeof(int); + cur_mte_cxt.fault_valid = false; + for (i = 0; i < item; i++) { + result = check_buffer_by_block_iterate(mem_type, mode, sizes[i]); + if (result != KSFT_PASS) + break; + } + return result; +} + +static int compare_memory_tags(char *ptr, size_t size, int tag) +{ + int i, new_tag; + + for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) { + new_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i))); + if (tag != new_tag) { + ksft_print_msg("FAIL: child mte tag mismatch\n"); + return KSFT_FAIL; + } + } + return KSFT_PASS; +} + +static int check_memory_initial_tags(int mem_type, int mode, int mapping) +{ + char *ptr; + int run, fd; + int total = sizeof(sizes)/sizeof(int); + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < total; run++) { + /* check initial tags for anonymous mmap */ + ptr = (char *)mte_allocate_memory(sizes[run], mem_type, mapping, false); + if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) { + mte_free_memory((void *)ptr, sizes[run], mem_type, false); + return KSFT_FAIL; + } + mte_free_memory((void *)ptr, sizes[run], mem_type, false); + + /* check initial tags for file mmap */ + fd = create_temp_file(); + if (fd == -1) + return KSFT_FAIL; + ptr = (char *)mte_allocate_file_memory(sizes[run], mem_type, mapping, false, fd); + if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS) { + close(fd); + return KSFT_FAIL; + } + if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) { + mte_free_memory((void *)ptr, sizes[run], mem_type, false); + close(fd); + return KSFT_FAIL; + } + mte_free_memory((void *)ptr, sizes[run], mem_type, false); + close(fd); + } + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + size_t page_size = getpagesize(); + int item = sizeof(sizes)/sizeof(int); + + sizes[item - 3] = page_size - 1; + sizes[item - 2] = page_size; + sizes[item - 1] = page_size + 1; + + err = mte_default_setup(); + if (err) + return err; + + /* Register SIGSEGV handler */ + mte_register_signal(SIGSEGV, mte_default_handler); + + /* Buffer by byte tests */ + evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_SYNC_ERR), + "Check buffer correctness by byte with sync err mode and mmap memory\n"); + evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_ASYNC_ERR), + "Check buffer correctness by byte with async err mode and mmap memory\n"); + evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_SYNC_ERR), + "Check buffer correctness by byte with sync err mode and mmap/mprotect memory\n"); + evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_ASYNC_ERR), + "Check buffer correctness by byte with async err mode and mmap/mprotect memory\n"); + + /* Check buffer underflow with underflow size as 16 */ + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE), + "Check buffer write underflow by byte with sync mode and mmap memory\n"); + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE), + "Check buffer write underflow by byte with async mode and mmap memory\n"); + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE), + "Check buffer write underflow by byte with tag check fault ignore and mmap memory\n"); + + /* Check buffer underflow with underflow size as page size */ + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, page_size), + "Check buffer write underflow by byte with sync mode and mmap memory\n"); + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, page_size), + "Check buffer write underflow by byte with async mode and mmap memory\n"); + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, page_size), + "Check buffer write underflow by byte with tag check fault ignore and mmap memory\n"); + + /* Check buffer overflow with overflow size as 16 */ + evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE), + "Check buffer write overflow by byte with sync mode and mmap memory\n"); + evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE), + "Check buffer write overflow by byte with async mode and mmap memory\n"); + evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE), + "Check buffer write overflow by byte with tag fault ignore mode and mmap memory\n"); + + /* Buffer by block tests */ + evaluate_test(check_buffer_by_block(USE_MMAP, MTE_SYNC_ERR), + "Check buffer write correctness by block with sync mode and mmap memory\n"); + evaluate_test(check_buffer_by_block(USE_MMAP, MTE_ASYNC_ERR), + "Check buffer write correctness by block with async mode and mmap memory\n"); + evaluate_test(check_buffer_by_block(USE_MMAP, MTE_NONE_ERR), + "Check buffer write correctness by block with tag fault ignore and mmap memory\n"); + + /* Initial tags are supposed to be 0 */ + evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check initial tags with private mapping, sync error mode and mmap memory\n"); + evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE), + "Check initial tags with private mapping, sync error mode and mmap/mprotect memory\n"); + evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + "Check initial tags with shared mapping, sync error mode and mmap memory\n"); + evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED), + "Check initial tags with shared mapping, sync error mode and mmap/mprotect memory\n"); + + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c new file mode 100644 index 000000000000..39f8908988ea --- /dev/null +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -0,0 +1,341 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define INIT_BUFFER_SIZE 256 + +struct mte_fault_cxt cur_mte_cxt; +static unsigned int mte_cur_mode; +static unsigned int mte_cur_pstate_tco; + +void mte_default_handler(int signum, siginfo_t *si, void *uc) +{ + unsigned long addr = (unsigned long)si->si_addr; + + if (signum == SIGSEGV) { +#ifdef DEBUG + ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx\n", + ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code); +#endif + if (si->si_code == SEGV_MTEAERR) { + if (cur_mte_cxt.trig_si_code == si->si_code) + cur_mte_cxt.fault_valid = true; + return; + } + /* Compare the context for precise error */ + else if (si->si_code == SEGV_MTESERR) { + if (cur_mte_cxt.trig_si_code == si->si_code && + ((cur_mte_cxt.trig_range >= 0 && + addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && + addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || + (cur_mte_cxt.trig_range < 0 && + addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && + addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) { + cur_mte_cxt.fault_valid = true; + /* Adjust the pc by 4 */ + ((ucontext_t *)uc)->uc_mcontext.pc += 4; + } else { + ksft_print_msg("Invalid MTE synchronous exception caught!\n"); + exit(1); + } + } else { + ksft_print_msg("Unknown SIGSEGV exception caught!\n"); + exit(1); + } + } else if (signum == SIGBUS) { + ksft_print_msg("INFO: SIGBUS signal at pc=%lx, fault addr=%lx, si_code=%lx\n", + ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code); + if ((cur_mte_cxt.trig_range >= 0 && + addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && + addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || + (cur_mte_cxt.trig_range < 0 && + addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && + addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) { + cur_mte_cxt.fault_valid = true; + /* Adjust the pc by 4 */ + ((ucontext_t *)uc)->uc_mcontext.pc += 4; + } + } +} + +void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *)) +{ + struct sigaction sa; + + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO; + sigemptyset(&sa.sa_mask); + sigaction(signal, &sa, NULL); +} + +void mte_wait_after_trig(void) +{ + sched_yield(); +} + +void *mte_insert_tags(void *ptr, size_t size) +{ + void *tag_ptr; + int align_size; + + if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) { + ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr); + return NULL; + } + align_size = MT_ALIGN_UP(size); + tag_ptr = mte_insert_random_tag(ptr); + mte_set_tag_address_range(tag_ptr, align_size); + return tag_ptr; +} + +void mte_clear_tags(void *ptr, size_t size) +{ + if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) { + ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr); + return; + } + size = MT_ALIGN_UP(size); + ptr = (void *)MT_CLEAR_TAG((unsigned long)ptr); + mte_clear_tag_address_range(ptr, size); +} + +static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping, + size_t range_before, size_t range_after, + bool tags, int fd) +{ + void *ptr; + int prot_flag, map_flag; + size_t entire_size = size + range_before + range_after; + + if (mem_type != USE_MALLOC && mem_type != USE_MMAP && + mem_type != USE_MPROTECT) { + ksft_print_msg("FAIL: Invalid allocate request\n"); + return NULL; + } + if (mem_type == USE_MALLOC) + return malloc(entire_size) + range_before; + + prot_flag = PROT_READ | PROT_WRITE; + if (mem_type == USE_MMAP) + prot_flag |= PROT_MTE; + + map_flag = mapping; + if (fd == -1) + map_flag = MAP_ANONYMOUS | map_flag; + if (!(mapping & MAP_SHARED)) + map_flag |= MAP_PRIVATE; + ptr = mmap(NULL, entire_size, prot_flag, map_flag, fd, 0); + if (ptr == MAP_FAILED) { + ksft_print_msg("FAIL: mmap allocation\n"); + return NULL; + } + if (mem_type == USE_MPROTECT) { + if (mprotect(ptr, entire_size, prot_flag | PROT_MTE)) { + munmap(ptr, size); + ksft_print_msg("FAIL: mprotect PROT_MTE property\n"); + return NULL; + } + } + if (tags) + ptr = mte_insert_tags(ptr + range_before, size); + return ptr; +} + +void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping, + size_t range_before, size_t range_after) +{ + return __mte_allocate_memory_range(size, mem_type, mapping, range_before, + range_after, true, -1); +} + +void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags) +{ + return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, -1); +} + +void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags, int fd) +{ + int index; + char buffer[INIT_BUFFER_SIZE]; + + if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) { + ksft_print_msg("FAIL: Invalid mmap file request\n"); + return NULL; + } + /* Initialize the file for mappable size */ + lseek(fd, 0, SEEK_SET); + for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE) + write(fd, buffer, INIT_BUFFER_SIZE); + index -= INIT_BUFFER_SIZE; + write(fd, buffer, size - index); + return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd); +} + +void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping, + size_t range_before, size_t range_after, int fd) +{ + int index; + char buffer[INIT_BUFFER_SIZE]; + int map_size = size + range_before + range_after; + + if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) { + ksft_print_msg("FAIL: Invalid mmap file request\n"); + return NULL; + } + /* Initialize the file for mappable size */ + lseek(fd, 0, SEEK_SET); + for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE) + write(fd, buffer, INIT_BUFFER_SIZE); + index -= INIT_BUFFER_SIZE; + write(fd, buffer, map_size - index); + return __mte_allocate_memory_range(size, mem_type, mapping, range_before, + range_after, true, fd); +} + +static void __mte_free_memory_range(void *ptr, size_t size, int mem_type, + size_t range_before, size_t range_after, bool tags) +{ + switch (mem_type) { + case USE_MALLOC: + free(ptr - range_before); + break; + case USE_MMAP: + case USE_MPROTECT: + if (tags) + mte_clear_tags(ptr, size); + munmap(ptr - range_before, size + range_before + range_after); + break; + default: + ksft_print_msg("FAIL: Invalid free request\n"); + break; + } +} + +void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type, + size_t range_before, size_t range_after) +{ + __mte_free_memory_range(ptr, size, mem_type, range_before, range_after, true); +} + +void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags) +{ + __mte_free_memory_range(ptr, size, mem_type, 0, 0, tags); +} + +void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range) +{ + cur_mte_cxt.fault_valid = false; + cur_mte_cxt.trig_addr = ptr; + cur_mte_cxt.trig_range = range; + if (mode == MTE_SYNC_ERR) + cur_mte_cxt.trig_si_code = SEGV_MTESERR; + else if (mode == MTE_ASYNC_ERR) + cur_mte_cxt.trig_si_code = SEGV_MTEAERR; + else + cur_mte_cxt.trig_si_code = 0; +} + +int mte_switch_mode(int mte_option, unsigned long incl_mask) +{ + unsigned long en = 0; + + if (!(mte_option == MTE_SYNC_ERR || mte_option == MTE_ASYNC_ERR || + mte_option == MTE_NONE_ERR || incl_mask <= MTE_ALLOW_NON_ZERO_TAG)) { + ksft_print_msg("FAIL: Invalid mte config option\n"); + return -EINVAL; + } + en = PR_TAGGED_ADDR_ENABLE; + if (mte_option == MTE_SYNC_ERR) + en |= PR_MTE_TCF_SYNC; + else if (mte_option == MTE_ASYNC_ERR) + en |= PR_MTE_TCF_ASYNC; + else if (mte_option == MTE_NONE_ERR) + en |= PR_MTE_TCF_NONE; + + en |= (incl_mask << PR_MTE_TAG_SHIFT); + /* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */ + if (!prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) == 0) { + ksft_print_msg("FAIL:prctl PR_SET_TAGGED_ADDR_CTRL for mte mode\n"); + return -EINVAL; + } + return 0; +} + +#define ID_AA64PFR1_MTE_SHIFT 8 +#define ID_AA64PFR1_MTE 2 + +int mte_default_setup(void) +{ + unsigned long hwcaps = getauxval(AT_HWCAP); + unsigned long en = 0; + int ret; + + if (!(hwcaps & HWCAP_CPUID)) { + ksft_print_msg("FAIL: CPUID registers unavailable\n"); + return KSFT_FAIL; + } + /* Read ID_AA64PFR1_EL1 register */ + asm volatile("mrs %0, id_aa64pfr1_el1" : "=r"(hwcaps) : : "memory"); + if (((hwcaps >> ID_AA64PFR1_MTE_SHIFT) & MT_TAG_MASK) != ID_AA64PFR1_MTE) { + ksft_print_msg("FAIL: MTE features unavailable\n"); + return KSFT_SKIP; + } + /* Get current mte mode */ + ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0); + if (ret < 0) { + ksft_print_msg("FAIL:prctl PR_GET_TAGGED_ADDR_CTRL with error =%d\n", ret); + return KSFT_FAIL; + } + if (ret & PR_MTE_TCF_SYNC) + mte_cur_mode = MTE_SYNC_ERR; + else if (ret & PR_MTE_TCF_ASYNC) + mte_cur_mode = MTE_ASYNC_ERR; + else if (ret & PR_MTE_TCF_NONE) + mte_cur_mode = MTE_NONE_ERR; + + mte_cur_pstate_tco = mte_get_pstate_tco(); + /* Disable PSTATE.TCO */ + mte_disable_pstate_tco(); + return 0; +} + +void mte_restore_setup(void) +{ + mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG); + if (mte_cur_pstate_tco == MT_PSTATE_TCO_EN) + mte_enable_pstate_tco(); + else if (mte_cur_pstate_tco == MT_PSTATE_TCO_DIS) + mte_disable_pstate_tco(); +} + +int create_temp_file(void) +{ + int fd; + char filename[] = "/dev/shm/tmp_XXXXXX"; + + /* Create a file in the tmpfs filesystem */ + fd = mkstemp(&filename[0]); + if (fd == -1) { + ksft_print_msg("FAIL: Unable to open temporary file\n"); + return 0; + } + unlink(&filename[0]); + return fd; +} diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h new file mode 100644 index 000000000000..45160e061a0e --- /dev/null +++ b/tools/testing/selftests/arm64/mte/mte_common_util.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +#ifndef _MTE_COMMON_UTIL_H +#define _MTE_COMMON_UTIL_H + +#include +#include +#include +#include +#include +#include +#include "mte_def.h" +#include "kselftest.h" + +enum mte_mem_type { + USE_MALLOC, + USE_MMAP, + USE_MPROTECT, +}; + +enum mte_mode { + MTE_NONE_ERR, + MTE_SYNC_ERR, + MTE_ASYNC_ERR, +}; + +struct mte_fault_cxt { + /* Address start which triggers mte tag fault */ + unsigned long trig_addr; + /* Address range for mte tag fault and negative value means underflow */ + ssize_t trig_range; + /* siginfo si code */ + unsigned long trig_si_code; + /* Flag to denote if correct fault caught */ + bool fault_valid; +}; + +extern struct mte_fault_cxt cur_mte_cxt; + +/* MTE utility functions */ +void mte_default_handler(int signum, siginfo_t *si, void *uc); +void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *)); +void mte_wait_after_trig(void); +void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags); +void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping, + size_t range_before, size_t range_after); +void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, + bool tags, int fd); +void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping, + size_t range_before, size_t range_after, int fd); +void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags); +void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type, + size_t range_before, size_t range_after); +void *mte_insert_tags(void *ptr, size_t size); +void mte_clear_tags(void *ptr, size_t size); +int mte_default_setup(void); +void mte_restore_setup(void); +int mte_switch_mode(int mte_option, unsigned long incl_mask); +void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range); + +/* Common utility functions */ +int create_temp_file(void); + +/* Assembly MTE utility functions */ +void *mte_insert_random_tag(void *ptr); +void *mte_get_tag_address(void *ptr); +void mte_set_tag_address_range(void *ptr, int range); +void mte_clear_tag_address_range(void *ptr, int range); +void mte_disable_pstate_tco(void); +void mte_enable_pstate_tco(void); +unsigned int mte_get_pstate_tco(void); + +/* Test framework static inline functions/macros */ +static inline void evaluate_test(int err, const char *msg) +{ + if (err == KSFT_PASS) + ksft_test_result_pass(msg); + else if (err == KSFT_FAIL) + ksft_test_result_fail(msg); +} + +static inline int check_allocated_memory(void *ptr, size_t size, + int mem_type, bool tags) +{ + if (ptr == NULL) { + ksft_print_msg("FAIL: memory allocation\n"); + return KSFT_FAIL; + } + + if (tags && !MT_FETCH_TAG((uintptr_t)ptr)) { + ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr); + mte_free_memory((void *)ptr, size, mem_type, false); + return KSFT_FAIL; + } + + return KSFT_PASS; +} + +static inline int check_allocated_memory_range(void *ptr, size_t size, int mem_type, + size_t range_before, size_t range_after) +{ + if (ptr == NULL) { + ksft_print_msg("FAIL: memory allocation\n"); + return KSFT_FAIL; + } + + if (!MT_FETCH_TAG((uintptr_t)ptr)) { + ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr); + mte_free_memory_tag_range((void *)ptr, size, mem_type, range_before, + range_after); + return KSFT_FAIL; + } + return KSFT_PASS; +} + +#endif /* _MTE_COMMON_UTIL_H */ diff --git a/tools/testing/selftests/arm64/mte/mte_def.h b/tools/testing/selftests/arm64/mte/mte_def.h new file mode 100644 index 000000000000..9b188254b61a --- /dev/null +++ b/tools/testing/selftests/arm64/mte/mte_def.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +/* + * Below definitions may be found in kernel headers, However, they are + * redefined here to decouple the MTE selftests compilations from them. + */ +#ifndef SEGV_MTEAERR +#define SEGV_MTEAERR 8 +#endif +#ifndef SEGV_MTESERR +#define SEGV_MTESERR 9 +#endif +#ifndef PROT_MTE +#define PROT_MTE 0x20 +#endif +#ifndef HWCAP2_MTE +#define HWCAP2_MTE (1 << 18) +#endif + +#ifndef PR_MTE_TCF_SHIFT +#define PR_MTE_TCF_SHIFT 1 +#endif +#ifndef PR_MTE_TCF_NONE +#define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) +#endif +#ifndef PR_MTE_TCF_SYNC +#define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) +#endif +#ifndef PR_MTE_TCF_ASYNC +#define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) +#endif +#ifndef PR_MTE_TAG_SHIFT +#define PR_MTE_TAG_SHIFT 3 +#endif + +/* MTE Hardware feature definitions below. */ +#define MT_TAG_SHIFT 56 +#define MT_TAG_MASK 0xFUL +#define MT_FREE_TAG 0x0UL +#define MT_GRANULE_SIZE 16 +#define MT_TAG_COUNT 16 +#define MT_INCLUDE_TAG_MASK 0xFFFF +#define MT_EXCLUDE_TAG_MASK 0x0 + +#define MT_ALIGN_GRANULE (MT_GRANULE_SIZE - 1) +#define MT_CLEAR_TAG(x) ((x) & ~(MT_TAG_MASK << MT_TAG_SHIFT)) +#define MT_SET_TAG(x, y) ((x) | (y << MT_TAG_SHIFT)) +#define MT_FETCH_TAG(x) ((x >> MT_TAG_SHIFT) & (MT_TAG_MASK)) +#define MT_ALIGN_UP(x) ((x + MT_ALIGN_GRANULE) & ~(MT_ALIGN_GRANULE)) + +#define MT_PSTATE_TCO_SHIFT 25 +#define MT_PSTATE_TCO_MASK ~(0x1 << MT_PSTATE_TCO_SHIFT) +#define MT_PSTATE_TCO_EN 1 +#define MT_PSTATE_TCO_DIS 0 + +#define MT_EXCLUDE_TAG(x) (1 << (x)) +#define MT_INCLUDE_VALID_TAG(x) (MT_INCLUDE_TAG_MASK ^ MT_EXCLUDE_TAG(x)) +#define MT_INCLUDE_VALID_TAGS(x) (MT_INCLUDE_TAG_MASK ^ (x)) +#define MTE_ALLOW_NON_ZERO_TAG MT_INCLUDE_VALID_TAG(0) diff --git a/tools/testing/selftests/arm64/mte/mte_helper.S b/tools/testing/selftests/arm64/mte/mte_helper.S new file mode 100644 index 000000000000..48e049fbad9a --- /dev/null +++ b/tools/testing/selftests/arm64/mte/mte_helper.S @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +#include "mte_def.h" + +#define ENTRY(name) \ + .globl name ;\ + .p2align 2;\ + .type name, @function ;\ +name: + +#define ENDPROC(name) \ + .size name, .-name ; + + .text +/* + * mte_insert_random_tag: Insert random tag and might be same as the source tag if + * the source pointer has it. + * Input: + * x0 - source pointer with a tag/no-tag + * Return: + * x0 - pointer with random tag + */ +ENTRY(mte_insert_random_tag) + irg x0, x0, xzr + ret +ENDPROC(mte_insert_random_tag) + +/* + * mte_get_tag_address: Get the tag from given address. + * Input: + * x0 - source pointer + * Return: + * x0 - pointer with appended tag + */ +ENTRY(mte_get_tag_address) + ldg x0, [x0] + ret +ENDPROC(mte_get_tag_address) + +/* + * mte_set_tag_address_range: Set the tag range from the given address + * Input: + * x0 - source pointer with tag data + * x1 - range + * Return: + * none + */ +ENTRY(mte_set_tag_address_range) + cbz x1, 2f +1: + stg x0, [x0, #0x0] + add x0, x0, #MT_GRANULE_SIZE + sub x1, x1, #MT_GRANULE_SIZE + cbnz x1, 1b +2: + ret +ENDPROC(mte_set_tag_address_range) + +/* + * mt_clear_tag_address_range: Clear the tag range from the given address + * Input: + * x0 - source pointer with tag data + * x1 - range + * Return: + * none + */ +ENTRY(mte_clear_tag_address_range) + cbz x1, 2f +1: + stzg x0, [x0, #0x0] + add x0, x0, #MT_GRANULE_SIZE + sub x1, x1, #MT_GRANULE_SIZE + cbnz x1, 1b +2: + ret +ENDPROC(mte_clear_tag_address_range) + +/* + * mte_enable_pstate_tco: Enable PSTATE.TCO (tag check override) field + * Input: + * none + * Return: + * none + */ +ENTRY(mte_enable_pstate_tco) + msr tco, #MT_PSTATE_TCO_EN + ret +ENDPROC(mte_enable_pstate_tco) + +/* + * mte_disable_pstate_tco: Disable PSTATE.TCO (tag check override) field + * Input: + * none + * Return: + * none + */ +ENTRY(mte_disable_pstate_tco) + msr tco, #MT_PSTATE_TCO_DIS + ret +ENDPROC(mte_disable_pstate_tco) + +/* + * mte_get_pstate_tco: Get PSTATE.TCO (tag check override) field + * Input: + * none + * Return: + * x0 + */ +ENTRY(mte_get_pstate_tco) + mrs x0, tco + ubfx x0, x0, #MT_PSTATE_TCO_SHIFT, #1 + ret +ENDPROC(mte_get_pstate_tco) -- cgit v1.3.1 From f3b2a26ca78da2ef36cf76d5511e3c94baee96a1 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Fri, 2 Oct 2020 17:26:26 +0530 Subject: kselftest/arm64: Verify mte tag inclusion via prctl This testcase verifies that the tag generated with "irg" instruction contains only included tags. This is done via prtcl call. This test covers 4 scenarios, * At least one included tag. * More than one included tags. * All included. * None included. Co-developed-by: Gabor Kertesz Signed-off-by: Gabor Kertesz Signed-off-by: Amit Daniel Kachhap Tested-by: Catalin Marinas Acked-by: Catalin Marinas Cc: Shuah Khan Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20201002115630.24683-3-amit.kachhap@arm.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/mte/.gitignore | 1 + .../selftests/arm64/mte/check_tags_inclusion.c | 185 +++++++++++++++++++++ 2 files changed, 186 insertions(+) create mode 100644 tools/testing/selftests/arm64/mte/check_tags_inclusion.c (limited to 'tools') diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore index 3f8c1f6c82b9..c3fca255d3d6 100644 --- a/tools/testing/selftests/arm64/mte/.gitignore +++ b/tools/testing/selftests/arm64/mte/.gitignore @@ -1 +1,2 @@ check_buffer_fill +check_tags_inclusion diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c new file mode 100644 index 000000000000..94d245a0ed56 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define BUFFER_SIZE (5 * MT_GRANULE_SIZE) +#define RUNS (MT_TAG_COUNT * 2) +#define MTE_LAST_TAG_MASK (0x7FFF) + +static int verify_mte_pointer_validity(char *ptr, int mode) +{ + mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE); + /* Check the validity of the tagged pointer */ + memset((void *)ptr, '1', BUFFER_SIZE); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid) + return KSFT_FAIL; + /* Proceed further for nonzero tags */ + if (!MT_FETCH_TAG((uintptr_t)ptr)) + return KSFT_PASS; + mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE + 1); + /* Check the validity outside the range */ + ptr[BUFFER_SIZE] = '2'; + mte_wait_after_trig(); + if (!cur_mte_cxt.fault_valid) + return KSFT_FAIL; + else + return KSFT_PASS; +} + +static int check_single_included_tags(int mem_type, int mode) +{ + char *ptr; + int tag, run, result = KSFT_PASS; + + ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false); + if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE, + mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) { + mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag)); + /* Try to catch a excluded tag by a number of tries. */ + for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) { + ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE); + /* Check tag value */ + if (MT_FETCH_TAG((uintptr_t)ptr) == tag) { + ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n", + MT_FETCH_TAG((uintptr_t)ptr), + MT_INCLUDE_VALID_TAG(tag)); + result = KSFT_FAIL; + break; + } + result = verify_mte_pointer_validity(ptr, mode); + } + } + mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE); + return result; +} + +static int check_multiple_included_tags(int mem_type, int mode) +{ + char *ptr; + int tag, run, result = KSFT_PASS; + unsigned long excl_mask = 0; + + ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false); + if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE, + mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + for (tag = 0; (tag < MT_TAG_COUNT - 1) && (result == KSFT_PASS); tag++) { + excl_mask |= 1 << tag; + mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask)); + /* Try to catch a excluded tag by a number of tries. */ + for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) { + ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE); + /* Check tag value */ + if (MT_FETCH_TAG((uintptr_t)ptr) < tag) { + ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n", + MT_FETCH_TAG((uintptr_t)ptr), + MT_INCLUDE_VALID_TAGS(excl_mask)); + result = KSFT_FAIL; + break; + } + result = verify_mte_pointer_validity(ptr, mode); + } + } + mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE); + return result; +} + +static int check_all_included_tags(int mem_type, int mode) +{ + char *ptr; + int run, result = KSFT_PASS; + + ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false); + if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE, + mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + mte_switch_mode(mode, MT_INCLUDE_TAG_MASK); + /* Try to catch a excluded tag by a number of tries. */ + for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) { + ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE); + /* + * Here tag byte can be between 0x0 to 0xF (full allowed range) + * so no need to match so just verify if it is writable. + */ + result = verify_mte_pointer_validity(ptr, mode); + } + mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE); + return result; +} + +static int check_none_included_tags(int mem_type, int mode) +{ + char *ptr; + int run; + + ptr = (char *)mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false); + if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK); + /* Try to catch a excluded tag by a number of tries. */ + for (run = 0; run < RUNS; run++) { + ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE); + /* Here all tags exluded so tag value generated should be 0 */ + if (MT_FETCH_TAG((uintptr_t)ptr)) { + ksft_print_msg("FAIL: included tag value found\n"); + mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, true); + return KSFT_FAIL; + } + mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE); + /* Check the write validity of the untagged pointer */ + memset((void *)ptr, '1', BUFFER_SIZE); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid) + break; + } + mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, false); + if (cur_mte_cxt.fault_valid) + return KSFT_FAIL; + else + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + + err = mte_default_setup(); + if (err) + return err; + + /* Register SIGSEGV handler */ + mte_register_signal(SIGSEGV, mte_default_handler); + + evaluate_test(check_single_included_tags(USE_MMAP, MTE_SYNC_ERR), + "Check an included tag value with sync mode\n"); + evaluate_test(check_multiple_included_tags(USE_MMAP, MTE_SYNC_ERR), + "Check different included tags value with sync mode\n"); + evaluate_test(check_none_included_tags(USE_MMAP, MTE_SYNC_ERR), + "Check none included tags value with sync mode\n"); + evaluate_test(check_all_included_tags(USE_MMAP, MTE_SYNC_ERR), + "Check all included tags value with sync mode\n"); + + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} -- cgit v1.3.1 From dfe537cf47186bfb59db3e57eed7417cd5efde17 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Fri, 2 Oct 2020 17:26:27 +0530 Subject: kselftest/arm64: Check forked child mte memory accessibility This test covers the mte memory behaviour of the forked process with different mapping properties and flags. It checks that all bytes of forked child memory are accessible with the same tag as that of the parent and memory accessed outside the tag range causes fault to occur. Co-developed-by: Gabor Kertesz Signed-off-by: Gabor Kertesz Signed-off-by: Amit Daniel Kachhap Tested-by: Catalin Marinas Acked-by: Catalin Marinas Cc: Shuah Khan Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20201002115630.24683-4-amit.kachhap@arm.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/mte/.gitignore | 1 + .../selftests/arm64/mte/check_child_memory.c | 195 +++++++++++++++++++++ 2 files changed, 196 insertions(+) create mode 100644 tools/testing/selftests/arm64/mte/check_child_memory.c (limited to 'tools') diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore index c3fca255d3d6..b5fcc0fb4d97 100644 --- a/tools/testing/selftests/arm64/mte/.gitignore +++ b/tools/testing/selftests/arm64/mte/.gitignore @@ -1,2 +1,3 @@ check_buffer_fill check_tags_inclusion +check_child_memory diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c new file mode 100644 index 000000000000..97bebdecd29e --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_child_memory.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define BUFFER_SIZE (5 * MT_GRANULE_SIZE) +#define RUNS (MT_TAG_COUNT) +#define UNDERFLOW MT_GRANULE_SIZE +#define OVERFLOW MT_GRANULE_SIZE + +static size_t page_size; +static int sizes[] = { + 1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE, + /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0 +}; + +static int check_child_tag_inheritance(char *ptr, int size, int mode) +{ + int i, parent_tag, child_tag, fault, child_status; + pid_t child; + + parent_tag = MT_FETCH_TAG((uintptr_t)ptr); + fault = 0; + + child = fork(); + if (child == -1) { + ksft_print_msg("FAIL: child process creation\n"); + return KSFT_FAIL; + } else if (child == 0) { + mte_initialize_current_context(mode, (uintptr_t)ptr, size); + /* Do copy on write */ + memset(ptr, '1', size); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == true) { + fault = 1; + goto check_child_tag_inheritance_err; + } + for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) { + child_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i))); + if (parent_tag != child_tag) { + ksft_print_msg("FAIL: child mte tag mismatch\n"); + fault = 1; + goto check_child_tag_inheritance_err; + } + } + mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW); + memset(ptr - UNDERFLOW, '2', UNDERFLOW); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == false) { + fault = 1; + goto check_child_tag_inheritance_err; + } + mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW); + memset(ptr + size, '3', OVERFLOW); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == false) { + fault = 1; + goto check_child_tag_inheritance_err; + } +check_child_tag_inheritance_err: + _exit(fault); + } + /* Wait for child process to terminate */ + wait(&child_status); + if (WIFEXITED(child_status)) + fault = WEXITSTATUS(child_status); + else + fault = 1; + return (fault) ? KSFT_FAIL : KSFT_PASS; +} + +static int check_child_memory_mapping(int mem_type, int mode, int mapping) +{ + char *ptr; + int run, result; + int item = sizeof(sizes)/sizeof(int); + + item = sizeof(sizes)/sizeof(int); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < item; run++) { + ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping, + UNDERFLOW, OVERFLOW); + if (check_allocated_memory_range(ptr, sizes[run], mem_type, + UNDERFLOW, OVERFLOW) != KSFT_PASS) + return KSFT_FAIL; + result = check_child_tag_inheritance(ptr, sizes[run], mode); + mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); + if (result == KSFT_FAIL) + return result; + } + return KSFT_PASS; +} + +static int check_child_file_mapping(int mem_type, int mode, int mapping) +{ + char *ptr, *map_ptr; + int run, fd, map_size, result = KSFT_PASS; + int total = sizeof(sizes)/sizeof(int); + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < total; run++) { + fd = create_temp_file(); + if (fd == -1) + return KSFT_FAIL; + + map_size = sizes[run] + OVERFLOW + UNDERFLOW; + map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd); + if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) { + close(fd); + return KSFT_FAIL; + } + ptr = map_ptr + UNDERFLOW; + mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]); + /* Only mte enabled memory will allow tag insertion */ + ptr = mte_insert_tags((void *)ptr, sizes[run]); + if (!ptr || cur_mte_cxt.fault_valid == true) { + ksft_print_msg("FAIL: Insert tags on file based memory\n"); + munmap((void *)map_ptr, map_size); + close(fd); + return KSFT_FAIL; + } + result = check_child_tag_inheritance(ptr, sizes[run], mode); + mte_clear_tags((void *)ptr, sizes[run]); + munmap((void *)map_ptr, map_size); + close(fd); + if (result != KSFT_PASS) + return KSFT_FAIL; + } + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + int item = sizeof(sizes)/sizeof(int); + + page_size = getpagesize(); + if (!page_size) { + ksft_print_msg("ERR: Unable to get page size\n"); + return KSFT_FAIL; + } + sizes[item - 3] = page_size - 1; + sizes[item - 2] = page_size; + sizes[item - 1] = page_size + 1; + + err = mte_default_setup(); + if (err) + return err; + + /* Register SIGSEGV handler */ + mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGBUS, mte_default_handler); + + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check child anonymous memory with private mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + "Check child anonymous memory with shared mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE), + "Check child anonymous memory with private mapping, imprecise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED), + "Check child anonymous memory with shared mapping, imprecise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE), + "Check child anonymous memory with private mapping, precise mode and mmap/mprotect memory\n"); + evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED), + "Check child anonymous memory with shared mapping, precise mode and mmap/mprotect memory\n"); + + evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check child file memory with private mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + "Check child file memory with shared mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE), + "Check child file memory with private mapping, imprecise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED), + "Check child file memory with shared mapping, imprecise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE), + "Check child file memory with private mapping, precise mode and mmap/mprotect memory\n"); + evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED), + "Check child file memory with shared mapping, precise mode and mmap/mprotect memory\n"); + + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} -- cgit v1.3.1 From 53ec81d232134f61806dfd93025320caa1aaf559 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Fri, 2 Oct 2020 17:26:28 +0530 Subject: kselftest/arm64: Verify all different mmap MTE options This testcase checks the different unsupported/supported options for mmap if used with PROT_MTE memory protection flag. These checks are, * Either pstate.tco enable or prctl PR_MTE_TCF_NONE option should not cause any tag mismatch faults. * Different combinations of anonymous/file memory mmap, mprotect, sync/async error mode and private/shared mappings should work. * mprotect should not be able to clear the PROT_MTE page property. Co-developed-by: Gabor Kertesz Signed-off-by: Gabor Kertesz Signed-off-by: Amit Daniel Kachhap Tested-by: Catalin Marinas Acked-by: Catalin Marinas Cc: Shuah Khan Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20201002115630.24683-5-amit.kachhap@arm.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/mte/.gitignore | 1 + .../selftests/arm64/mte/check_mmap_options.c | 262 +++++++++++++++++++++ 2 files changed, 263 insertions(+) create mode 100644 tools/testing/selftests/arm64/mte/check_mmap_options.c (limited to 'tools') diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore index b5fcc0fb4d97..79a215d3bbd0 100644 --- a/tools/testing/selftests/arm64/mte/.gitignore +++ b/tools/testing/selftests/arm64/mte/.gitignore @@ -1,3 +1,4 @@ check_buffer_fill check_tags_inclusion check_child_memory +check_mmap_options diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c new file mode 100644 index 000000000000..33b13b86199b --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define RUNS (MT_TAG_COUNT) +#define UNDERFLOW MT_GRANULE_SIZE +#define OVERFLOW MT_GRANULE_SIZE +#define TAG_CHECK_ON 0 +#define TAG_CHECK_OFF 1 + +static size_t page_size; +static int sizes[] = { + 1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE, + /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0 +}; + +static int check_mte_memory(char *ptr, int size, int mode, int tag_check) +{ + mte_initialize_current_context(mode, (uintptr_t)ptr, size); + memset(ptr, '1', size); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == true) + return KSFT_FAIL; + + mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW); + memset(ptr - UNDERFLOW, '2', UNDERFLOW); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON) + return KSFT_FAIL; + if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF) + return KSFT_FAIL; + + mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW); + memset(ptr + size, '3', OVERFLOW); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON) + return KSFT_FAIL; + if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF) + return KSFT_FAIL; + + return KSFT_PASS; +} + +static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check) +{ + char *ptr, *map_ptr; + int run, result, map_size; + int item = sizeof(sizes)/sizeof(int); + + item = sizeof(sizes)/sizeof(int); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < item; run++) { + map_size = sizes[run] + OVERFLOW + UNDERFLOW; + map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false); + if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + ptr = map_ptr + UNDERFLOW; + mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]); + /* Only mte enabled memory will allow tag insertion */ + ptr = mte_insert_tags((void *)ptr, sizes[run]); + if (!ptr || cur_mte_cxt.fault_valid == true) { + ksft_print_msg("FAIL: Insert tags on anonymous mmap memory\n"); + munmap((void *)map_ptr, map_size); + return KSFT_FAIL; + } + result = check_mte_memory(ptr, sizes[run], mode, tag_check); + mte_clear_tags((void *)ptr, sizes[run]); + mte_free_memory((void *)map_ptr, map_size, mem_type, false); + if (result == KSFT_FAIL) + return KSFT_FAIL; + } + return KSFT_PASS; +} + +static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check) +{ + char *ptr, *map_ptr; + int run, fd, map_size; + int total = sizeof(sizes)/sizeof(int); + int result = KSFT_PASS; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < total; run++) { + fd = create_temp_file(); + if (fd == -1) + return KSFT_FAIL; + + map_size = sizes[run] + UNDERFLOW + OVERFLOW; + map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd); + if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) { + close(fd); + return KSFT_FAIL; + } + ptr = map_ptr + UNDERFLOW; + mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]); + /* Only mte enabled memory will allow tag insertion */ + ptr = mte_insert_tags((void *)ptr, sizes[run]); + if (!ptr || cur_mte_cxt.fault_valid == true) { + ksft_print_msg("FAIL: Insert tags on file based memory\n"); + munmap((void *)map_ptr, map_size); + close(fd); + return KSFT_FAIL; + } + result = check_mte_memory(ptr, sizes[run], mode, tag_check); + mte_clear_tags((void *)ptr, sizes[run]); + munmap((void *)map_ptr, map_size); + close(fd); + if (result == KSFT_FAIL) + break; + } + return result; +} + +static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) +{ + char *ptr, *map_ptr; + int run, prot_flag, result, fd, map_size; + int total = sizeof(sizes)/sizeof(int); + + prot_flag = PROT_READ | PROT_WRITE; + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < total; run++) { + map_size = sizes[run] + OVERFLOW + UNDERFLOW; + ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping, + UNDERFLOW, OVERFLOW); + if (check_allocated_memory_range(ptr, sizes[run], mem_type, + UNDERFLOW, OVERFLOW) != KSFT_PASS) + return KSFT_FAIL; + map_ptr = ptr - UNDERFLOW; + /* Try to clear PROT_MTE property and verify it by tag checking */ + if (mprotect(map_ptr, map_size, prot_flag)) { + mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, + UNDERFLOW, OVERFLOW); + ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n"); + return KSFT_FAIL; + } + result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON); + mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); + if (result != KSFT_PASS) + return KSFT_FAIL; + + fd = create_temp_file(); + if (fd == -1) + return KSFT_FAIL; + ptr = (char *)mte_allocate_file_memory_tag_range(sizes[run], mem_type, mapping, + UNDERFLOW, OVERFLOW, fd); + if (check_allocated_memory_range(ptr, sizes[run], mem_type, + UNDERFLOW, OVERFLOW) != KSFT_PASS) { + close(fd); + return KSFT_FAIL; + } + map_ptr = ptr - UNDERFLOW; + /* Try to clear PROT_MTE property and verify it by tag checking */ + if (mprotect(map_ptr, map_size, prot_flag)) { + ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n"); + mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, + UNDERFLOW, OVERFLOW); + close(fd); + return KSFT_FAIL; + } + result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON); + mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); + close(fd); + if (result != KSFT_PASS) + return KSFT_FAIL; + } + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + int item = sizeof(sizes)/sizeof(int); + + err = mte_default_setup(); + if (err) + return err; + page_size = getpagesize(); + if (!page_size) { + ksft_print_msg("ERR: Unable to get page size\n"); + return KSFT_FAIL; + } + sizes[item - 3] = page_size - 1; + sizes[item - 2] = page_size; + sizes[item - 1] = page_size + 1; + + /* Register signal handlers */ + mte_register_signal(SIGBUS, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler); + + mte_enable_pstate_tco(); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), + "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), + "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check off\n"); + + mte_disable_pstate_tco(); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF), + "Check anonymous memory with private mapping, no error mode, mmap memory and tag check off\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF), + "Check file memory with private mapping, no error mode, mmap/mprotect memory and tag check off\n"); + + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check anonymous memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check anonymous memory with shared mapping, sync error mode, mmap memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check anonymous memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check anonymous memory with private mapping, async error mode, mmap memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check anonymous memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check anonymous memory with shared mapping, async error mode, mmap memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check anonymous memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n"); + + evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check file memory with private mapping, sync error mode, mmap memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check file memory with shared mapping, sync error mode, mmap memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check file memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check file memory with private mapping, async error mode, mmap memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check file memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check file memory with shared mapping, async error mode, mmap memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check file memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n"); + + evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n"); + evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE), + "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n"); + + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} -- cgit v1.3.1 From f981d8fa26469e19d2ce73006685ae88205e914a Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Fri, 2 Oct 2020 17:26:29 +0530 Subject: kselftest/arm64: Verify KSM page merge for MTE pages Add a testcase to check that KSM should not merge pages containing same data with same/different MTE tag values. This testcase has one positive tests and passes if page merging happens according to the above rule. It also saves and restores any modified ksm sysfs entries. Signed-off-by: Amit Daniel Kachhap Tested-by: Catalin Marinas Acked-by: Catalin Marinas Cc: Shuah Khan Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20201002115630.24683-6-amit.kachhap@arm.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/mte/.gitignore | 1 + .../selftests/arm64/mte/check_ksm_options.c | 159 +++++++++++++++++++++ 2 files changed, 160 insertions(+) create mode 100644 tools/testing/selftests/arm64/mte/check_ksm_options.c (limited to 'tools') diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore index 79a215d3bbd0..44e9bfdaeca6 100644 --- a/tools/testing/selftests/arm64/mte/.gitignore +++ b/tools/testing/selftests/arm64/mte/.gitignore @@ -2,3 +2,4 @@ check_buffer_fill check_tags_inclusion check_child_memory check_mmap_options +check_ksm_options diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c new file mode 100644 index 000000000000..bc41ae630c86 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define TEST_UNIT 10 +#define PATH_KSM "/sys/kernel/mm/ksm/" +#define MAX_LOOP 4 + +static size_t page_sz; +static unsigned long ksm_sysfs[5]; + +static unsigned long read_sysfs(char *str) +{ + FILE *f; + unsigned long val = 0; + + f = fopen(str, "r"); + if (!f) { + ksft_print_msg("ERR: missing %s\n", str); + return 0; + } + fscanf(f, "%lu", &val); + fclose(f); + return val; +} + +static void write_sysfs(char *str, unsigned long val) +{ + FILE *f; + + f = fopen(str, "w"); + if (!f) { + ksft_print_msg("ERR: missing %s\n", str); + return; + } + fprintf(f, "%lu", val); + fclose(f); +} + +static void mte_ksm_setup(void) +{ + ksm_sysfs[0] = read_sysfs(PATH_KSM "merge_across_nodes"); + write_sysfs(PATH_KSM "merge_across_nodes", 1); + ksm_sysfs[1] = read_sysfs(PATH_KSM "sleep_millisecs"); + write_sysfs(PATH_KSM "sleep_millisecs", 0); + ksm_sysfs[2] = read_sysfs(PATH_KSM "run"); + write_sysfs(PATH_KSM "run", 1); + ksm_sysfs[3] = read_sysfs(PATH_KSM "max_page_sharing"); + write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3] + TEST_UNIT); + ksm_sysfs[4] = read_sysfs(PATH_KSM "pages_to_scan"); + write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4] + TEST_UNIT); +} + +static void mte_ksm_restore(void) +{ + write_sysfs(PATH_KSM "merge_across_nodes", ksm_sysfs[0]); + write_sysfs(PATH_KSM "sleep_millisecs", ksm_sysfs[1]); + write_sysfs(PATH_KSM "run", ksm_sysfs[2]); + write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3]); + write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4]); +} + +static void mte_ksm_scan(void) +{ + int cur_count = read_sysfs(PATH_KSM "full_scans"); + int scan_count = cur_count + 1; + int max_loop_count = MAX_LOOP; + + while ((cur_count < scan_count) && max_loop_count) { + sleep(1); + cur_count = read_sysfs(PATH_KSM "full_scans"); + max_loop_count--; + } +#ifdef DEBUG + ksft_print_msg("INFO: pages_shared=%lu pages_sharing=%lu\n", + read_sysfs(PATH_KSM "pages_shared"), + read_sysfs(PATH_KSM "pages_sharing")); +#endif +} + +static int check_madvise_options(int mem_type, int mode, int mapping) +{ + char *ptr; + int err, ret; + + err = KSFT_FAIL; + if (access(PATH_KSM, F_OK) == -1) { + ksft_print_msg("ERR: Kernel KSM config not enabled\n"); + return err; + } + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + ptr = mte_allocate_memory(TEST_UNIT * page_sz, mem_type, mapping, true); + if (check_allocated_memory(ptr, TEST_UNIT * page_sz, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + /* Insert same data in all the pages */ + memset(ptr, 'A', TEST_UNIT * page_sz); + ret = madvise(ptr, TEST_UNIT * page_sz, MADV_MERGEABLE); + if (ret) { + ksft_print_msg("ERR: madvise failed to set MADV_UNMERGEABLE\n"); + goto madvise_err; + } + mte_ksm_scan(); + /* Tagged pages should not merge */ + if ((read_sysfs(PATH_KSM "pages_shared") < 1) || + (read_sysfs(PATH_KSM "pages_sharing") < (TEST_UNIT - 1))) + err = KSFT_PASS; +madvise_err: + mte_free_memory(ptr, TEST_UNIT * page_sz, mem_type, true); + return err; +} + +int main(int argc, char *argv[]) +{ + int err; + + err = mte_default_setup(); + if (err) + return err; + page_sz = getpagesize(); + if (!page_sz) { + ksft_print_msg("ERR: Unable to get page size\n"); + return KSFT_FAIL; + } + /* Register signal handlers */ + mte_register_signal(SIGBUS, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler); + /* Enable KSM */ + mte_ksm_setup(); + + evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check KSM mte page merge for private mapping, sync mode and mmap memory\n"); + evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE), + "Check KSM mte page merge for private mapping, async mode and mmap memory\n"); + evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + "Check KSM mte page merge for shared mapping, sync mode and mmap memory\n"); + evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED), + "Check KSM mte page merge for shared mapping, async mode and mmap memory\n"); + + mte_ksm_restore(); + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} -- cgit v1.3.1 From 4dafc08d0ba4768e8540f49ab40c3ea26e40d554 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Fri, 2 Oct 2020 17:26:30 +0530 Subject: kselftest/arm64: Check mte tagged user address in kernel Add a testcase to check that user address with valid/invalid mte tag works in kernel mode. This test verifies that the kernel API's __arch_copy_from_user/__arch_copy_to_user works by considering if the user pointer has valid/invalid allocation tags. In MTE sync mode, file memory read/write and other similar interfaces fails if a user memory with invalid tag is accessed in kernel. In async mode no such failure occurs. Signed-off-by: Amit Daniel Kachhap Tested-by: Catalin Marinas Acked-by: Catalin Marinas Cc: Shuah Khan Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20201002115630.24683-7-amit.kachhap@arm.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/mte/.gitignore | 1 + tools/testing/selftests/arm64/mte/check_user_mem.c | 111 +++++++++++++++++++++ .../testing/selftests/arm64/mte/mte_common_util.h | 1 + tools/testing/selftests/arm64/mte/mte_helper.S | 14 +++ 4 files changed, 127 insertions(+) create mode 100644 tools/testing/selftests/arm64/mte/check_user_mem.c (limited to 'tools') diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore index 44e9bfdaeca6..bc3ac63f3314 100644 --- a/tools/testing/selftests/arm64/mte/.gitignore +++ b/tools/testing/selftests/arm64/mte/.gitignore @@ -3,3 +3,4 @@ check_tags_inclusion check_child_memory check_mmap_options check_ksm_options +check_user_mem diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c new file mode 100644 index 000000000000..594e98e76880 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +static size_t page_sz; + +static int check_usermem_access_fault(int mem_type, int mode, int mapping) +{ + int fd, i, err; + char val = 'A'; + size_t len, read_len; + void *ptr, *ptr_next; + + err = KSFT_FAIL; + len = 2 * page_sz; + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + fd = create_temp_file(); + if (fd == -1) + return KSFT_FAIL; + for (i = 0; i < len; i++) + write(fd, &val, sizeof(val)); + lseek(fd, 0, 0); + ptr = mte_allocate_memory(len, mem_type, mapping, true); + if (check_allocated_memory(ptr, len, mem_type, true) != KSFT_PASS) { + close(fd); + return KSFT_FAIL; + } + mte_initialize_current_context(mode, (uintptr_t)ptr, len); + /* Copy from file into buffer with valid tag */ + read_len = read(fd, ptr, len); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid || read_len < len) + goto usermem_acc_err; + /* Verify same pattern is read */ + for (i = 0; i < len; i++) + if (*(char *)(ptr + i) != val) + break; + if (i < len) + goto usermem_acc_err; + + /* Tag the next half of memory with different value */ + ptr_next = (void *)((unsigned long)ptr + page_sz); + ptr_next = mte_insert_new_tag(ptr_next); + mte_set_tag_address_range(ptr_next, page_sz); + + lseek(fd, 0, 0); + /* Copy from file into buffer with invalid tag */ + read_len = read(fd, ptr, len); + mte_wait_after_trig(); + /* + * Accessing user memory in kernel with invalid tag should fail in sync + * mode without fault but may not fail in async mode as per the + * implemented MTE userspace support in Arm64 kernel. + */ + if (mode == MTE_SYNC_ERR && + !cur_mte_cxt.fault_valid && read_len < len) { + err = KSFT_PASS; + } else if (mode == MTE_ASYNC_ERR && + !cur_mte_cxt.fault_valid && read_len == len) { + err = KSFT_PASS; + } +usermem_acc_err: + mte_free_memory((void *)ptr, len, mem_type, true); + close(fd); + return err; +} + +int main(int argc, char *argv[]) +{ + int err; + + page_sz = getpagesize(); + if (!page_sz) { + ksft_print_msg("ERR: Unable to get page size\n"); + return KSFT_FAIL; + } + err = mte_default_setup(); + if (err) + return err; + /* Register signal handlers */ + mte_register_signal(SIGSEGV, mte_default_handler); + + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check memory access from kernel in sync mode, private mapping and mmap memory\n"); + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + "Check memory access from kernel in sync mode, shared mapping and mmap memory\n"); + + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE), + "Check memory access from kernel in async mode, private mapping and mmap memory\n"); + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED), + "Check memory access from kernel in async mode, shared mapping and mmap memory\n"); + + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h index 45160e061a0e..195a7d1879e6 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.h +++ b/tools/testing/selftests/arm64/mte/mte_common_util.h @@ -64,6 +64,7 @@ int create_temp_file(void); /* Assembly MTE utility functions */ void *mte_insert_random_tag(void *ptr); +void *mte_insert_new_tag(void *ptr); void *mte_get_tag_address(void *ptr); void mte_set_tag_address_range(void *ptr, int range); void mte_clear_tag_address_range(void *ptr, int range); diff --git a/tools/testing/selftests/arm64/mte/mte_helper.S b/tools/testing/selftests/arm64/mte/mte_helper.S index 48e049fbad9a..a02c04cd0aac 100644 --- a/tools/testing/selftests/arm64/mte/mte_helper.S +++ b/tools/testing/selftests/arm64/mte/mte_helper.S @@ -26,6 +26,20 @@ ENTRY(mte_insert_random_tag) ret ENDPROC(mte_insert_random_tag) +/* + * mte_insert_new_tag: Insert new tag and different from the source tag if + * source pointer has it. + * Input: + * x0 - source pointer with a tag/no-tag + * Return: + * x0 - pointer with random tag + */ +ENTRY(mte_insert_new_tag) + gmi x1, x0, xzr + irg x0, x0, x1 + ret +ENDPROC(mte_insert_new_tag) + /* * mte_get_tag_address: Get the tag from given address. * Input: -- cgit v1.3.1 From efc5b2e73cc0a8aabf377bcbd9296546e9cc9b2d Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Sun, 4 Oct 2020 18:43:38 +0300 Subject: tools/testing/scatterlist: Rejuvenate bit-rotten test A couple small tweaks are needed to make the test build and run on current kernels. Link: https://lore.kernel.org/r/20201004154340.1080481-3-leon@kernel.org Signed-off-by: Tvrtko Ursulin Cc: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- tools/testing/scatterlist/Makefile | 3 ++- tools/testing/scatterlist/linux/mm.h | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/scatterlist/Makefile b/tools/testing/scatterlist/Makefile index cbb003d9305e..c65233876622 100644 --- a/tools/testing/scatterlist/Makefile +++ b/tools/testing/scatterlist/Makefile @@ -14,7 +14,7 @@ targets: include $(TARGETS) main: $(OFILES) clean: - $(RM) $(TARGETS) $(OFILES) scatterlist.c linux/scatterlist.h linux/highmem.h linux/kmemleak.h asm/io.h + $(RM) $(TARGETS) $(OFILES) scatterlist.c linux/scatterlist.h linux/highmem.h linux/kmemleak.h linux/slab.h asm/io.h @rmdir asm scatterlist.c: ../../../lib/scatterlist.c @@ -28,4 +28,5 @@ include: ../../../include/linux/scatterlist.h @touch asm/io.h @touch linux/highmem.h @touch linux/kmemleak.h + @touch linux/slab.h @cp $< linux/scatterlist.h diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h index 6f9ac14aa800..6ae907f375d2 100644 --- a/tools/testing/scatterlist/linux/mm.h +++ b/tools/testing/scatterlist/linux/mm.h @@ -114,6 +114,12 @@ static inline void *kmalloc(unsigned int size, unsigned int flags) return malloc(size); } +static inline void * +kmalloc_array(unsigned int n, unsigned int size, unsigned int flags) +{ + return malloc(n * size); +} + #define kfree(x) free(x) #define kmemleak_alloc(a, b, c, d) @@ -122,4 +128,33 @@ static inline void *kmalloc(unsigned int size, unsigned int flags) #define PageSlab(p) (0) #define flush_kernel_dcache_page(p) +#define MAX_ERRNO 4095 + +#define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO) + +static inline void * __must_check ERR_PTR(long error) +{ + return (void *) error; +} + +static inline long __must_check PTR_ERR(__force const void *ptr) +{ + return (long) ptr; +} + +static inline bool __must_check IS_ERR(__force const void *ptr) +{ + return IS_ERR_VALUE((unsigned long)ptr); +} + +static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr) +{ + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + else + return 0; +} + +#define IS_ENABLED(x) (0) + #endif -- cgit v1.3.1 From 29d88681fb49f1e8225addd1e783bad1f7d95afc Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Sun, 4 Oct 2020 18:43:39 +0300 Subject: tools/testing/scatterlist: Show errors in human readable form Instead of just asserting dump some more useful info about what the test saw versus what it expected to see. Link: https://lore.kernel.org/r/20201004154340.1080481-4-leon@kernel.org Signed-off-by: Tvrtko Ursulin Cc: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- tools/testing/scatterlist/main.c | 44 +++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c index 0a1464181226..c6db0a1989b2 100644 --- a/tools/testing/scatterlist/main.c +++ b/tools/testing/scatterlist/main.c @@ -5,6 +5,15 @@ #define MAX_PAGES (64) +struct test { + int alloc_ret; + unsigned num_pages; + unsigned *pfn; + unsigned size; + unsigned int max_seg; + unsigned int expected_segments; +}; + static void set_pages(struct page **pages, const unsigned *array, unsigned num) { unsigned int i; @@ -17,17 +26,32 @@ static void set_pages(struct page **pages, const unsigned *array, unsigned num) #define pfn(...) (unsigned []){ __VA_ARGS__ } +static void fail(struct test *test, struct sg_table *st, const char *cond) +{ + unsigned int i; + + fprintf(stderr, "Failed on '%s'!\n\n", cond); + + printf("size = %u, max segment = %u, expected nents = %u\nst->nents = %u, st->orig_nents= %u\n", + test->size, test->max_seg, test->expected_segments, st->nents, + st->orig_nents); + + printf("%u input PFNs:", test->num_pages); + for (i = 0; i < test->num_pages; i++) + printf(" %x", test->pfn[i]); + printf("\n"); + + exit(1); +} + +#define VALIDATE(cond, st, test) \ + if (!(cond)) \ + fail((test), (st), #cond); + int main(void) { const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT; - struct test { - int alloc_ret; - unsigned num_pages; - unsigned *pfn; - unsigned size; - unsigned int max_seg; - unsigned int expected_segments; - } *test, tests[] = { + struct test *test, tests[] = { { -EINVAL, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 }, { -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 }, { -EINVAL, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 }, @@ -67,8 +91,8 @@ int main(void) if (test->alloc_ret) continue; - assert(st.nents == test->expected_segments); - assert(st.orig_nents == test->expected_segments); + VALIDATE(st.nents == test->expected_segments, &st, test); + VALIDATE(st.orig_nents == test->expected_segments, &st, test); sg_free_table(&st); } -- cgit v1.3.1 From 07da1223ec939982497db3caccd6215b55acc35c Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 4 Oct 2020 18:43:37 +0300 Subject: lib/scatterlist: Add support in dynamic allocation of SG table from pages Extend __sg_alloc_table_from_pages to support dynamic allocation of SG table from pages. It should be used by drivers that can't supply all the pages at one time. This function returns the last populated SGE in the table. Users should pass it as an argument to the function from the second call and forward. As before, nents will be equal to the number of populated SGEs (chunks). With this new extension, drivers can benefit the optimization of merging contiguous pages without a need to allocate all pages in advance and hold them in a large buffer. E.g. with the Infiniband driver that allocates a single page for hold the pages. For 1TB memory registration, the temporary buffer would consume only 4KB, instead of 2GB. Link: https://lore.kernel.org/r/20201004154340.1080481-2-leon@kernel.org Signed-off-by: Maor Gottlieb Reviewed-by: Christoph Hellwig Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 12 +-- drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 15 ++-- include/linux/scatterlist.h | 38 +++++---- lib/scatterlist.c | 125 ++++++++++++++++++++++------ tools/testing/scatterlist/main.c | 9 +- 5 files changed, 142 insertions(+), 57 deletions(-) (limited to 'tools') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 12b30075134a..f2eaed6aca3d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -403,6 +403,7 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, unsigned int max_segment = i915_sg_segment_size(); struct sg_table *st; unsigned int sg_page_sizes; + struct scatterlist *sg; int ret; st = kmalloc(sizeof(*st), GFP_KERNEL); @@ -410,13 +411,12 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, return ERR_PTR(-ENOMEM); alloc_table: - ret = __sg_alloc_table_from_pages(st, pvec, num_pages, - 0, num_pages << PAGE_SHIFT, - max_segment, - GFP_KERNEL); - if (ret) { + sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0, + num_pages << PAGE_SHIFT, max_segment, + NULL, 0, GFP_KERNEL); + if (IS_ERR(sg)) { kfree(st); - return ERR_PTR(ret); + return ERR_CAST(sg); } ret = i915_gem_gtt_prepare_pages(obj, st); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c index ab524ab3b0b4..f22acd398b1f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c @@ -419,6 +419,7 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt) int ret = 0; static size_t sgl_size; static size_t sgt_size; + struct scatterlist *sg; if (vmw_tt->mapped) return 0; @@ -441,13 +442,15 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt) if (unlikely(ret != 0)) return ret; - ret = __sg_alloc_table_from_pages - (&vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0, - (unsigned long) vsgt->num_pages << PAGE_SHIFT, - dma_get_max_seg_size(dev_priv->dev->dev), - GFP_KERNEL); - if (unlikely(ret != 0)) + sg = __sg_alloc_table_from_pages(&vmw_tt->sgt, vsgt->pages, + vsgt->num_pages, 0, + (unsigned long) vsgt->num_pages << PAGE_SHIFT, + dma_get_max_seg_size(dev_priv->dev->dev), + NULL, 0, GFP_KERNEL); + if (IS_ERR(sg)) { + ret = PTR_ERR(sg); goto out_sg_alloc_fail; + } if (vsgt->num_pages > vmw_tt->sgt.nents) { uint64_t over_alloc = diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 45cf7b69d852..36c47e7e66a2 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -165,6 +165,22 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, #define for_each_sgtable_dma_sg(sgt, sg, i) \ for_each_sg((sgt)->sgl, sg, (sgt)->nents, i) +static inline void __sg_chain(struct scatterlist *chain_sg, + struct scatterlist *sgl) +{ + /* + * offset and length are unused for chain entry. Clear them. + */ + chain_sg->offset = 0; + chain_sg->length = 0; + + /* + * Set lowest bit to indicate a link pointer, and make sure to clear + * the termination bit if it happens to be set. + */ + chain_sg->page_link = ((unsigned long) sgl | SG_CHAIN) & ~SG_END; +} + /** * sg_chain - Chain two sglists together * @prv: First scatterlist @@ -178,18 +194,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, struct scatterlist *sgl) { - /* - * offset and length are unused for chain entry. Clear them. - */ - prv[prv_nents - 1].offset = 0; - prv[prv_nents - 1].length = 0; - - /* - * Set lowest bit to indicate a link pointer, and make sure to clear - * the termination bit if it happens to be set. - */ - prv[prv_nents - 1].page_link = ((unsigned long) sgl | SG_CHAIN) - & ~SG_END; + __sg_chain(&prv[prv_nents - 1], sgl); } /** @@ -286,10 +291,11 @@ void sg_free_table(struct sg_table *); int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int, struct scatterlist *, unsigned int, gfp_t, sg_alloc_fn *); int sg_alloc_table(struct sg_table *, unsigned int, gfp_t); -int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, - unsigned int n_pages, unsigned int offset, - unsigned long size, unsigned int max_segment, - gfp_t gfp_mask); +struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt, + struct page **pages, unsigned int n_pages, unsigned int offset, + unsigned long size, unsigned int max_segment, + struct scatterlist *prv, unsigned int left_pages, + gfp_t gfp_mask); int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int n_pages, unsigned int offset, unsigned long size, gfp_t gfp_mask); diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 5d63a8857f36..e102fdfaa75b 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -365,6 +365,37 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask) } EXPORT_SYMBOL(sg_alloc_table); +static struct scatterlist *get_next_sg(struct sg_table *table, + struct scatterlist *cur, + unsigned long needed_sges, + gfp_t gfp_mask) +{ + struct scatterlist *new_sg, *next_sg; + unsigned int alloc_size; + + if (cur) { + next_sg = sg_next(cur); + /* Check if last entry should be keeped for chainning */ + if (!sg_is_last(next_sg) || needed_sges == 1) + return next_sg; + } + + alloc_size = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC); + new_sg = sg_kmalloc(alloc_size, gfp_mask); + if (!new_sg) + return ERR_PTR(-ENOMEM); + sg_init_table(new_sg, alloc_size); + if (cur) { + __sg_chain(next_sg, new_sg); + table->orig_nents += alloc_size - 1; + } else { + table->sgl = new_sg; + table->orig_nents = alloc_size; + table->nents = 0; + } + return new_sg; +} + /** * __sg_alloc_table_from_pages - Allocate and initialize an sg table from * an array of pages @@ -374,29 +405,63 @@ EXPORT_SYMBOL(sg_alloc_table); * @offset: Offset from start of the first page to the start of a buffer * @size: Number of valid bytes in the buffer (after offset) * @max_segment: Maximum size of a scatterlist node in bytes (page aligned) + * @prv: Last populated sge in sgt + * @left_pages: Left pages caller have to set after this call * @gfp_mask: GFP allocation mask * - * Description: - * Allocate and initialize an sg table from a list of pages. Contiguous - * ranges of the pages are squashed into a single scatterlist node up to the - * maximum size specified in @max_segment. An user may provide an offset at a - * start and a size of valid data in a buffer specified by the page array. - * The returned sg table is released by sg_free_table. + * Description: + * If @prv is NULL, allocate and initialize an sg table from a list of pages, + * else reuse the scatterlist passed in at @prv. + * Contiguous ranges of the pages are squashed into a single scatterlist + * entry up to the maximum size specified in @max_segment. A user may + * provide an offset at a start and a size of valid data in a buffer + * specified by the page array. * * Returns: - * 0 on success, negative error on failure + * Last SGE in sgt on success, PTR_ERR on otherwise. + * The allocation in @sgt must be released by sg_free_table. + * + * Notes: + * If this function returns non-0 (eg failure), the caller must call + * sg_free_table() to cleanup any leftover allocations. */ -int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, - unsigned int n_pages, unsigned int offset, - unsigned long size, unsigned int max_segment, - gfp_t gfp_mask) +struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt, + struct page **pages, unsigned int n_pages, unsigned int offset, + unsigned long size, unsigned int max_segment, + struct scatterlist *prv, unsigned int left_pages, + gfp_t gfp_mask) { - unsigned int chunks, cur_page, seg_len, i; - int ret; - struct scatterlist *s; + unsigned int chunks, cur_page, seg_len, i, prv_len = 0; + unsigned int added_nents = 0; + struct scatterlist *s = prv; if (WARN_ON(!max_segment || offset_in_page(max_segment))) - return -EINVAL; + return ERR_PTR(-EINVAL); + + if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv) + return ERR_PTR(-EOPNOTSUPP); + + if (prv) { + unsigned long paddr = (page_to_pfn(sg_page(prv)) * PAGE_SIZE + + prv->offset + prv->length) / + PAGE_SIZE; + + if (WARN_ON(offset)) + return ERR_PTR(-EINVAL); + + /* Merge contiguous pages into the last SG */ + prv_len = prv->length; + while (n_pages && page_to_pfn(pages[0]) == paddr) { + if (prv->length + PAGE_SIZE > max_segment) + break; + prv->length += PAGE_SIZE; + paddr++; + pages++; + n_pages--; + } + if (!n_pages) + goto out; + } /* compute number of contiguous chunks */ chunks = 1; @@ -410,13 +475,9 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, } } - ret = sg_alloc_table(sgt, chunks, gfp_mask); - if (unlikely(ret)) - return ret; - /* merging chunks and putting them into the scatterlist */ cur_page = 0; - for_each_sg(sgt->sgl, s, sgt->orig_nents, i) { + for (i = 0; i < chunks; i++) { unsigned int j, chunk_size; /* look for the end of the current chunk */ @@ -429,15 +490,30 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, break; } + /* Pass how many chunks might be left */ + s = get_next_sg(sgt, s, chunks - i + left_pages, gfp_mask); + if (IS_ERR(s)) { + /* + * Adjust entry length to be as before function was + * called. + */ + if (prv) + prv->length = prv_len; + return s; + } chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset; sg_set_page(s, pages[cur_page], min_t(unsigned long, size, chunk_size), offset); + added_nents++; size -= chunk_size; offset = 0; cur_page = j; } - - return 0; + sgt->nents += added_nents; +out: + if (!left_pages) + sg_mark_end(s); + return s; } EXPORT_SYMBOL(__sg_alloc_table_from_pages); @@ -465,8 +541,9 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int n_pages, unsigned int offset, unsigned long size, gfp_t gfp_mask) { - return __sg_alloc_table_from_pages(sgt, pages, n_pages, offset, size, - SCATTERLIST_MAX_SEGMENT, gfp_mask); + return PTR_ERR_OR_ZERO(__sg_alloc_table_from_pages(sgt, pages, n_pages, + offset, size, SCATTERLIST_MAX_SEGMENT, + NULL, 0, gfp_mask)); } EXPORT_SYMBOL(sg_alloc_table_from_pages); diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c index c6db0a1989b2..b2c7e9f7b8d3 100644 --- a/tools/testing/scatterlist/main.c +++ b/tools/testing/scatterlist/main.c @@ -79,14 +79,13 @@ int main(void) for (i = 0, test = tests; test->expected_segments; test++, i++) { struct page *pages[MAX_PAGES]; struct sg_table st; - int ret; + struct scatterlist *sg; set_pages(pages, test->pfn, test->num_pages); - ret = __sg_alloc_table_from_pages(&st, pages, test->num_pages, - 0, test->size, test->max_seg, - GFP_KERNEL); - assert(ret == test->alloc_ret); + sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0, + test->size, test->max_seg, NULL, 0, GFP_KERNEL); + assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); if (test->alloc_ret) continue; -- cgit v1.3.1 From ec6347bb43395cb92126788a1a5b25302543f815 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 5 Oct 2020 20:40:16 -0700 Subject: x86, powerpc: Rename memcpy_mcsafe() to copy_mc_to_{user, kernel}() In reaction to a proposal to introduce a memcpy_mcsafe_fast() implementation Linus points out that memcpy_mcsafe() is poorly named relative to communicating the scope of the interface. Specifically what addresses are valid to pass as source, destination, and what faults / exceptions are handled. Of particular concern is that even though x86 might be able to handle the semantics of copy_mc_to_user() with its common copy_user_generic() implementation other archs likely need / want an explicit path for this case: On Fri, May 1, 2020 at 11:28 AM Linus Torvalds wrote: > > On Thu, Apr 30, 2020 at 6:21 PM Dan Williams wrote: > > > > However now I see that copy_user_generic() works for the wrong reason. > > It works because the exception on the source address due to poison > > looks no different than a write fault on the user address to the > > caller, it's still just a short copy. So it makes copy_to_user() work > > for the wrong reason relative to the name. > > Right. > > And it won't work that way on other architectures. On x86, we have a > generic function that can take faults on either side, and we use it > for both cases (and for the "in_user" case too), but that's an > artifact of the architecture oddity. > > In fact, it's probably wrong even on x86 - because it can hide bugs - > but writing those things is painful enough that everybody prefers > having just one function. Replace a single top-level memcpy_mcsafe() with either copy_mc_to_user(), or copy_mc_to_kernel(). Introduce an x86 copy_mc_fragile() name as the rename for the low-level x86 implementation formerly named memcpy_mcsafe(). It is used as the slow / careful backend that is supplanted by a fast copy_mc_generic() in a follow-on patch. One side-effect of this reorganization is that separating copy_mc_64.S to its own file means that perf no longer needs to track dependencies for its memcpy_64.S benchmarks. [ bp: Massage a bit. ] Signed-off-by: Dan Williams Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Acked-by: Michael Ellerman Cc: Link: http://lore.kernel.org/r/CAHk-=wjSqtXAqfUJxFtWNwmguFASTgB0dz1dT3V-78Quiezqbg@mail.gmail.com Link: https://lkml.kernel.org/r/160195561680.2163339.11574962055305783722.stgit@dwillia2-desk3.amr.corp.intel.com --- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/string.h | 2 - arch/powerpc/include/asm/uaccess.h | 40 ++-- arch/powerpc/lib/Makefile | 2 +- arch/powerpc/lib/copy_mc_64.S | 242 +++++++++++++++++++++ arch/powerpc/lib/memcpy_mcsafe_64.S | 242 --------------------- arch/x86/Kconfig | 2 +- arch/x86/Kconfig.debug | 2 +- arch/x86/include/asm/copy_mc_test.h | 75 +++++++ arch/x86/include/asm/mce.h | 9 + arch/x86/include/asm/mcsafe_test.h | 75 ------- arch/x86/include/asm/string_64.h | 32 --- arch/x86/include/asm/uaccess.h | 9 + arch/x86/include/asm/uaccess_64.h | 20 -- arch/x86/kernel/cpu/mce/core.c | 8 +- arch/x86/kernel/quirks.c | 10 +- arch/x86/lib/Makefile | 1 + arch/x86/lib/copy_mc.c | 82 +++++++ arch/x86/lib/copy_mc_64.S | 127 +++++++++++ arch/x86/lib/memcpy_64.S | 115 ---------- arch/x86/lib/usercopy_64.c | 21 -- drivers/md/dm-writecache.c | 15 +- drivers/nvdimm/claim.c | 2 +- drivers/nvdimm/pmem.c | 6 +- include/linux/string.h | 9 +- include/linux/uaccess.h | 13 ++ include/linux/uio.h | 10 +- lib/Kconfig | 7 +- lib/iov_iter.c | 48 ++-- tools/arch/x86/include/asm/mcsafe_test.h | 13 -- tools/arch/x86/lib/memcpy_64.S | 115 ---------- tools/objtool/check.c | 4 +- tools/perf/bench/Build | 1 - tools/perf/bench/mem-memcpy-x86-64-lib.c | 24 -- tools/testing/nvdimm/test/nfit.c | 49 +++-- .../testing/selftests/powerpc/copyloops/.gitignore | 2 +- tools/testing/selftests/powerpc/copyloops/Makefile | 6 +- .../selftests/powerpc/copyloops/copy_mc_64.S | 1 + .../selftests/powerpc/copyloops/memcpy_mcsafe_64.S | 1 - 39 files changed, 673 insertions(+), 771 deletions(-) create mode 100644 arch/powerpc/lib/copy_mc_64.S delete mode 100644 arch/powerpc/lib/memcpy_mcsafe_64.S create mode 100644 arch/x86/include/asm/copy_mc_test.h delete mode 100644 arch/x86/include/asm/mcsafe_test.h create mode 100644 arch/x86/lib/copy_mc.c create mode 100644 arch/x86/lib/copy_mc_64.S delete mode 100644 tools/arch/x86/include/asm/mcsafe_test.h delete mode 100644 tools/perf/bench/mem-memcpy-x86-64-lib.c create mode 120000 tools/testing/selftests/powerpc/copyloops/copy_mc_64.S delete mode 120000 tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S (limited to 'tools') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 1f48bbfb3ce9..76473eda3426 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -136,7 +136,7 @@ config PPC select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE - select ARCH_HAS_UACCESS_MCSAFE if PPC64 + select ARCH_HAS_COPY_MC if PPC64 select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_KEEP_MEMBLOCK diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index 283552cd0e58..2aa0e31e6884 100644 --- a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -53,9 +53,7 @@ void *__memmove(void *to, const void *from, __kernel_size_t n); #ifndef CONFIG_KASAN #define __HAVE_ARCH_MEMSET32 #define __HAVE_ARCH_MEMSET64 -#define __HAVE_ARCH_MEMCPY_MCSAFE -extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz); extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t); extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t); extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t); diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 00699903f1ef..20a35373cafc 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -435,6 +435,32 @@ do { \ extern unsigned long __copy_tofrom_user(void __user *to, const void __user *from, unsigned long size); +#ifdef CONFIG_ARCH_HAS_COPY_MC +unsigned long __must_check +copy_mc_generic(void *to, const void *from, unsigned long size); + +static inline unsigned long __must_check +copy_mc_to_kernel(void *to, const void *from, unsigned long size) +{ + return copy_mc_generic(to, from, size); +} +#define copy_mc_to_kernel copy_mc_to_kernel + +static inline unsigned long __must_check +copy_mc_to_user(void __user *to, const void *from, unsigned long n) +{ + if (likely(check_copy_size(from, n, true))) { + if (access_ok(to, n)) { + allow_write_to_user(to, n); + n = copy_mc_generic((void *)to, from, n); + prevent_write_to_user(to, n); + } + } + + return n; +} +#endif + #ifdef __powerpc64__ static inline unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) @@ -523,20 +549,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) return ret; } -static __always_inline unsigned long __must_check -copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n) -{ - if (likely(check_copy_size(from, n, true))) { - if (access_ok(to, n)) { - allow_write_to_user(to, n); - n = memcpy_mcsafe((void *)to, from, n); - prevent_write_to_user(to, n); - } - } - - return n; -} - unsigned long __arch_clear_user(void __user *addr, unsigned long size); static inline unsigned long clear_user(void __user *addr, unsigned long size) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index d66a645503eb..69a91b571845 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -39,7 +39,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ memcpy_power7.o obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ - memcpy_64.o memcpy_mcsafe_64.o + memcpy_64.o copy_mc_64.o ifndef CONFIG_PPC_QUEUED_SPINLOCKS obj64-$(CONFIG_SMP) += locks.o diff --git a/arch/powerpc/lib/copy_mc_64.S b/arch/powerpc/lib/copy_mc_64.S new file mode 100644 index 000000000000..88d46c471493 --- /dev/null +++ b/arch/powerpc/lib/copy_mc_64.S @@ -0,0 +1,242 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) IBM Corporation, 2011 + * Derived from copyuser_power7.s by Anton Blanchard + * Author - Balbir Singh + */ +#include +#include +#include + + .macro err1 +100: + EX_TABLE(100b,.Ldo_err1) + .endm + + .macro err2 +200: + EX_TABLE(200b,.Ldo_err2) + .endm + + .macro err3 +300: EX_TABLE(300b,.Ldone) + .endm + +.Ldo_err2: + ld r22,STK_REG(R22)(r1) + ld r21,STK_REG(R21)(r1) + ld r20,STK_REG(R20)(r1) + ld r19,STK_REG(R19)(r1) + ld r18,STK_REG(R18)(r1) + ld r17,STK_REG(R17)(r1) + ld r16,STK_REG(R16)(r1) + ld r15,STK_REG(R15)(r1) + ld r14,STK_REG(R14)(r1) + addi r1,r1,STACKFRAMESIZE +.Ldo_err1: + /* Do a byte by byte copy to get the exact remaining size */ + mtctr r7 +46: +err3; lbz r0,0(r4) + addi r4,r4,1 +err3; stb r0,0(r3) + addi r3,r3,1 + bdnz 46b + li r3,0 + blr + +.Ldone: + mfctr r3 + blr + + +_GLOBAL(copy_mc_generic) + mr r7,r5 + cmpldi r5,16 + blt .Lshort_copy + +.Lcopy: + /* Get the source 8B aligned */ + neg r6,r4 + mtocrf 0x01,r6 + clrldi r6,r6,(64-3) + + bf cr7*4+3,1f +err1; lbz r0,0(r4) + addi r4,r4,1 +err1; stb r0,0(r3) + addi r3,r3,1 + subi r7,r7,1 + +1: bf cr7*4+2,2f +err1; lhz r0,0(r4) + addi r4,r4,2 +err1; sth r0,0(r3) + addi r3,r3,2 + subi r7,r7,2 + +2: bf cr7*4+1,3f +err1; lwz r0,0(r4) + addi r4,r4,4 +err1; stw r0,0(r3) + addi r3,r3,4 + subi r7,r7,4 + +3: sub r5,r5,r6 + cmpldi r5,128 + + mflr r0 + stdu r1,-STACKFRAMESIZE(r1) + std r14,STK_REG(R14)(r1) + std r15,STK_REG(R15)(r1) + std r16,STK_REG(R16)(r1) + std r17,STK_REG(R17)(r1) + std r18,STK_REG(R18)(r1) + std r19,STK_REG(R19)(r1) + std r20,STK_REG(R20)(r1) + std r21,STK_REG(R21)(r1) + std r22,STK_REG(R22)(r1) + std r0,STACKFRAMESIZE+16(r1) + + blt 5f + srdi r6,r5,7 + mtctr r6 + + /* Now do cacheline (128B) sized loads and stores. */ + .align 5 +4: +err2; ld r0,0(r4) +err2; ld r6,8(r4) +err2; ld r8,16(r4) +err2; ld r9,24(r4) +err2; ld r10,32(r4) +err2; ld r11,40(r4) +err2; ld r12,48(r4) +err2; ld r14,56(r4) +err2; ld r15,64(r4) +err2; ld r16,72(r4) +err2; ld r17,80(r4) +err2; ld r18,88(r4) +err2; ld r19,96(r4) +err2; ld r20,104(r4) +err2; ld r21,112(r4) +err2; ld r22,120(r4) + addi r4,r4,128 +err2; std r0,0(r3) +err2; std r6,8(r3) +err2; std r8,16(r3) +err2; std r9,24(r3) +err2; std r10,32(r3) +err2; std r11,40(r3) +err2; std r12,48(r3) +err2; std r14,56(r3) +err2; std r15,64(r3) +err2; std r16,72(r3) +err2; std r17,80(r3) +err2; std r18,88(r3) +err2; std r19,96(r3) +err2; std r20,104(r3) +err2; std r21,112(r3) +err2; std r22,120(r3) + addi r3,r3,128 + subi r7,r7,128 + bdnz 4b + + clrldi r5,r5,(64-7) + + /* Up to 127B to go */ +5: srdi r6,r5,4 + mtocrf 0x01,r6 + +6: bf cr7*4+1,7f +err2; ld r0,0(r4) +err2; ld r6,8(r4) +err2; ld r8,16(r4) +err2; ld r9,24(r4) +err2; ld r10,32(r4) +err2; ld r11,40(r4) +err2; ld r12,48(r4) +err2; ld r14,56(r4) + addi r4,r4,64 +err2; std r0,0(r3) +err2; std r6,8(r3) +err2; std r8,16(r3) +err2; std r9,24(r3) +err2; std r10,32(r3) +err2; std r11,40(r3) +err2; std r12,48(r3) +err2; std r14,56(r3) + addi r3,r3,64 + subi r7,r7,64 + +7: ld r14,STK_REG(R14)(r1) + ld r15,STK_REG(R15)(r1) + ld r16,STK_REG(R16)(r1) + ld r17,STK_REG(R17)(r1) + ld r18,STK_REG(R18)(r1) + ld r19,STK_REG(R19)(r1) + ld r20,STK_REG(R20)(r1) + ld r21,STK_REG(R21)(r1) + ld r22,STK_REG(R22)(r1) + addi r1,r1,STACKFRAMESIZE + + /* Up to 63B to go */ + bf cr7*4+2,8f +err1; ld r0,0(r4) +err1; ld r6,8(r4) +err1; ld r8,16(r4) +err1; ld r9,24(r4) + addi r4,r4,32 +err1; std r0,0(r3) +err1; std r6,8(r3) +err1; std r8,16(r3) +err1; std r9,24(r3) + addi r3,r3,32 + subi r7,r7,32 + + /* Up to 31B to go */ +8: bf cr7*4+3,9f +err1; ld r0,0(r4) +err1; ld r6,8(r4) + addi r4,r4,16 +err1; std r0,0(r3) +err1; std r6,8(r3) + addi r3,r3,16 + subi r7,r7,16 + +9: clrldi r5,r5,(64-4) + + /* Up to 15B to go */ +.Lshort_copy: + mtocrf 0x01,r5 + bf cr7*4+0,12f +err1; lwz r0,0(r4) /* Less chance of a reject with word ops */ +err1; lwz r6,4(r4) + addi r4,r4,8 +err1; stw r0,0(r3) +err1; stw r6,4(r3) + addi r3,r3,8 + subi r7,r7,8 + +12: bf cr7*4+1,13f +err1; lwz r0,0(r4) + addi r4,r4,4 +err1; stw r0,0(r3) + addi r3,r3,4 + subi r7,r7,4 + +13: bf cr7*4+2,14f +err1; lhz r0,0(r4) + addi r4,r4,2 +err1; sth r0,0(r3) + addi r3,r3,2 + subi r7,r7,2 + +14: bf cr7*4+3,15f +err1; lbz r0,0(r4) +err1; stb r0,0(r3) + +15: li r3,0 + blr + +EXPORT_SYMBOL_GPL(copy_mc_generic); diff --git a/arch/powerpc/lib/memcpy_mcsafe_64.S b/arch/powerpc/lib/memcpy_mcsafe_64.S deleted file mode 100644 index cb882d9a6d8a..000000000000 --- a/arch/powerpc/lib/memcpy_mcsafe_64.S +++ /dev/null @@ -1,242 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) IBM Corporation, 2011 - * Derived from copyuser_power7.s by Anton Blanchard - * Author - Balbir Singh - */ -#include -#include -#include - - .macro err1 -100: - EX_TABLE(100b,.Ldo_err1) - .endm - - .macro err2 -200: - EX_TABLE(200b,.Ldo_err2) - .endm - - .macro err3 -300: EX_TABLE(300b,.Ldone) - .endm - -.Ldo_err2: - ld r22,STK_REG(R22)(r1) - ld r21,STK_REG(R21)(r1) - ld r20,STK_REG(R20)(r1) - ld r19,STK_REG(R19)(r1) - ld r18,STK_REG(R18)(r1) - ld r17,STK_REG(R17)(r1) - ld r16,STK_REG(R16)(r1) - ld r15,STK_REG(R15)(r1) - ld r14,STK_REG(R14)(r1) - addi r1,r1,STACKFRAMESIZE -.Ldo_err1: - /* Do a byte by byte copy to get the exact remaining size */ - mtctr r7 -46: -err3; lbz r0,0(r4) - addi r4,r4,1 -err3; stb r0,0(r3) - addi r3,r3,1 - bdnz 46b - li r3,0 - blr - -.Ldone: - mfctr r3 - blr - - -_GLOBAL(memcpy_mcsafe) - mr r7,r5 - cmpldi r5,16 - blt .Lshort_copy - -.Lcopy: - /* Get the source 8B aligned */ - neg r6,r4 - mtocrf 0x01,r6 - clrldi r6,r6,(64-3) - - bf cr7*4+3,1f -err1; lbz r0,0(r4) - addi r4,r4,1 -err1; stb r0,0(r3) - addi r3,r3,1 - subi r7,r7,1 - -1: bf cr7*4+2,2f -err1; lhz r0,0(r4) - addi r4,r4,2 -err1; sth r0,0(r3) - addi r3,r3,2 - subi r7,r7,2 - -2: bf cr7*4+1,3f -err1; lwz r0,0(r4) - addi r4,r4,4 -err1; stw r0,0(r3) - addi r3,r3,4 - subi r7,r7,4 - -3: sub r5,r5,r6 - cmpldi r5,128 - - mflr r0 - stdu r1,-STACKFRAMESIZE(r1) - std r14,STK_REG(R14)(r1) - std r15,STK_REG(R15)(r1) - std r16,STK_REG(R16)(r1) - std r17,STK_REG(R17)(r1) - std r18,STK_REG(R18)(r1) - std r19,STK_REG(R19)(r1) - std r20,STK_REG(R20)(r1) - std r21,STK_REG(R21)(r1) - std r22,STK_REG(R22)(r1) - std r0,STACKFRAMESIZE+16(r1) - - blt 5f - srdi r6,r5,7 - mtctr r6 - - /* Now do cacheline (128B) sized loads and stores. */ - .align 5 -4: -err2; ld r0,0(r4) -err2; ld r6,8(r4) -err2; ld r8,16(r4) -err2; ld r9,24(r4) -err2; ld r10,32(r4) -err2; ld r11,40(r4) -err2; ld r12,48(r4) -err2; ld r14,56(r4) -err2; ld r15,64(r4) -err2; ld r16,72(r4) -err2; ld r17,80(r4) -err2; ld r18,88(r4) -err2; ld r19,96(r4) -err2; ld r20,104(r4) -err2; ld r21,112(r4) -err2; ld r22,120(r4) - addi r4,r4,128 -err2; std r0,0(r3) -err2; std r6,8(r3) -err2; std r8,16(r3) -err2; std r9,24(r3) -err2; std r10,32(r3) -err2; std r11,40(r3) -err2; std r12,48(r3) -err2; std r14,56(r3) -err2; std r15,64(r3) -err2; std r16,72(r3) -err2; std r17,80(r3) -err2; std r18,88(r3) -err2; std r19,96(r3) -err2; std r20,104(r3) -err2; std r21,112(r3) -err2; std r22,120(r3) - addi r3,r3,128 - subi r7,r7,128 - bdnz 4b - - clrldi r5,r5,(64-7) - - /* Up to 127B to go */ -5: srdi r6,r5,4 - mtocrf 0x01,r6 - -6: bf cr7*4+1,7f -err2; ld r0,0(r4) -err2; ld r6,8(r4) -err2; ld r8,16(r4) -err2; ld r9,24(r4) -err2; ld r10,32(r4) -err2; ld r11,40(r4) -err2; ld r12,48(r4) -err2; ld r14,56(r4) - addi r4,r4,64 -err2; std r0,0(r3) -err2; std r6,8(r3) -err2; std r8,16(r3) -err2; std r9,24(r3) -err2; std r10,32(r3) -err2; std r11,40(r3) -err2; std r12,48(r3) -err2; std r14,56(r3) - addi r3,r3,64 - subi r7,r7,64 - -7: ld r14,STK_REG(R14)(r1) - ld r15,STK_REG(R15)(r1) - ld r16,STK_REG(R16)(r1) - ld r17,STK_REG(R17)(r1) - ld r18,STK_REG(R18)(r1) - ld r19,STK_REG(R19)(r1) - ld r20,STK_REG(R20)(r1) - ld r21,STK_REG(R21)(r1) - ld r22,STK_REG(R22)(r1) - addi r1,r1,STACKFRAMESIZE - - /* Up to 63B to go */ - bf cr7*4+2,8f -err1; ld r0,0(r4) -err1; ld r6,8(r4) -err1; ld r8,16(r4) -err1; ld r9,24(r4) - addi r4,r4,32 -err1; std r0,0(r3) -err1; std r6,8(r3) -err1; std r8,16(r3) -err1; std r9,24(r3) - addi r3,r3,32 - subi r7,r7,32 - - /* Up to 31B to go */ -8: bf cr7*4+3,9f -err1; ld r0,0(r4) -err1; ld r6,8(r4) - addi r4,r4,16 -err1; std r0,0(r3) -err1; std r6,8(r3) - addi r3,r3,16 - subi r7,r7,16 - -9: clrldi r5,r5,(64-4) - - /* Up to 15B to go */ -.Lshort_copy: - mtocrf 0x01,r5 - bf cr7*4+0,12f -err1; lwz r0,0(r4) /* Less chance of a reject with word ops */ -err1; lwz r6,4(r4) - addi r4,r4,8 -err1; stw r0,0(r3) -err1; stw r6,4(r3) - addi r3,r3,8 - subi r7,r7,8 - -12: bf cr7*4+1,13f -err1; lwz r0,0(r4) - addi r4,r4,4 -err1; stw r0,0(r3) - addi r3,r3,4 - subi r7,r7,4 - -13: bf cr7*4+2,14f -err1; lhz r0,0(r4) - addi r4,r4,2 -err1; sth r0,0(r3) - addi r3,r3,2 - subi r7,r7,2 - -14: bf cr7*4+3,15f -err1; lbz r0,0(r4) -err1; stb r0,0(r3) - -15: li r3,0 - blr - -EXPORT_SYMBOL_GPL(memcpy_mcsafe); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7101ac64bb20..e876b3a087f9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -75,7 +75,7 @@ config X86 select ARCH_HAS_PTE_DEVMAP if X86_64 select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 - select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE + select ARCH_HAS_COPY_MC if X86_64 select ARCH_HAS_SET_MEMORY select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_STRICT_KERNEL_RWX diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index ee1d3c5834c6..27b5e2bc6a01 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -62,7 +62,7 @@ config EARLY_PRINTK_USB_XDBC You should normally say N here, unless you want to debug early crashes or need a very simple printk logging facility. -config MCSAFE_TEST +config COPY_MC_TEST def_bool n config EFI_PGT_DUMP diff --git a/arch/x86/include/asm/copy_mc_test.h b/arch/x86/include/asm/copy_mc_test.h new file mode 100644 index 000000000000..e4991ba96726 --- /dev/null +++ b/arch/x86/include/asm/copy_mc_test.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _COPY_MC_TEST_H_ +#define _COPY_MC_TEST_H_ + +#ifndef __ASSEMBLY__ +#ifdef CONFIG_COPY_MC_TEST +extern unsigned long copy_mc_test_src; +extern unsigned long copy_mc_test_dst; + +static inline void copy_mc_inject_src(void *addr) +{ + if (addr) + copy_mc_test_src = (unsigned long) addr; + else + copy_mc_test_src = ~0UL; +} + +static inline void copy_mc_inject_dst(void *addr) +{ + if (addr) + copy_mc_test_dst = (unsigned long) addr; + else + copy_mc_test_dst = ~0UL; +} +#else /* CONFIG_COPY_MC_TEST */ +static inline void copy_mc_inject_src(void *addr) +{ +} + +static inline void copy_mc_inject_dst(void *addr) +{ +} +#endif /* CONFIG_COPY_MC_TEST */ + +#else /* __ASSEMBLY__ */ +#include + +#ifdef CONFIG_COPY_MC_TEST +.macro COPY_MC_TEST_CTL + .pushsection .data + .align 8 + .globl copy_mc_test_src + copy_mc_test_src: + .quad 0 + EXPORT_SYMBOL_GPL(copy_mc_test_src) + .globl copy_mc_test_dst + copy_mc_test_dst: + .quad 0 + EXPORT_SYMBOL_GPL(copy_mc_test_dst) + .popsection +.endm + +.macro COPY_MC_TEST_SRC reg count target + leaq \count(\reg), %r9 + cmp copy_mc_test_src, %r9 + ja \target +.endm + +.macro COPY_MC_TEST_DST reg count target + leaq \count(\reg), %r9 + cmp copy_mc_test_dst, %r9 + ja \target +.endm +#else +.macro COPY_MC_TEST_CTL +.endm + +.macro COPY_MC_TEST_SRC reg count target +.endm + +.macro COPY_MC_TEST_DST reg count target +.endm +#endif /* CONFIG_COPY_MC_TEST */ +#endif /* __ASSEMBLY__ */ +#endif /* _COPY_MC_TEST_H_ */ diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 109af5c7f515..ba2062d6df92 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -174,6 +174,15 @@ extern void mce_unregister_decode_chain(struct notifier_block *nb); extern int mce_p5_enabled; +#ifdef CONFIG_ARCH_HAS_COPY_MC +extern void enable_copy_mc_fragile(void); +unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt); +#else +static inline void enable_copy_mc_fragile(void) +{ +} +#endif + #ifdef CONFIG_X86_MCE int mcheck_init(void); void mcheck_cpu_init(struct cpuinfo_x86 *c); diff --git a/arch/x86/include/asm/mcsafe_test.h b/arch/x86/include/asm/mcsafe_test.h deleted file mode 100644 index eb59804b6201..000000000000 --- a/arch/x86/include/asm/mcsafe_test.h +++ /dev/null @@ -1,75 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _MCSAFE_TEST_H_ -#define _MCSAFE_TEST_H_ - -#ifndef __ASSEMBLY__ -#ifdef CONFIG_MCSAFE_TEST -extern unsigned long mcsafe_test_src; -extern unsigned long mcsafe_test_dst; - -static inline void mcsafe_inject_src(void *addr) -{ - if (addr) - mcsafe_test_src = (unsigned long) addr; - else - mcsafe_test_src = ~0UL; -} - -static inline void mcsafe_inject_dst(void *addr) -{ - if (addr) - mcsafe_test_dst = (unsigned long) addr; - else - mcsafe_test_dst = ~0UL; -} -#else /* CONFIG_MCSAFE_TEST */ -static inline void mcsafe_inject_src(void *addr) -{ -} - -static inline void mcsafe_inject_dst(void *addr) -{ -} -#endif /* CONFIG_MCSAFE_TEST */ - -#else /* __ASSEMBLY__ */ -#include - -#ifdef CONFIG_MCSAFE_TEST -.macro MCSAFE_TEST_CTL - .pushsection .data - .align 8 - .globl mcsafe_test_src - mcsafe_test_src: - .quad 0 - EXPORT_SYMBOL_GPL(mcsafe_test_src) - .globl mcsafe_test_dst - mcsafe_test_dst: - .quad 0 - EXPORT_SYMBOL_GPL(mcsafe_test_dst) - .popsection -.endm - -.macro MCSAFE_TEST_SRC reg count target - leaq \count(\reg), %r9 - cmp mcsafe_test_src, %r9 - ja \target -.endm - -.macro MCSAFE_TEST_DST reg count target - leaq \count(\reg), %r9 - cmp mcsafe_test_dst, %r9 - ja \target -.endm -#else -.macro MCSAFE_TEST_CTL -.endm - -.macro MCSAFE_TEST_SRC reg count target -.endm - -.macro MCSAFE_TEST_DST reg count target -.endm -#endif /* CONFIG_MCSAFE_TEST */ -#endif /* __ASSEMBLY__ */ -#endif /* _MCSAFE_TEST_H_ */ diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 75314c3dbe47..6e450827f677 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -82,38 +82,6 @@ int strcmp(const char *cs, const char *ct); #endif -#define __HAVE_ARCH_MEMCPY_MCSAFE 1 -__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src, - size_t cnt); -DECLARE_STATIC_KEY_FALSE(mcsafe_key); - -/** - * memcpy_mcsafe - copy memory with indication if a machine check happened - * - * @dst: destination address - * @src: source address - * @cnt: number of bytes to copy - * - * Low level memory copy function that catches machine checks - * We only call into the "safe" function on systems that can - * actually do machine check recovery. Everyone else can just - * use memcpy(). - * - * Return 0 for success, or number of bytes not copied if there was an - * exception. - */ -static __always_inline __must_check unsigned long -memcpy_mcsafe(void *dst, const void *src, size_t cnt) -{ -#ifdef CONFIG_X86_MCE - if (static_branch_unlikely(&mcsafe_key)) - return __memcpy_mcsafe(dst, src, cnt); - else -#endif - memcpy(dst, src, cnt); - return 0; -} - #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE #define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1 void __memcpy_flushcache(void *dst, const void *src, size_t cnt); diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index ecefaffd15d4..eff7fb847149 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -455,6 +455,15 @@ extern __must_check long strnlen_user(const char __user *str, long n); unsigned long __must_check clear_user(void __user *mem, unsigned long len); unsigned long __must_check __clear_user(void __user *mem, unsigned long len); +#ifdef CONFIG_ARCH_HAS_COPY_MC +unsigned long __must_check +copy_mc_to_kernel(void *to, const void *from, unsigned len); +#define copy_mc_to_kernel copy_mc_to_kernel + +unsigned long __must_check +copy_mc_to_user(void *to, const void *from, unsigned len); +#endif + /* * movsl can be slow when source and dest are not both 8-byte aligned */ diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index bc10e3dc64fe..e7265a552f4f 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -46,22 +46,6 @@ copy_user_generic(void *to, const void *from, unsigned len) return ret; } -static __always_inline __must_check unsigned long -copy_to_user_mcsafe(void *to, const void *from, unsigned len) -{ - unsigned long ret; - - __uaccess_begin(); - /* - * Note, __memcpy_mcsafe() is explicitly used since it can - * handle exceptions / faults. memcpy_mcsafe() may fall back to - * memcpy() which lacks this handling. - */ - ret = __memcpy_mcsafe(to, from, len); - __uaccess_end(); - return ret; -} - static __always_inline __must_check unsigned long raw_copy_from_user(void *dst, const void __user *src, unsigned long size) { @@ -102,8 +86,4 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) kasan_check_write(dst, size); return __copy_user_flushcache(dst, src, size); } - -unsigned long -mcsafe_handle_tail(char *to, char *from, unsigned len); - #endif /* _ASM_X86_UACCESS_64_H */ diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 11b6697be010..b5b70f4b351d 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -2124,7 +2123,7 @@ void mce_disable_bank(int bank) and older. * mce=nobootlog Don't log MCEs from before booting. * mce=bios_cmci_threshold Don't program the CMCI threshold - * mce=recovery force enable memcpy_mcsafe() + * mce=recovery force enable copy_mc_fragile() */ static int __init mcheck_enable(char *str) { @@ -2732,13 +2731,10 @@ static void __init mcheck_debugfs_init(void) static void __init mcheck_debugfs_init(void) { } #endif -DEFINE_STATIC_KEY_FALSE(mcsafe_key); -EXPORT_SYMBOL_GPL(mcsafe_key); - static int __init mcheck_late_init(void) { if (mca_cfg.recovery) - static_branch_inc(&mcsafe_key); + enable_copy_mc_fragile(); mcheck_debugfs_init(); diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 1b10717c9321..6d0df6a58873 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -8,6 +8,7 @@ #include #include +#include #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) @@ -624,10 +625,6 @@ static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3, amd_disable_seq_and_redirect_scrub); -#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) -#include -#include - /* Ivy Bridge, Haswell, Broadwell */ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev) { @@ -636,7 +633,7 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev) pci_read_config_dword(pdev, 0x84, &capid0); if (capid0 & 0x10) - static_branch_inc(&mcsafe_key); + enable_copy_mc_fragile(); } /* Skylake */ @@ -653,7 +650,7 @@ static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev) * enabled, so memory machine check recovery is also enabled. */ if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0)) - static_branch_inc(&mcsafe_key); + enable_copy_mc_fragile(); } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap); @@ -661,7 +658,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap); #endif -#endif bool x86_apple_machine; EXPORT_SYMBOL(x86_apple_machine); diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index d46fff11f06f..f7d23c9c22b2 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o lib-y := delay.o misc.o cmdline.o cpu.o lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o lib-y += memcpy_$(BITS).o +lib-$(CONFIG_ARCH_HAS_COPY_MC) += copy_mc.o copy_mc_64.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c new file mode 100644 index 000000000000..2633635530b7 --- /dev/null +++ b/arch/x86/lib/copy_mc.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */ + +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_X86_MCE +/* + * See COPY_MC_TEST for self-test of the copy_mc_fragile() + * implementation. + */ +static DEFINE_STATIC_KEY_FALSE(copy_mc_fragile_key); + +void enable_copy_mc_fragile(void) +{ + static_branch_inc(©_mc_fragile_key); +} +#define copy_mc_fragile_enabled (static_branch_unlikely(©_mc_fragile_key)) + +/* + * Similar to copy_user_handle_tail, probe for the write fault point, or + * source exception point. + */ +__visible notrace unsigned long +copy_mc_fragile_handle_tail(char *to, char *from, unsigned len) +{ + for (; len; --len, to++, from++) + if (copy_mc_fragile(to, from, 1)) + break; + return len; +} +#else +/* + * No point in doing careful copying, or consulting a static key when + * there is no #MC handler in the CONFIG_X86_MCE=n case. + */ +void enable_copy_mc_fragile(void) +{ +} +#define copy_mc_fragile_enabled (0) +#endif + +/** + * copy_mc_to_kernel - memory copy that handles source exceptions + * + * @dst: destination address + * @src: source address + * @len: number of bytes to copy + * + * Call into the 'fragile' version on systems that have trouble + * actually do machine check recovery. Everyone else can just + * use memcpy(). + * + * Return 0 for success, or number of bytes not copied if there was an + * exception. + */ +unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigned len) +{ + if (copy_mc_fragile_enabled) + return copy_mc_fragile(dst, src, len); + memcpy(dst, src, len); + return 0; +} +EXPORT_SYMBOL_GPL(copy_mc_to_kernel); + +unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned len) +{ + unsigned long ret; + + if (!copy_mc_fragile_enabled) + return copy_user_generic(dst, src, len); + + __uaccess_begin(); + ret = copy_mc_fragile(dst, src, len); + __uaccess_end(); + return ret; +} diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S new file mode 100644 index 000000000000..c3b613c4544a --- /dev/null +++ b/arch/x86/lib/copy_mc_64.S @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */ + +#include +#include +#include +#include + +#ifndef CONFIG_UML + +#ifdef CONFIG_X86_MCE +COPY_MC_TEST_CTL + +/* + * copy_mc_fragile - copy memory with indication if an exception / fault happened + * + * The 'fragile' version is opted into by platform quirks and takes + * pains to avoid unrecoverable corner cases like 'fast-string' + * instruction sequences, and consuming poison across a cacheline + * boundary. The non-fragile version is equivalent to memcpy() + * regardless of CPU machine-check-recovery capability. + */ +SYM_FUNC_START(copy_mc_fragile) + cmpl $8, %edx + /* Less than 8 bytes? Go to byte copy loop */ + jb .L_no_whole_words + + /* Check for bad alignment of source */ + testl $7, %esi + /* Already aligned */ + jz .L_8byte_aligned + + /* Copy one byte at a time until source is 8-byte aligned */ + movl %esi, %ecx + andl $7, %ecx + subl $8, %ecx + negl %ecx + subl %ecx, %edx +.L_read_leading_bytes: + movb (%rsi), %al + COPY_MC_TEST_SRC %rsi 1 .E_leading_bytes + COPY_MC_TEST_DST %rdi 1 .E_leading_bytes +.L_write_leading_bytes: + movb %al, (%rdi) + incq %rsi + incq %rdi + decl %ecx + jnz .L_read_leading_bytes + +.L_8byte_aligned: + movl %edx, %ecx + andl $7, %edx + shrl $3, %ecx + jz .L_no_whole_words + +.L_read_words: + movq (%rsi), %r8 + COPY_MC_TEST_SRC %rsi 8 .E_read_words + COPY_MC_TEST_DST %rdi 8 .E_write_words +.L_write_words: + movq %r8, (%rdi) + addq $8, %rsi + addq $8, %rdi + decl %ecx + jnz .L_read_words + + /* Any trailing bytes? */ +.L_no_whole_words: + andl %edx, %edx + jz .L_done_memcpy_trap + + /* Copy trailing bytes */ + movl %edx, %ecx +.L_read_trailing_bytes: + movb (%rsi), %al + COPY_MC_TEST_SRC %rsi 1 .E_trailing_bytes + COPY_MC_TEST_DST %rdi 1 .E_trailing_bytes +.L_write_trailing_bytes: + movb %al, (%rdi) + incq %rsi + incq %rdi + decl %ecx + jnz .L_read_trailing_bytes + + /* Copy successful. Return zero */ +.L_done_memcpy_trap: + xorl %eax, %eax +.L_done: + ret +SYM_FUNC_END(copy_mc_fragile) +EXPORT_SYMBOL_GPL(copy_mc_fragile) + + .section .fixup, "ax" + /* + * Return number of bytes not copied for any failure. Note that + * there is no "tail" handling since the source buffer is 8-byte + * aligned and poison is cacheline aligned. + */ +.E_read_words: + shll $3, %ecx +.E_leading_bytes: + addl %edx, %ecx +.E_trailing_bytes: + mov %ecx, %eax + jmp .L_done + + /* + * For write fault handling, given the destination is unaligned, + * we handle faults on multi-byte writes with a byte-by-byte + * copy up to the write-protected page. + */ +.E_write_words: + shll $3, %ecx + addl %edx, %ecx + movl %ecx, %edx + jmp copy_mc_fragile_handle_tail + + .previous + + _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) + _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) + _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) + _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) + _ASM_EXTABLE(.L_write_words, .E_write_words) + _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) +#endif /* CONFIG_X86_MCE */ +#endif /* !CONFIG_UML */ diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index bbcc05bcefad..037faac46b0c 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -4,7 +4,6 @@ #include #include #include -#include #include #include @@ -187,117 +186,3 @@ SYM_FUNC_START_LOCAL(memcpy_orig) SYM_FUNC_END(memcpy_orig) .popsection - -#ifndef CONFIG_UML - -MCSAFE_TEST_CTL - -/* - * __memcpy_mcsafe - memory copy with machine check exception handling - * Note that we only catch machine checks when reading the source addresses. - * Writes to target are posted and don't generate machine checks. - */ -SYM_FUNC_START(__memcpy_mcsafe) - cmpl $8, %edx - /* Less than 8 bytes? Go to byte copy loop */ - jb .L_no_whole_words - - /* Check for bad alignment of source */ - testl $7, %esi - /* Already aligned */ - jz .L_8byte_aligned - - /* Copy one byte at a time until source is 8-byte aligned */ - movl %esi, %ecx - andl $7, %ecx - subl $8, %ecx - negl %ecx - subl %ecx, %edx -.L_read_leading_bytes: - movb (%rsi), %al - MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes - MCSAFE_TEST_DST %rdi 1 .E_leading_bytes -.L_write_leading_bytes: - movb %al, (%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz .L_read_leading_bytes - -.L_8byte_aligned: - movl %edx, %ecx - andl $7, %edx - shrl $3, %ecx - jz .L_no_whole_words - -.L_read_words: - movq (%rsi), %r8 - MCSAFE_TEST_SRC %rsi 8 .E_read_words - MCSAFE_TEST_DST %rdi 8 .E_write_words -.L_write_words: - movq %r8, (%rdi) - addq $8, %rsi - addq $8, %rdi - decl %ecx - jnz .L_read_words - - /* Any trailing bytes? */ -.L_no_whole_words: - andl %edx, %edx - jz .L_done_memcpy_trap - - /* Copy trailing bytes */ - movl %edx, %ecx -.L_read_trailing_bytes: - movb (%rsi), %al - MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes - MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes -.L_write_trailing_bytes: - movb %al, (%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz .L_read_trailing_bytes - - /* Copy successful. Return zero */ -.L_done_memcpy_trap: - xorl %eax, %eax -.L_done: - ret -SYM_FUNC_END(__memcpy_mcsafe) -EXPORT_SYMBOL_GPL(__memcpy_mcsafe) - - .section .fixup, "ax" - /* - * Return number of bytes not copied for any failure. Note that - * there is no "tail" handling since the source buffer is 8-byte - * aligned and poison is cacheline aligned. - */ -.E_read_words: - shll $3, %ecx -.E_leading_bytes: - addl %edx, %ecx -.E_trailing_bytes: - mov %ecx, %eax - jmp .L_done - - /* - * For write fault handling, given the destination is unaligned, - * we handle faults on multi-byte writes with a byte-by-byte - * copy up to the write-protected page. - */ -.E_write_words: - shll $3, %ecx - addl %edx, %ecx - movl %ecx, %edx - jmp mcsafe_handle_tail - - .previous - - _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) - _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) - _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) - _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) - _ASM_EXTABLE(.L_write_words, .E_write_words) - _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) -#endif diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index b0dfac3d3df7..5f1d4a9ebd5a 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -56,27 +56,6 @@ unsigned long clear_user(void __user *to, unsigned long n) } EXPORT_SYMBOL(clear_user); -/* - * Similar to copy_user_handle_tail, probe for the write fault point, - * but reuse __memcpy_mcsafe in case a new read error is encountered. - * clac() is handled in _copy_to_iter_mcsafe(). - */ -__visible notrace unsigned long -mcsafe_handle_tail(char *to, char *from, unsigned len) -{ - for (; len; --len, to++, from++) { - /* - * Call the assembly routine back directly since - * memcpy_mcsafe() may silently fallback to memcpy. - */ - unsigned long rem = __memcpy_mcsafe(to, from, 1); - - if (rem) - break; - } - return len; -} - #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE /** * clean_cache_range - write back a cache range with CLWB diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 86dbe0c8b45c..9fc18fadacc6 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -49,7 +49,7 @@ do { \ #define pmem_assign(dest, src) ((dest) = (src)) #endif -#if defined(__HAVE_ARCH_MEMCPY_MCSAFE) && defined(DM_WRITECACHE_HAS_PMEM) +#if IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC) && defined(DM_WRITECACHE_HAS_PMEM) #define DM_WRITECACHE_HANDLE_HARDWARE_ERRORS #endif @@ -984,7 +984,8 @@ static void writecache_resume(struct dm_target *ti) } wc->freelist_size = 0; - r = memcpy_mcsafe(&sb_seq_count, &sb(wc)->seq_count, sizeof(uint64_t)); + r = copy_mc_to_kernel(&sb_seq_count, &sb(wc)->seq_count, + sizeof(uint64_t)); if (r) { writecache_error(wc, r, "hardware memory error when reading superblock: %d", r); sb_seq_count = cpu_to_le64(0); @@ -1000,7 +1001,8 @@ static void writecache_resume(struct dm_target *ti) e->seq_count = -1; continue; } - r = memcpy_mcsafe(&wme, memory_entry(wc, e), sizeof(struct wc_memory_entry)); + r = copy_mc_to_kernel(&wme, memory_entry(wc, e), + sizeof(struct wc_memory_entry)); if (r) { writecache_error(wc, r, "hardware memory error when reading metadata entry %lu: %d", (unsigned long)b, r); @@ -1198,7 +1200,7 @@ static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data if (rw == READ) { int r; - r = memcpy_mcsafe(buf, data, size); + r = copy_mc_to_kernel(buf, data, size); flush_dcache_page(bio_page(bio)); if (unlikely(r)) { writecache_error(wc, r, "hardware memory error when reading data: %d", r); @@ -2341,7 +2343,7 @@ invalid_optional: } } - r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock)); + r = copy_mc_to_kernel(&s, sb(wc), sizeof(struct wc_memory_superblock)); if (r) { ti->error = "Hardware memory error when reading superblock"; goto bad; @@ -2352,7 +2354,8 @@ invalid_optional: ti->error = "Unable to initialize device"; goto bad; } - r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock)); + r = copy_mc_to_kernel(&s, sb(wc), + sizeof(struct wc_memory_superblock)); if (r) { ti->error = "Hardware memory error when reading superblock"; goto bad; diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 45964acba944..22d865ba6353 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -268,7 +268,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, if (rw == READ) { if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) return -EIO; - if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0) + if (copy_mc_to_kernel(buf, nsio->addr + offset, size) != 0) return -EIO; return 0; } diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index fab29b514372..5c6939e004e2 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -125,7 +125,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off, while (len) { mem = kmap_atomic(page); chunk = min_t(unsigned int, len, PAGE_SIZE - off); - rem = memcpy_mcsafe(mem + off, pmem_addr, chunk); + rem = copy_mc_to_kernel(mem + off, pmem_addr, chunk); kunmap_atomic(mem); if (rem) return BLK_STS_IOERR; @@ -304,7 +304,7 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev, /* * Use the 'no check' versions of copy_from_iter_flushcache() and - * copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds + * copy_mc_to_iter() to bypass HARDENED_USERCOPY overhead. Bounds * checking, both file offset and device offset, is handled by * dax_iomap_actor() */ @@ -317,7 +317,7 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { - return _copy_to_iter_mcsafe(addr, bytes, i); + return _copy_mc_to_iter(addr, bytes, i); } static const struct dax_operations pmem_dax_ops = { diff --git a/include/linux/string.h b/include/linux/string.h index 9b7a0632e87a..b1f3894a0a3e 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -161,20 +161,13 @@ extern int bcmp(const void *,const void *,__kernel_size_t); #ifndef __HAVE_ARCH_MEMCHR extern void * memchr(const void *,int,__kernel_size_t); #endif -#ifndef __HAVE_ARCH_MEMCPY_MCSAFE -static inline __must_check unsigned long memcpy_mcsafe(void *dst, - const void *src, size_t cnt) -{ - memcpy(dst, src, cnt); - return 0; -} -#endif #ifndef __HAVE_ARCH_MEMCPY_FLUSHCACHE static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt) { memcpy(dst, src, cnt); } #endif + void *memchr_inv(const void *s, int c, size_t n); char *strreplace(char *s, char old, char new); diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 94b285411659..1ae36bc8db35 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -179,6 +179,19 @@ copy_in_user(void __user *to, const void __user *from, unsigned long n) } #endif +#ifndef copy_mc_to_kernel +/* + * Without arch opt-in this generic copy_mc_to_kernel() will not handle + * #MC (or arch equivalent) during source read. + */ +static inline unsigned long __must_check +copy_mc_to_kernel(void *dst, const void *src, size_t cnt) +{ + memcpy(dst, src, cnt); + return 0; +} +#endif + static __always_inline void pagefault_disabled_inc(void) { current->pagefault_disabled++; diff --git a/include/linux/uio.h b/include/linux/uio.h index 3835a8a8e9ea..f14410c678bd 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -185,10 +185,10 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); #define _copy_from_iter_flushcache _copy_from_iter_nocache #endif -#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE -size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i); +#ifdef CONFIG_ARCH_HAS_COPY_MC +size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i); #else -#define _copy_to_iter_mcsafe _copy_to_iter +#define _copy_mc_to_iter _copy_to_iter #endif static __always_inline __must_check @@ -201,12 +201,12 @@ size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) } static __always_inline __must_check -size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i) +size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i) { if (unlikely(!check_copy_size(addr, bytes, true))) return 0; else - return _copy_to_iter_mcsafe(addr, bytes, i); + return _copy_mc_to_iter(addr, bytes, i); } size_t iov_iter_zero(size_t bytes, struct iov_iter *); diff --git a/lib/Kconfig b/lib/Kconfig index b4b98a03ff98..b46a9fd122c8 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -635,7 +635,12 @@ config UACCESS_MEMCPY config ARCH_HAS_UACCESS_FLUSHCACHE bool -config ARCH_HAS_UACCESS_MCSAFE +# arch has a concept of a recoverable synchronous exception due to a +# memory-read error like x86 machine-check or ARM data-abort, and +# implements copy_mc_to_{user,kernel} to abort and report +# 'bytes-transferred' if that exception fires when accessing the source +# buffer. +config ARCH_HAS_COPY_MC bool # Temporary. Goes away when all archs are cleaned up diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 5e40786c8f12..d13304a034f5 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -637,30 +637,30 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) } EXPORT_SYMBOL(_copy_to_iter); -#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE -static int copyout_mcsafe(void __user *to, const void *from, size_t n) +#ifdef CONFIG_ARCH_HAS_COPY_MC +static int copyout_mc(void __user *to, const void *from, size_t n) { if (access_ok(to, n)) { instrument_copy_to_user(to, from, n); - n = copy_to_user_mcsafe((__force void *) to, from, n); + n = copy_mc_to_user((__force void *) to, from, n); } return n; } -static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset, +static unsigned long copy_mc_to_page(struct page *page, size_t offset, const char *from, size_t len) { unsigned long ret; char *to; to = kmap_atomic(page); - ret = memcpy_mcsafe(to + offset, from, len); + ret = copy_mc_to_kernel(to + offset, from, len); kunmap_atomic(to); return ret; } -static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, +static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { struct pipe_inode_info *pipe = i->pipe; @@ -678,7 +678,7 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, size_t chunk = min_t(size_t, n, PAGE_SIZE - off); unsigned long rem; - rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page, + rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk); i->head = i_head; i->iov_offset = off + chunk - rem; @@ -695,18 +695,17 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, } /** - * _copy_to_iter_mcsafe - copy to user with source-read error exception handling + * _copy_mc_to_iter - copy to iter with source memory error exception handling * @addr: source kernel address * @bytes: total transfer length * @iter: destination iterator * - * The pmem driver arranges for filesystem-dax to use this facility via - * dax_copy_to_iter() for protecting read/write to persistent memory. - * Unless / until an architecture can guarantee identical performance - * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a - * performance regression to switch more users to the mcsafe version. + * The pmem driver deploys this for the dax operation + * (dax_copy_to_iter()) for dax reads (bypass page-cache and the + * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes + * successfully copied. * - * Otherwise, the main differences between this and typical _copy_to_iter(). + * The main differences between this and typical _copy_to_iter(). * * * Typical tail/residue handling after a fault retries the copy * byte-by-byte until the fault happens again. Re-triggering machine @@ -717,23 +716,22 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies. * Compare to copy_to_iter() where only ITER_IOVEC attempts might return * a short copy. - * - * See MCSAFE_TEST for self-test. */ -size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) +size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { const char *from = addr; unsigned long rem, curr_addr, s_addr = (unsigned long) addr; if (unlikely(iov_iter_is_pipe(i))) - return copy_pipe_to_iter_mcsafe(addr, bytes, i); + return copy_mc_pipe_to_iter(addr, bytes, i); if (iter_is_iovec(i)) might_fault(); iterate_and_advance(i, bytes, v, - copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), + copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len, + v.iov_len), ({ - rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset, - (from += v.bv_len) - v.bv_len, v.bv_len); + rem = copy_mc_to_page(v.bv_page, v.bv_offset, + (from += v.bv_len) - v.bv_len, v.bv_len); if (rem) { curr_addr = (unsigned long) from; bytes = curr_addr - s_addr - rem; @@ -741,8 +739,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) } }), ({ - rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, - v.iov_len); + rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len) + - v.iov_len, v.iov_len); if (rem) { curr_addr = (unsigned long) from; bytes = curr_addr - s_addr - rem; @@ -753,8 +751,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) return bytes; } -EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe); -#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */ +EXPORT_SYMBOL_GPL(_copy_mc_to_iter); +#endif /* CONFIG_ARCH_HAS_COPY_MC */ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { diff --git a/tools/arch/x86/include/asm/mcsafe_test.h b/tools/arch/x86/include/asm/mcsafe_test.h deleted file mode 100644 index 2ccd588fbad4..000000000000 --- a/tools/arch/x86/include/asm/mcsafe_test.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _MCSAFE_TEST_H_ -#define _MCSAFE_TEST_H_ - -.macro MCSAFE_TEST_CTL -.endm - -.macro MCSAFE_TEST_SRC reg count target -.endm - -.macro MCSAFE_TEST_DST reg count target -.endm -#endif /* _MCSAFE_TEST_H_ */ diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 45f8e1b02241..0b5b8ae56bd9 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -4,7 +4,6 @@ #include #include #include -#include #include #include @@ -187,117 +186,3 @@ SYM_FUNC_START(memcpy_orig) SYM_FUNC_END(memcpy_orig) .popsection - -#ifndef CONFIG_UML - -MCSAFE_TEST_CTL - -/* - * __memcpy_mcsafe - memory copy with machine check exception handling - * Note that we only catch machine checks when reading the source addresses. - * Writes to target are posted and don't generate machine checks. - */ -SYM_FUNC_START(__memcpy_mcsafe) - cmpl $8, %edx - /* Less than 8 bytes? Go to byte copy loop */ - jb .L_no_whole_words - - /* Check for bad alignment of source */ - testl $7, %esi - /* Already aligned */ - jz .L_8byte_aligned - - /* Copy one byte at a time until source is 8-byte aligned */ - movl %esi, %ecx - andl $7, %ecx - subl $8, %ecx - negl %ecx - subl %ecx, %edx -.L_read_leading_bytes: - movb (%rsi), %al - MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes - MCSAFE_TEST_DST %rdi 1 .E_leading_bytes -.L_write_leading_bytes: - movb %al, (%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz .L_read_leading_bytes - -.L_8byte_aligned: - movl %edx, %ecx - andl $7, %edx - shrl $3, %ecx - jz .L_no_whole_words - -.L_read_words: - movq (%rsi), %r8 - MCSAFE_TEST_SRC %rsi 8 .E_read_words - MCSAFE_TEST_DST %rdi 8 .E_write_words -.L_write_words: - movq %r8, (%rdi) - addq $8, %rsi - addq $8, %rdi - decl %ecx - jnz .L_read_words - - /* Any trailing bytes? */ -.L_no_whole_words: - andl %edx, %edx - jz .L_done_memcpy_trap - - /* Copy trailing bytes */ - movl %edx, %ecx -.L_read_trailing_bytes: - movb (%rsi), %al - MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes - MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes -.L_write_trailing_bytes: - movb %al, (%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz .L_read_trailing_bytes - - /* Copy successful. Return zero */ -.L_done_memcpy_trap: - xorl %eax, %eax -.L_done: - ret -SYM_FUNC_END(__memcpy_mcsafe) -EXPORT_SYMBOL_GPL(__memcpy_mcsafe) - - .section .fixup, "ax" - /* - * Return number of bytes not copied for any failure. Note that - * there is no "tail" handling since the source buffer is 8-byte - * aligned and poison is cacheline aligned. - */ -.E_read_words: - shll $3, %ecx -.E_leading_bytes: - addl %edx, %ecx -.E_trailing_bytes: - mov %ecx, %eax - jmp .L_done - - /* - * For write fault handling, given the destination is unaligned, - * we handle faults on multi-byte writes with a byte-by-byte - * copy up to the write-protected page. - */ -.E_write_words: - shll $3, %ecx - addl %edx, %ecx - movl %ecx, %edx - jmp mcsafe_handle_tail - - .previous - - _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) - _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) - _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) - _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) - _ASM_EXTABLE(.L_write_words, .E_write_words) - _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) -#endif diff --git a/tools/objtool/check.c b/tools/objtool/check.c index e034a8f24f46..893f021fec63 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -548,8 +548,8 @@ static const char *uaccess_safe_builtin[] = { "__ubsan_handle_shift_out_of_bounds", /* misc */ "csum_partial_copy_generic", - "__memcpy_mcsafe", - "mcsafe_handle_tail", + "copy_mc_fragile", + "copy_mc_fragile_handle_tail", "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */ NULL }; diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index dd68a40a790c..878db6a59a41 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -13,7 +13,6 @@ perf-y += synthesize.o perf-y += kallsyms-parse.o perf-y += find-bit-bench.o -perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o diff --git a/tools/perf/bench/mem-memcpy-x86-64-lib.c b/tools/perf/bench/mem-memcpy-x86-64-lib.c deleted file mode 100644 index 4130734dde84..000000000000 --- a/tools/perf/bench/mem-memcpy-x86-64-lib.c +++ /dev/null @@ -1,24 +0,0 @@ -/* - * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy - * of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy' - * happy. - */ -#include - -unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt); -unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len); - -unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len) -{ - for (; len; --len, to++, from++) { - /* - * Call the assembly routine back directly since - * memcpy_mcsafe() may silently fallback to memcpy. - */ - unsigned long rem = __memcpy_mcsafe(to, from, 1); - - if (rem) - break; - } - return len; -} diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index a1a5dc645b40..2ac0fff6dad8 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -23,7 +23,8 @@ #include "nfit_test.h" #include "../watermark.h" -#include +#include +#include /* * Generate an NFIT table to describe the following topology: @@ -3283,7 +3284,7 @@ static struct platform_driver nfit_test_driver = { .id_table = nfit_test_id, }; -static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); +static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); enum INJECT { INJECT_NONE, @@ -3291,7 +3292,7 @@ enum INJECT { INJECT_DST, }; -static void mcsafe_test_init(char *dst, char *src, size_t size) +static void copy_mc_test_init(char *dst, char *src, size_t size) { size_t i; @@ -3300,7 +3301,7 @@ static void mcsafe_test_init(char *dst, char *src, size_t size) src[i] = (char) i; } -static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src, +static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src, size_t size, unsigned long rem) { size_t i; @@ -3321,12 +3322,12 @@ static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src, return true; } -void mcsafe_test(void) +void copy_mc_test(void) { char *inject_desc[] = { "none", "source", "destination" }; enum INJECT inj; - if (IS_ENABLED(CONFIG_MCSAFE_TEST)) { + if (IS_ENABLED(CONFIG_COPY_MC_TEST)) { pr_info("%s: run...\n", __func__); } else { pr_info("%s: disabled, skip.\n", __func__); @@ -3344,31 +3345,31 @@ void mcsafe_test(void) switch (inj) { case INJECT_NONE: - mcsafe_inject_src(NULL); - mcsafe_inject_dst(NULL); - dst = &mcsafe_buf[2048]; - src = &mcsafe_buf[1024 - i]; + copy_mc_inject_src(NULL); + copy_mc_inject_dst(NULL); + dst = ©_mc_buf[2048]; + src = ©_mc_buf[1024 - i]; expect = 0; break; case INJECT_SRC: - mcsafe_inject_src(&mcsafe_buf[1024]); - mcsafe_inject_dst(NULL); - dst = &mcsafe_buf[2048]; - src = &mcsafe_buf[1024 - i]; + copy_mc_inject_src(©_mc_buf[1024]); + copy_mc_inject_dst(NULL); + dst = ©_mc_buf[2048]; + src = ©_mc_buf[1024 - i]; expect = 512 - i; break; case INJECT_DST: - mcsafe_inject_src(NULL); - mcsafe_inject_dst(&mcsafe_buf[2048]); - dst = &mcsafe_buf[2048 - i]; - src = &mcsafe_buf[1024]; + copy_mc_inject_src(NULL); + copy_mc_inject_dst(©_mc_buf[2048]); + dst = ©_mc_buf[2048 - i]; + src = ©_mc_buf[1024]; expect = 512 - i; break; } - mcsafe_test_init(dst, src, 512); - rem = __memcpy_mcsafe(dst, src, 512); - valid = mcsafe_test_validate(dst, src, 512, expect); + copy_mc_test_init(dst, src, 512); + rem = copy_mc_fragile(dst, src, 512); + valid = copy_mc_test_validate(dst, src, 512, expect); if (rem == expect && valid) continue; pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n", @@ -3380,8 +3381,8 @@ void mcsafe_test(void) } } - mcsafe_inject_src(NULL); - mcsafe_inject_dst(NULL); + copy_mc_inject_src(NULL); + copy_mc_inject_dst(NULL); } static __init int nfit_test_init(void) @@ -3392,7 +3393,7 @@ static __init int nfit_test_init(void) libnvdimm_test(); acpi_nfit_test(); device_dax_test(); - mcsafe_test(); + copy_mc_test(); dax_pmem_test(); dax_pmem_core_test(); #ifdef CONFIG_DEV_DAX_PMEM_COMPAT diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore index ddaf140b8255..994b11af765c 100644 --- a/tools/testing/selftests/powerpc/copyloops/.gitignore +++ b/tools/testing/selftests/powerpc/copyloops/.gitignore @@ -12,4 +12,4 @@ memcpy_p7_t1 copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 -memcpy_mcsafe_64 +copy_mc_64 diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 0917983a1c78..3095b1f1c02b 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -12,7 +12,7 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4 TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \ copyuser_p7_t0 copyuser_p7_t1 \ memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \ - memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \ + memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \ copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 EXTRA_SOURCES := validate.c ../harness.c stubs.S @@ -45,9 +45,9 @@ $(OUTPUT)/memcpy_p7_t%: memcpy_power7.S $(EXTRA_SOURCES) -D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \ -o $@ $^ -$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES) +$(OUTPUT)/copy_mc_64: copy_mc_64.S $(EXTRA_SOURCES) $(CC) $(CPPFLAGS) $(CFLAGS) \ - -D COPY_LOOP=test_memcpy_mcsafe \ + -D COPY_LOOP=test_copy_mc_generic \ -o $@ $^ $(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \ diff --git a/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S new file mode 120000 index 000000000000..dcbe06d500fb --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S @@ -0,0 +1 @@ +../../../../../arch/powerpc/lib/copy_mc_64.S \ No newline at end of file diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S b/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S deleted file mode 120000 index f0feef3062f6..000000000000 --- a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S +++ /dev/null @@ -1 +0,0 @@ -../../../../../arch/powerpc/lib/memcpy_mcsafe_64.S \ No newline at end of file -- cgit v1.3.1 From 5da8e4a658109e3b7e1f45ae672b7c06ac3e7158 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 5 Oct 2020 20:40:25 -0700 Subject: x86/copy_mc: Introduce copy_mc_enhanced_fast_string() The motivations to go rework memcpy_mcsafe() are that the benefit of doing slow and careful copies is obviated on newer CPUs, and that the current opt-in list of CPUs to instrument recovery is broken relative to those CPUs. There is no need to keep an opt-in list up to date on an ongoing basis if pmem/dax operations are instrumented for recovery by default. With recovery enabled by default the old "mcsafe_key" opt-in to careful copying can be made a "fragile" opt-out. Where the "fragile" list takes steps to not consume poison across cachelines. The discussion with Linus made clear that the current "_mcsafe" suffix was imprecise to a fault. The operations that are needed by pmem/dax are to copy from a source address that might throw #MC to a destination that may write-fault, if it is a user page. So copy_to_user_mcsafe() becomes copy_mc_to_user() to indicate the separate precautions taken on source and destination. copy_mc_to_kernel() is introduced as a non-SMAP version that does not expect write-faults on the destination, but is still prepared to abort with an error code upon taking #MC. The original copy_mc_fragile() implementation had negative performance implications since it did not use the fast-string instruction sequence to perform copies. For this reason copy_mc_to_kernel() fell back to plain memcpy() to preserve performance on platforms that did not indicate the capability to recover from machine check exceptions. However, that capability detection was not architectural and now that some platforms can recover from fast-string consumption of memory errors the memcpy() fallback now causes these more capable platforms to fail. Introduce copy_mc_enhanced_fast_string() as the fast default implementation of copy_mc_to_kernel() and finalize the transition of copy_mc_fragile() to be a platform quirk to indicate 'copy-carefully'. With this in place, copy_mc_to_kernel() is fast and recovery-ready by default regardless of hardware capability. Thanks to Vivek for identifying that copy_user_generic() is not suitable as the copy_mc_to_user() backend since the #MC handler explicitly checks ex_has_fault_handler(). Thanks to the 0day robot for catching a performance bug in the x86/copy_mc_to_user implementation. [ bp: Add the "why" for this change from the 0/2th message, massage. ] Fixes: 92b0729c34ca ("x86/mm, x86/mce: Add memcpy_mcsafe()") Reported-by: Erwin Tsaur Reported-by: 0day robot Signed-off-by: Dan Williams Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Tested-by: Erwin Tsaur Cc: Link: https://lkml.kernel.org/r/160195562556.2163339.18063423034951948973.stgit@dwillia2-desk3.amr.corp.intel.com --- arch/x86/lib/copy_mc.c | 32 +++++++++++++++++++++++--------- arch/x86/lib/copy_mc_64.S | 36 ++++++++++++++++++++++++++++++++++++ tools/objtool/check.c | 1 + 3 files changed, 60 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c index 2633635530b7..c13e8c9ee926 100644 --- a/arch/x86/lib/copy_mc.c +++ b/arch/x86/lib/copy_mc.c @@ -45,6 +45,8 @@ void enable_copy_mc_fragile(void) #define copy_mc_fragile_enabled (0) #endif +unsigned long copy_mc_enhanced_fast_string(void *dst, const void *src, unsigned len); + /** * copy_mc_to_kernel - memory copy that handles source exceptions * @@ -52,9 +54,11 @@ void enable_copy_mc_fragile(void) * @src: source address * @len: number of bytes to copy * - * Call into the 'fragile' version on systems that have trouble - * actually do machine check recovery. Everyone else can just - * use memcpy(). + * Call into the 'fragile' version on systems that benefit from avoiding + * corner case poison consumption scenarios, For example, accessing + * poison across 2 cachelines with a single instruction. Almost all + * other uses case can use copy_mc_enhanced_fast_string() for a fast + * recoverable copy, or fallback to plain memcpy. * * Return 0 for success, or number of bytes not copied if there was an * exception. @@ -63,6 +67,8 @@ unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigne { if (copy_mc_fragile_enabled) return copy_mc_fragile(dst, src, len); + if (static_cpu_has(X86_FEATURE_ERMS)) + return copy_mc_enhanced_fast_string(dst, src, len); memcpy(dst, src, len); return 0; } @@ -72,11 +78,19 @@ unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned { unsigned long ret; - if (!copy_mc_fragile_enabled) - return copy_user_generic(dst, src, len); + if (copy_mc_fragile_enabled) { + __uaccess_begin(); + ret = copy_mc_fragile(dst, src, len); + __uaccess_end(); + return ret; + } + + if (static_cpu_has(X86_FEATURE_ERMS)) { + __uaccess_begin(); + ret = copy_mc_enhanced_fast_string(dst, src, len); + __uaccess_end(); + return ret; + } - __uaccess_begin(); - ret = copy_mc_fragile(dst, src, len); - __uaccess_end(); - return ret; + return copy_user_generic(dst, src, len); } diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S index c3b613c4544a..892d8915f609 100644 --- a/arch/x86/lib/copy_mc_64.S +++ b/arch/x86/lib/copy_mc_64.S @@ -124,4 +124,40 @@ EXPORT_SYMBOL_GPL(copy_mc_fragile) _ASM_EXTABLE(.L_write_words, .E_write_words) _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) #endif /* CONFIG_X86_MCE */ + +/* + * copy_mc_enhanced_fast_string - memory copy with exception handling + * + * Fast string copy + fault / exception handling. If the CPU does + * support machine check exception recovery, but does not support + * recovering from fast-string exceptions then this CPU needs to be + * added to the copy_mc_fragile_key set of quirks. Otherwise, absent any + * machine check recovery support this version should be no slower than + * standard memcpy. + */ +SYM_FUNC_START(copy_mc_enhanced_fast_string) + movq %rdi, %rax + movq %rdx, %rcx +.L_copy: + rep movsb + /* Copy successful. Return zero */ + xorl %eax, %eax + ret +SYM_FUNC_END(copy_mc_enhanced_fast_string) + + .section .fixup, "ax" +.E_copy: + /* + * On fault %rcx is updated such that the copy instruction could + * optionally be restarted at the fault position, i.e. it + * contains 'bytes remaining'. A non-zero return indicates error + * to copy_mc_generic() users, or indicate short transfers to + * user-copy routines. + */ + movq %rcx, %rax + ret + + .previous + + _ASM_EXTABLE_FAULT(.L_copy, .E_copy) #endif /* !CONFIG_UML */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 893f021fec63..b3e4efcf7ca6 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -550,6 +550,7 @@ static const char *uaccess_safe_builtin[] = { "csum_partial_copy_generic", "copy_mc_fragile", "copy_mc_fragile_handle_tail", + "copy_mc_enhanced_fast_string", "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */ NULL }; -- cgit v1.3.1 From dc9af82ea0614bb138705d1f5230d53b3b1dfb83 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Thu, 20 Aug 2020 14:45:14 +1000 Subject: selftests/powerpc: Add a rtas_filter selftest Add a selftest to test the basic functionality of CONFIG_RTAS_FILTER. Signed-off-by: Andrew Donnellan [mpe: Change rmo_start/end to 32-bit to avoid build errors on ppc64] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200820044512.7543-2-ajd@linux.ibm.com --- tools/testing/selftests/powerpc/syscalls/Makefile | 2 +- .../selftests/powerpc/syscalls/rtas_filter.c | 285 +++++++++++++++++++++ 2 files changed, 286 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/syscalls/rtas_filter.c (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile index 01b22775ca87..b63f8459c704 100644 --- a/tools/testing/selftests/powerpc/syscalls/Makefile +++ b/tools/testing/selftests/powerpc/syscalls/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -TEST_GEN_PROGS := ipc_unmuxed +TEST_GEN_PROGS := ipc_unmuxed rtas_filter CFLAGS += -I../../../../../usr/include diff --git a/tools/testing/selftests/powerpc/syscalls/rtas_filter.c b/tools/testing/selftests/powerpc/syscalls/rtas_filter.c new file mode 100644 index 000000000000..03b487f18d00 --- /dev/null +++ b/tools/testing/selftests/powerpc/syscalls/rtas_filter.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright 2005-2020 IBM Corporation. + * + * Includes code from librtas (https://github.com/ibm-power-utilities/librtas/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "utils.h" + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define cpu_to_be32(x) bswap_32(x) +#define be32_to_cpu(x) bswap_32(x) +#else +#define cpu_to_be32(x) (x) +#define be32_to_cpu(x) (x) +#endif + +#define RTAS_IO_ASSERT -1098 /* Unexpected I/O Error */ +#define RTAS_UNKNOWN_OP -1099 /* No Firmware Implementation of Function */ +#define BLOCK_SIZE 4096 +#define PAGE_SIZE 4096 +#define MAX_PAGES 64 + +static const char *ofdt_rtas_path = "/proc/device-tree/rtas"; + +typedef __be32 uint32_t; +struct rtas_args { + __be32 token; + __be32 nargs; + __be32 nret; + __be32 args[16]; + __be32 *rets; /* Pointer to return values in args[]. */ +}; + +struct region { + uint64_t addr; + uint32_t size; + struct region *next; +}; + +int read_entire_file(int fd, char **buf, size_t *len) +{ + size_t buf_size = 0; + size_t off = 0; + int rc; + + *buf = NULL; + do { + buf_size += BLOCK_SIZE; + if (*buf == NULL) + *buf = malloc(buf_size); + else + *buf = realloc(*buf, buf_size); + + if (*buf == NULL) + return -ENOMEM; + + rc = read(fd, *buf + off, BLOCK_SIZE); + if (rc < 0) + return -EIO; + + off += rc; + } while (rc == BLOCK_SIZE); + + if (len) + *len = off; + + return 0; +} + +static int open_prop_file(const char *prop_path, const char *prop_name, int *fd) +{ + char *path; + int len; + + /* allocate enough for two string, a slash and trailing NULL */ + len = strlen(prop_path) + strlen(prop_name) + 1 + 1; + path = malloc(len); + if (path == NULL) + return -ENOMEM; + + snprintf(path, len, "%s/%s", prop_path, prop_name); + + *fd = open(path, O_RDONLY); + free(path); + if (*fd < 0) + return -errno; + + return 0; +} + +static int get_property(const char *prop_path, const char *prop_name, + char **prop_val, size_t *prop_len) +{ + int rc, fd; + + rc = open_prop_file(prop_path, prop_name, &fd); + if (rc) + return rc; + + rc = read_entire_file(fd, prop_val, prop_len); + close(fd); + + return rc; +} + +int rtas_token(const char *call_name) +{ + char *prop_buf = NULL; + size_t len; + int rc; + + rc = get_property(ofdt_rtas_path, call_name, &prop_buf, &len); + if (rc < 0) { + rc = RTAS_UNKNOWN_OP; + goto err; + } + + rc = be32_to_cpu(*(int *)prop_buf); + +err: + free(prop_buf); + return rc; +} + +static int read_kregion_bounds(struct region *kregion) +{ + char *buf; + int fd; + int rc; + + fd = open("/proc/ppc64/rtas/rmo_buffer", O_RDONLY); + if (fd < 0) { + printf("Could not open rmo_buffer file\n"); + return RTAS_IO_ASSERT; + } + + rc = read_entire_file(fd, &buf, NULL); + close(fd); + if (rc) { + free(buf); + return rc; + } + + sscanf(buf, "%" SCNx64 " %x", &kregion->addr, &kregion->size); + free(buf); + + if (!(kregion->size && kregion->addr) || + (kregion->size > (PAGE_SIZE * MAX_PAGES))) { + printf("Unexpected kregion bounds\n"); + return RTAS_IO_ASSERT; + } + + return 0; +} + +static int rtas_call(const char *name, int nargs, + int nrets, ...) +{ + struct rtas_args args; + __be32 *rets[16]; + int i, rc, token; + va_list ap; + + va_start(ap, nrets); + + token = rtas_token(name); + if (token == RTAS_UNKNOWN_OP) { + // We don't care if the call doesn't exist + printf("call '%s' not available, skipping...", name); + rc = RTAS_UNKNOWN_OP; + goto err; + } + + args.token = cpu_to_be32(token); + args.nargs = cpu_to_be32(nargs); + args.nret = cpu_to_be32(nrets); + + for (i = 0; i < nargs; i++) + args.args[i] = (__be32) va_arg(ap, unsigned long); + + for (i = 0; i < nrets; i++) + rets[i] = (__be32 *) va_arg(ap, unsigned long); + + rc = syscall(__NR_rtas, &args); + if (rc) { + rc = -errno; + goto err; + } + + if (nrets) { + *(rets[0]) = be32_to_cpu(args.args[nargs]); + + for (i = 1; i < nrets; i++) { + *(rets[i]) = args.args[nargs + i]; + } + } + +err: + va_end(ap); + return rc; +} + +static int test(void) +{ + struct region rmo_region; + uint32_t rmo_start; + uint32_t rmo_end; + __be32 rets[1]; + int rc; + + // Test a legitimate harmless call + // Expected: call succeeds + printf("Test a permitted call, no parameters... "); + rc = rtas_call("get-time-of-day", 0, 1, rets); + printf("rc: %d\n", rc); + FAIL_IF(rc != 0 && rc != RTAS_UNKNOWN_OP); + + // Test a prohibited call + // Expected: call returns -EINVAL + printf("Test a prohibited call... "); + rc = rtas_call("nvram-fetch", 0, 1, rets); + printf("rc: %d\n", rc); + FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP); + + // Get RMO + rc = read_kregion_bounds(&rmo_region); + if (rc) { + printf("Couldn't read RMO region bounds, skipping remaining cases\n"); + return 0; + } + rmo_start = rmo_region.addr; + rmo_end = rmo_start + rmo_region.size - 1; + printf("RMO range: %08x - %08x\n", rmo_start, rmo_end); + + // Test a permitted call, user-supplied size, buffer inside RMO + // Expected: call succeeds + printf("Test a permitted call, user-supplied size, buffer inside RMO... "); + rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_start), + cpu_to_be32(rmo_end - rmo_start + 1), rets); + printf("rc: %d\n", rc); + FAIL_IF(rc != 0 && rc != RTAS_UNKNOWN_OP); + + // Test a permitted call, user-supplied size, buffer start outside RMO + // Expected: call returns -EINVAL + printf("Test a permitted call, user-supplied size, buffer start outside RMO... "); + rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_end + 1), + cpu_to_be32(4000), rets); + printf("rc: %d\n", rc); + FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP); + + // Test a permitted call, user-supplied size, buffer end outside RMO + // Expected: call returns -EINVAL + printf("Test a permitted call, user-supplied size, buffer end outside RMO... "); + rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_start), + cpu_to_be32(rmo_end - rmo_start + 2), rets); + printf("rc: %d\n", rc); + FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP); + + // Test a permitted call, fixed size, buffer end outside RMO + // Expected: call returns -EINVAL + printf("Test a permitted call, fixed size, buffer end outside RMO... "); + rc = rtas_call("ibm,configure-connector", 2, 1, cpu_to_be32(rmo_end - 4000), 0, rets); + printf("rc: %d\n", rc); + FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP); + + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test, "rtas_filter"); +} -- cgit v1.3.1 From 2486baae2cf6df73554144d0a4e40ae8809b54d4 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 5 Oct 2020 17:50:28 +0200 Subject: objtool: Allow nested externs to enable BUILD_BUG() Currently BUILD_BUG() macro is expanded to smth like the following: do { extern void __compiletime_assert_0(void) __attribute__((error("BUILD_BUG failed"))); if (!(!(1))) __compiletime_assert_0(); } while (0); If used in a function body this obviously would produce build errors with -Wnested-externs and -Werror. Build objtool with -Wno-nested-externs to enable BUILD_BUG() usage. Signed-off-by: Vasily Gorbik Signed-off-by: Josh Poimboeuf --- tools/objtool/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 33d1e3ca8efd..4ea9a833dde7 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -37,7 +37,7 @@ INCLUDES := -I$(srctree)/tools/include \ -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \ -I$(srctree)/tools/arch/$(SRCARCH)/include \ -I$(srctree)/tools/objtool/arch/$(SRCARCH)/include -WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed +WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS) LDFLAGS += $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS) -- cgit v1.3.1 From a0f2b7acb4b1d29127ff99c714233b973afd1411 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 6 Oct 2020 10:13:43 +0800 Subject: libbpf: Close map fd if init map slots failed Previously we forgot to close the map fd if bpf_map_update_elem() failed during map slot init, which will leak map fd. Let's move map slot initialization to new function init_map_slots() to simplify the code. And close the map fd if init slot failed. Reported-by: Andrii Nakryiko Signed-off-by: Hangbin Liu Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201006021345.3817033-2-liuhangbin@gmail.com --- tools/lib/bpf/libbpf.c | 55 +++++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 9b36c52b8511..7cec237d9d1f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4203,6 +4203,36 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map) return 0; } +static int init_map_slots(struct bpf_map *map) +{ + const struct bpf_map *targ_map; + unsigned int i; + int fd, err; + + for (i = 0; i < map->init_slots_sz; i++) { + if (!map->init_slots[i]) + continue; + + targ_map = map->init_slots[i]; + fd = bpf_map__fd(targ_map); + err = bpf_map_update_elem(map->fd, &i, &fd, 0); + if (err) { + err = -errno; + pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", + map->name, i, targ_map->name, + fd, err); + return err; + } + pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", + map->name, i, targ_map->name, fd); + } + + zfree(&map->init_slots); + map->init_slots_sz = 0; + + return 0; +} + static int bpf_object__create_maps(struct bpf_object *obj) { @@ -4245,28 +4275,11 @@ bpf_object__create_maps(struct bpf_object *obj) } if (map->init_slots_sz) { - for (j = 0; j < map->init_slots_sz; j++) { - const struct bpf_map *targ_map; - int fd; - - if (!map->init_slots[j]) - continue; - - targ_map = map->init_slots[j]; - fd = bpf_map__fd(targ_map); - err = bpf_map_update_elem(map->fd, &j, &fd, 0); - if (err) { - err = -errno; - pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", - map->name, j, targ_map->name, - fd, err); - goto err_out; - } - pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", - map->name, j, targ_map->name, fd); + err = init_map_slots(map); + if (err < 0) { + zclose(map->fd); + goto err_out; } - zfree(&map->init_slots); - map->init_slots_sz = 0; } if (map->pin_path && !map->pinned) { -- cgit v1.3.1 From 2c193d32caee83322a8511a88ba0cd097bba73f1 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 6 Oct 2020 10:13:44 +0800 Subject: libbpf: Check if pin_path was set even map fd exist Say a user reuse map fd after creating a map manually and set the pin_path, then load the object via libbpf. In libbpf bpf_object__create_maps(), bpf_object__reuse_map() will return 0 if there is no pinned map in map->pin_path. Then after checking if map fd exist, we should also check if pin_path was set and do bpf_map__pin() instead of continue the loop. Fix it by creating map if fd not exist and continue checking pin_path after that. Suggested-by: Andrii Nakryiko Signed-off-by: Hangbin Liu Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201006021345.3817033-3-liuhangbin@gmail.com --- tools/lib/bpf/libbpf.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7cec237d9d1f..9f90d1a686fd 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4256,29 +4256,28 @@ bpf_object__create_maps(struct bpf_object *obj) if (map->fd >= 0) { pr_debug("map '%s': skipping creation (preset fd=%d)\n", map->name, map->fd); - continue; - } - - err = bpf_object__create_map(obj, map); - if (err) - goto err_out; + } else { + err = bpf_object__create_map(obj, map); + if (err) + goto err_out; - pr_debug("map '%s': created successfully, fd=%d\n", map->name, - map->fd); + pr_debug("map '%s': created successfully, fd=%d\n", + map->name, map->fd); - if (bpf_map__is_internal(map)) { - err = bpf_object__populate_internal_map(obj, map); - if (err < 0) { - zclose(map->fd); - goto err_out; + if (bpf_map__is_internal(map)) { + err = bpf_object__populate_internal_map(obj, map); + if (err < 0) { + zclose(map->fd); + goto err_out; + } } - } - if (map->init_slots_sz) { - err = init_map_slots(map); - if (err < 0) { - zclose(map->fd); - goto err_out; + if (map->init_slots_sz) { + err = init_map_slots(map); + if (err < 0) { + zclose(map->fd); + goto err_out; + } } } -- cgit v1.3.1 From 44c4aa2bd1511ebd7f24d4a2eb7a26053d51d6ae Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 6 Oct 2020 10:13:45 +0800 Subject: selftest/bpf: Test pinning map with reused map fd This add a test to make sure that we can still pin maps with reused map fd. Signed-off-by: Hangbin Liu Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201006021345.3817033-4-liuhangbin@gmail.com --- tools/testing/selftests/bpf/prog_tests/pinning.c | 49 +++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c index 041952524c55..fcf54b3a1dd0 100644 --- a/tools/testing/selftests/bpf/prog_tests/pinning.c +++ b/tools/testing/selftests/bpf/prog_tests/pinning.c @@ -37,7 +37,7 @@ void test_pinning(void) struct stat statbuf = {}; struct bpf_object *obj; struct bpf_map *map; - int err; + int err, map_fd; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, .pin_root_path = custpath, ); @@ -213,6 +213,53 @@ void test_pinning(void) if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno)) goto out; + /* remove the custom pin path to re-test it with reuse fd below */ + err = unlink(custpinpath); + if (CHECK(err, "unlink custpinpath", "err %d errno %d\n", err, errno)) + goto out; + + err = rmdir(custpath); + if (CHECK(err, "rmdir custpindir", "err %d errno %d\n", err, errno)) + goto out; + + bpf_object__close(obj); + + /* test pinning at custom path with reuse fd */ + obj = bpf_object__open_file(file, NULL); + err = libbpf_get_error(obj); + if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) { + obj = NULL; + goto out; + } + + map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(__u32), + sizeof(__u64), 1, 0); + if (CHECK(map_fd < 0, "create pinmap manually", "fd %d\n", map_fd)) + goto out; + + map = bpf_object__find_map_by_name(obj, "pinmap"); + if (CHECK(!map, "find map", "NULL map")) + goto close_map_fd; + + err = bpf_map__reuse_fd(map, map_fd); + if (CHECK(err, "reuse pinmap fd", "err %d errno %d\n", err, errno)) + goto close_map_fd; + + err = bpf_map__set_pin_path(map, custpinpath); + if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno)) + goto close_map_fd; + + err = bpf_object__load(obj); + if (CHECK(err, "custom load", "err %d errno %d\n", err, errno)) + goto close_map_fd; + + /* check that pinmap was pinned at the custom path */ + err = stat(custpinpath, &statbuf); + if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno)) + goto close_map_fd; + +close_map_fd: + close(map_fd); out: unlink(pinpath); unlink(nopinpath); -- cgit v1.3.1 From 8cee9107e72c93a52a7f2c58658c99887bdb05e0 Mon Sep 17 00:00:00 2001 From: Luigi Rizzo Date: Mon, 5 Oct 2020 15:45:28 -0700 Subject: bpf, libbpf: Use valid btf in bpf_program__set_attach_target bpf_program__set_attach_target(prog, fd, ...) will always fail when fd = 0 (attach to a kernel symbol) because obj->btf_vmlinux is NULL and there is no way to set it (at the moment btf_vmlinux is meant to be temporary storage for use in bpf_object__load_xattr()). Fix this by using libbpf_find_vmlinux_btf_id(). At some point we may want to opportunistically cache btf_vmlinux so it can be reused with multiple programs. Signed-off-by: Luigi Rizzo Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Petar Penkov Link: https://lore.kernel.org/bpf/20201005224528.389097-1-lrizzo@google.com --- tools/lib/bpf/libbpf.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 9f90d1a686fd..faec389c4849 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10451,9 +10451,8 @@ int bpf_program__set_attach_target(struct bpf_program *prog, btf_id = libbpf_find_prog_btf_id(attach_func_name, attach_prog_fd); else - btf_id = __find_vmlinux_btf_id(prog->obj->btf_vmlinux, - attach_func_name, - prog->expected_attach_type); + btf_id = libbpf_find_vmlinux_btf_id(attach_func_name, + prog->expected_attach_type); if (btf_id < 0) return btf_id; -- cgit v1.3.1 From bf88a80a0407e67f21ceafbf03828086f149dfad Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Tue, 6 Oct 2020 19:28:57 -0700 Subject: selftests/bpf: Fix test_verifier after introducing resolve_pseudo_ldimm64 Commit 4976b718c355 ("bpf: Introduce pseudo_btf_id") switched the order of check_subprogs() and resolve_pseudo_ldimm() in the verifier. Now an empty prog expects to see the error "last insn is not an the prog of a single invalid ldimm exit or jmp" instead, because the check for subprogs comes first. It's now pointless to validate that half of ldimm64 won't be the last instruction. Tested: # ./test_verifier Summary: 1129 PASSED, 537 SKIPPED, 0 FAILED and the full set of bpf selftests. Fixes: 4976b718c355 ("bpf: Introduce pseudo_btf_id") Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201007022857.2791884-1-haoluo@google.com --- tools/testing/selftests/bpf/verifier/basic.c | 2 +- tools/testing/selftests/bpf/verifier/ld_imm64.c | 8 -------- 2 files changed, 1 insertion(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/basic.c b/tools/testing/selftests/bpf/verifier/basic.c index b8d18642653a..de84f0d57082 100644 --- a/tools/testing/selftests/bpf/verifier/basic.c +++ b/tools/testing/selftests/bpf/verifier/basic.c @@ -2,7 +2,7 @@ "empty prog", .insns = { }, - .errstr = "unknown opcode 00", + .errstr = "last insn is not an exit or jmp", .result = REJECT, }, { diff --git a/tools/testing/selftests/bpf/verifier/ld_imm64.c b/tools/testing/selftests/bpf/verifier/ld_imm64.c index 3856dba733e9..f9297900cea6 100644 --- a/tools/testing/selftests/bpf/verifier/ld_imm64.c +++ b/tools/testing/selftests/bpf/verifier/ld_imm64.c @@ -50,14 +50,6 @@ .errstr = "invalid bpf_ld_imm64 insn", .result = REJECT, }, -{ - "test5 ld_imm64", - .insns = { - BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0), - }, - .errstr = "invalid bpf_ld_imm64 insn", - .result = REJECT, -}, { "test6 ld_imm64", .insns = { -- cgit v1.3.1 From bef69bd7cfc363ab94b84ea29102f3e913ed3c6c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 7 Oct 2020 17:13:11 +0900 Subject: perf stat: Fix out of bounds CPU map access when handling armv8_pmu events It was reported that 'perf stat' crashed when using with armv8_pmu (CPU) events with the task mode. As 'perf stat' uses an empty cpu map for task mode but armv8_pmu has its own cpu mask, it has confused which map it should use when accessing file descriptors and this causes segfaults: (gdb) bt #0 0x0000000000603fc8 in perf_evsel__close_fd_cpu (evsel=, cpu=) at evsel.c:122 #1 perf_evsel__close_cpu (evsel=evsel@entry=0x716e950, cpu=7) at evsel.c:156 #2 0x00000000004d4718 in evlist__close (evlist=0x70a7cb0) at util/evlist.c:1242 #3 0x0000000000453404 in __run_perf_stat (argc=3, argc@entry=1, argv=0x30, argv@entry=0xfffffaea2f90, run_idx=119, run_idx@entry=1701998435) at builtin-stat.c:929 #4 0x0000000000455058 in run_perf_stat (run_idx=1701998435, argv=0xfffffaea2f90, argc=1) at builtin-stat.c:947 #5 cmd_stat (argc=1, argv=0xfffffaea2f90) at builtin-stat.c:2357 #6 0x00000000004bb888 in run_builtin (p=p@entry=0x9764b8 , argc=argc@entry=4, argv=argv@entry=0xfffffaea2f90) at perf.c:312 #7 0x00000000004bbb54 in handle_internal_command (argc=argc@entry=4, argv=argv@entry=0xfffffaea2f90) at perf.c:364 #8 0x0000000000435378 in run_argv (argcp=, argv=) at perf.c:408 #9 main (argc=4, argv=0xfffffaea2f90) at perf.c:538 To fix this, I simply used the given cpu map unless the evsel actually is not a system-wide event (like uncore events). Fixes: 7736627b865d ("perf stat: Use affinity for closing file descriptors") Reported-by: Wei Li Signed-off-by: Namhyung Kim Tested-by: Barry Song Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201007081311.1831003-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 2208444ecb44..cfcdbd7be066 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -45,6 +45,9 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, if (!evsel->own_cpus || evlist->has_user_cpus) { perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__get(evlist->cpus); + } else if (!evsel->system_wide && perf_cpu_map__empty(evlist->cpus)) { + perf_cpu_map__put(evsel->cpus); + evsel->cpus = perf_cpu_map__get(evlist->cpus); } else if (evsel->cpus != evsel->own_cpus) { perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__get(evsel->own_cpus); -- cgit v1.3.1 From dd841a749d1ded8e2e5facc4242ee0b6779fc0cb Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 14 Jun 2020 06:07:10 -0400 Subject: radix tree test suite: Fix compilation Introducing local_lock broke compilation; fix it all up. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/radix-tree.h | 1 + lib/radix-tree.c | 1 - tools/testing/radix-tree/linux/kernel.h | 1 + tools/testing/radix-tree/linux/local_lock.h | 8 ++++++++ tools/testing/radix-tree/test.h | 4 ---- 5 files changed, 10 insertions(+), 5 deletions(-) create mode 100644 tools/testing/radix-tree/linux/local_lock.h (limited to 'tools') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index c2a9f7c90727..5c85059a92ba 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 8e4a3a4397f2..0f10485d46b6 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -20,7 +20,6 @@ #include #include #include -#include #include /* in_interrupt() */ #include #include diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 4568248222ae..39867fd80c8f 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -22,4 +22,5 @@ #define __releases(x) #define __must_hold(x) +#define EXPORT_PER_CPU_SYMBOL_GPL(x) #endif /* _KERNEL_H */ diff --git a/tools/testing/radix-tree/linux/local_lock.h b/tools/testing/radix-tree/linux/local_lock.h new file mode 100644 index 000000000000..b3cf8b233ca4 --- /dev/null +++ b/tools/testing/radix-tree/linux/local_lock.h @@ -0,0 +1,8 @@ +#ifndef _LINUX_LOCAL_LOCK +#define _LINUX_LOCAL_LOCK +typedef struct { } local_lock_t; + +static inline void local_lock(local_lock_t *lock) { } +static inline void local_unlock(local_lock_t *lock) { } +#define INIT_LOCAL_LOCK(x) { } +#endif diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 34dab4d18744..7ef7067e942c 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -56,8 +56,4 @@ int root_tag_get(struct radix_tree_root *root, unsigned int tag); unsigned long node_maxindex(struct radix_tree_node *); unsigned long shift_maxindex(unsigned int shift); int radix_tree_cpu_dead(unsigned int cpu); -struct radix_tree_preload { - unsigned nr; - struct radix_tree_node *nodes; -}; extern struct radix_tree_preload radix_tree_preloads; -- cgit v1.3.1 From a219b856a2b993da234108307be772448f22b0ce Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 2 Apr 2020 14:26:13 -0400 Subject: ida: Free allocated bitmap in error path If a bitmap needs to be allocated, and then by the time the thread is scheduled to be run again all the indices which would satisfy the allocation have been allocated then we would leak the allocation. Almost impossible to hit in practice, but a trivial fix. Found by Coverity. Fixes: f32f004cddf8 ("ida: Convert to XArray") Reported-by: coverity-bot Reviewed-by: Kees Cook Signed-off-by: Matthew Wilcox (Oracle) --- lib/idr.c | 1 + tools/testing/radix-tree/idr-test.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'tools') diff --git a/lib/idr.c b/lib/idr.c index c2cf2c52bbde..4d2eef0259d2 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -470,6 +470,7 @@ alloc: goto retry; nospc: xas_unlock_irqrestore(&xas, flags); + kfree(alloc); return -ENOSPC; } EXPORT_SYMBOL(ida_alloc_range); diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 8995092d541e..3b796dd5e577 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -523,8 +523,27 @@ static void *ida_random_fn(void *arg) return NULL; } +static void *ida_leak_fn(void *arg) +{ + struct ida *ida = arg; + time_t s = time(NULL); + int i, ret; + + rcu_register_thread(); + + do for (i = 0; i < 1000; i++) { + ret = ida_alloc_range(ida, 128, 128, GFP_KERNEL); + if (ret >= 0) + ida_free(ida, 128); + } while (time(NULL) < s + 2); + + rcu_unregister_thread(); + return NULL; +} + void ida_thread_tests(void) { + DEFINE_IDA(ida); pthread_t threads[20]; int i; @@ -536,6 +555,16 @@ void ida_thread_tests(void) while (i--) pthread_join(threads[i], NULL); + + for (i = 0; i < ARRAY_SIZE(threads); i++) + if (pthread_create(&threads[i], NULL, ida_leak_fn, &ida)) { + perror("creating ida thread"); + exit(1); + } + + while (i--) + pthread_join(threads[i], NULL); + assert(ida_is_empty(&ida)); } void ida_tests(void) -- cgit v1.3.1 From f0f0a5df4e081e7a659929303fe83450edce9a3e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 28 Sep 2020 13:26:48 -0700 Subject: selftests: Extract run_kselftest.sh and generate stand-alone test list Instead of building a script on the fly (which just repeats the same thing for each test collection), move the script out of the Makefile and into run_kselftest.sh, which reads kselftest-list.txt. Adjust the emit_tests target to report each test on a separate line so that test running tools (e.g. LAVA) can easily remove individual tests (for example, as seen in [1]). [1] https://github.com/Linaro/test-definitions/pull/208/commits/2e7b62155e4998e54ac0587704932484d4ff84c8 Signed-off-by: Kees Cook Tested-by: Naresh Kamboju Signed-off-by: Shuah Khan --- tools/testing/selftests/Makefile | 26 +++++++------------------- tools/testing/selftests/lib.mk | 5 ++--- tools/testing/selftests/run_kselftest.sh | 28 ++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 22 deletions(-) create mode 100755 tools/testing/selftests/run_kselftest.sh (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 15c1c1359c50..d9c283503159 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -206,6 +206,7 @@ KSFT_INSTALL_PATH := $(abspath $(KSFT_INSTALL_PATH)) # Avoid changing the rest of the logic here and lib.mk. INSTALL_PATH := $(KSFT_INSTALL_PATH) ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh +TEST_LIST := $(INSTALL_PATH)/kselftest-list.txt install: all ifdef INSTALL_PATH @@ -214,6 +215,8 @@ ifdef INSTALL_PATH install -m 744 kselftest/module.sh $(INSTALL_PATH)/kselftest/ install -m 744 kselftest/runner.sh $(INSTALL_PATH)/kselftest/ install -m 744 kselftest/prefix.pl $(INSTALL_PATH)/kselftest/ + install -m 744 run_kselftest.sh $(INSTALL_PATH)/ + rm -f $(TEST_LIST) @ret=1; \ for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ @@ -222,33 +225,18 @@ ifdef INSTALL_PATH ret=$$((ret * $$?)); \ done; exit $$ret; - @# Ask all targets to emit their test scripts - echo "#!/bin/sh" > $(ALL_SCRIPT) - echo "BASE_DIR=\$$(realpath \$$(dirname \$$0))" >> $(ALL_SCRIPT) - echo "cd \$$BASE_DIR" >> $(ALL_SCRIPT) - echo ". ./kselftest/runner.sh" >> $(ALL_SCRIPT) - echo "ROOT=\$$PWD" >> $(ALL_SCRIPT) - echo "if [ \"\$$1\" = \"--summary\" ]; then" >> $(ALL_SCRIPT) - echo " logfile=\$$BASE_DIR/output.log" >> $(ALL_SCRIPT) - echo " cat /dev/null > \$$logfile" >> $(ALL_SCRIPT) - echo "fi" >> $(ALL_SCRIPT) - @# While building run_kselftest.sh skip also non-existent TARGET dirs: + @# Ask all targets to emit their test scripts + @# While building kselftest-list.text skip also non-existent TARGET dirs: @# they could be the result of a build failure and should NOT be @# included in the generated runlist. for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ [ ! -d $(INSTALL_PATH)/$$TARGET ] && echo "Skipping non-existent dir: $$TARGET" && continue; \ - echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \ - echo "cd $$TARGET" >> $(ALL_SCRIPT); \ - echo -n "run_many" >> $(ALL_SCRIPT); \ echo -n "Emit Tests for $$TARGET\n"; \ - $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \ - echo "" >> $(ALL_SCRIPT); \ - echo "cd \$$ROOT" >> $(ALL_SCRIPT); \ + $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET COLLECTION=$$TARGET \ + -C $$TARGET emit_tests >> $(TEST_LIST); \ done; - - chmod u+x $(ALL_SCRIPT) else $(error Error: set INSTALL_PATH to use install) endif diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 51124b962d56..30848ca36555 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -107,9 +107,8 @@ endif emit_tests: for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \ BASENAME_TEST=`basename $$TEST`; \ - echo " \\"; \ - echo -n " \"$$BASENAME_TEST\""; \ - done; \ + echo "$(COLLECTION):$$BASENAME_TEST"; \ + done # define if isn't already. It is undefined in make O= case. ifeq ($(RM),) diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh new file mode 100755 index 000000000000..8b0ad4766d78 --- /dev/null +++ b/tools/testing/selftests/run_kselftest.sh @@ -0,0 +1,28 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Run installed kselftest tests. +# +BASE_DIR=$(realpath $(dirname $0)) +cd $BASE_DIR +TESTS="$BASE_DIR"/kselftest-list.txt +if [ ! -r "$TESTS" ] ; then + echo "$0: Could not find list of tests to run ($TESTS)" >&2 + exit 1 +fi +available="$(cat "$TESTS")" + +. ./kselftest/runner.sh +ROOT=$PWD + +if [ "$1" = "--summary" ] ; then + logfile="$BASE_DIR"/output.log + cat /dev/null > $logfile +fi + +collections=$(echo "$available" | cut -d: -f1 | uniq) +for collection in $collections ; do + [ -w /dev/kmsg ] && echo "kselftest: Running tests in $collection" >> /dev/kmsg + tests=$(echo "$available" | grep "^$collection:" | cut -d: -f2) + (cd "$collection" && run_many $tests) +done -- cgit v1.3.1 From 5da1918446a1d50d57f2f6062f7fdede0b052473 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 28 Sep 2020 13:26:49 -0700 Subject: selftests/run_kselftest.sh: Make each test individually selectable Currently with run_kselftest.sh there is no way to choose which test we could run. All the tests listed in kselftest-list.txt are all run every time. This patch enhanced the run_kselftest.sh to make the test collections (or tests) individually selectable. e.g.: $ ./run_kselftest.sh -c seccomp -t timers:posix_timers -t timers:nanosleep Additionally adds a way to list all known tests with "-l", usage with "-h", and perform a dry run without running tests with "-n". Co-developed-by: Hangbin Liu Signed-off-by: Hangbin Liu Signed-off-by: Kees Cook Tested-by: Naresh Kamboju Signed-off-by: Shuah Khan --- tools/testing/selftests/run_kselftest.sh | 77 +++++++++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh index 8b0ad4766d78..609a4ef9300e 100755 --- a/tools/testing/selftests/run_kselftest.sh +++ b/tools/testing/selftests/run_kselftest.sh @@ -8,21 +8,86 @@ cd $BASE_DIR TESTS="$BASE_DIR"/kselftest-list.txt if [ ! -r "$TESTS" ] ; then echo "$0: Could not find list of tests to run ($TESTS)" >&2 - exit 1 + available="" +else + available="$(cat "$TESTS")" fi -available="$(cat "$TESTS")" . ./kselftest/runner.sh ROOT=$PWD -if [ "$1" = "--summary" ] ; then - logfile="$BASE_DIR"/output.log - cat /dev/null > $logfile +usage() +{ + cat < $logfile + shift ;; + -t | --test) + TESTS="$TESTS $2" + shift 2 ;; + -c | --collection) + COLLECTIONS="$COLLECTIONS $2" + shift 2 ;; + -l | --list) + echo "$available" + exit 0 ;; + -n | --dry-run) + dryrun="echo" + shift ;; + -h | --help) + usage 0 ;; + "") + break ;; + *) + usage 1 ;; + esac +done + +# Add all selected collections to the explicit test list. +if [ -n "$COLLECTIONS" ]; then + for collection in $COLLECTIONS ; do + found="$(echo "$available" | grep "^$collection:")" + if [ -z "$found" ] ; then + echo "No such collection '$collection'" >&2 + exit 1 + fi + TESTS="$TESTS $found" + done +fi +# Replace available test list with explicitly selected tests. +if [ -n "$TESTS" ]; then + valid="" + for test in $TESTS ; do + found="$(echo "$available" | grep "^${test}$")" + if [ -z "$found" ] ; then + echo "No such test '$test'" >&2 + exit 1 + fi + valid="$valid $found" + done + available="$(echo "$valid" | sed -e 's/ /\n/g')" fi collections=$(echo "$available" | cut -d: -f1 | uniq) for collection in $collections ; do [ -w /dev/kmsg ] && echo "kselftest: Running tests in $collection" >> /dev/kmsg tests=$(echo "$available" | grep "^$collection:" | cut -d: -f2) - (cd "$collection" && run_many $tests) + ($dryrun cd "$collection" && $dryrun run_many $tests) done -- cgit v1.3.1 From 49f3d12b0f70ea867b891ad2a97f6e51bb564e18 Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Wed, 7 Oct 2020 07:57:17 +0200 Subject: bpf: Fix typo in uapi/linux/bpf.h Reported-by: Samanta Navarro Signed-off-by: Jakub Wilk Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201007055717.7319-1-jwilk@jwilk.net --- include/uapi/linux/bpf.h | 2 +- tools/include/uapi/linux/bpf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c446394135be..d83561e8cd2c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2253,7 +2253,7 @@ union bpf_attr { * Description * This helper is used in programs implementing policies at the * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. - * if the verdeict eBPF program returns **SK_PASS**), redirect it + * if the verdict eBPF program returns **SK_PASS**), redirect it * to the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and * egress interfaces can be used for redirection. The diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c446394135be..d83561e8cd2c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2253,7 +2253,7 @@ union bpf_attr { * Description * This helper is used in programs implementing policies at the * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. - * if the verdeict eBPF program returns **SK_PASS**), redirect it + * if the verdict eBPF program returns **SK_PASS**), redirect it * to the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and * egress interfaces can be used for redirection. The -- cgit v1.3.1 From 80348d8867c65e2c64ca78d82326e889550a24b6 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Wed, 7 Oct 2020 13:42:26 +0200 Subject: libbpf: Fix compatibility problem in xsk_socket__create Fix a compatibility problem when the old XDP_SHARED_UMEM mode is used together with the xsk_socket__create() call. In the old XDP_SHARED_UMEM mode, only sharing of the same device and queue id was allowed, and in this mode, the fill ring and completion ring were shared between the AF_XDP sockets. Therefore, it was perfectly fine to call the xsk_socket__create() API for each socket and not use the new xsk_socket__create_shared() API. This behavior was ruined by the commit introducing XDP_SHARED_UMEM support between different devices and/or queue ids. This patch restores the ability to use xsk_socket__create in these circumstances so that backward compatibility is not broken. Fixes: 2f6324a3937f ("libbpf: Support shared umems between queues and devices") Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/1602070946-11154-1-git-send-email-magnus.karlsson@gmail.com --- tools/lib/bpf/xsk.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 30b4ca5d2ac7..e3c98c007825 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -705,7 +705,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, struct xsk_ctx *ctx; int err, ifindex; - if (!umem || !xsk_ptr || !(rx || tx) || !fill || !comp) + if (!umem || !xsk_ptr || !(rx || tx)) return -EFAULT; xsk = calloc(1, sizeof(*xsk)); @@ -735,6 +735,11 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, ctx = xsk_get_ctx(umem, ifindex, queue_id); if (!ctx) { + if (!fill || !comp) { + err = -EFAULT; + goto out_socket; + } + ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id, fill, comp); if (!ctx) { -- cgit v1.3.1 From 7566616fb968dcc9f11d3f6b57132d33acab4e35 Mon Sep 17 00:00:00 2001 From: Jonathan Doman Date: Wed, 30 Sep 2020 15:26:36 -0700 Subject: tools/power/x86/intel-speed-select: Fix missing base-freq core IDs The reported base-freq high-priority-cpu-list was potentially omitting some cpus, due to incorrectly using a logical core count to constrain the size of a physical punit core ID mask. We may need to read both high and low PBF CORE_MASK values regardless of the logical core count. Signed-off-by: Jonathan Doman Signed-off-by: Srinivas Pandruvada Signed-off-by: Hans de Goede --- tools/power/x86/intel-speed-select/isst-config.c | 21 ++++++++++++--------- tools/power/x86/intel-speed-select/isst-core.c | 8 ++++---- tools/power/x86/intel-speed-select/isst.h | 2 +- 3 files changed, 17 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 9f4b190f1d74..0bba5be8d1fd 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -545,20 +545,23 @@ static void set_cpu_present_cpu_mask(void) } } -int get_core_count(int pkg_id, int die_id) +int get_max_punit_core_id(int pkg_id, int die_id) { - int cnt = 0; + int max_id = 0; + int i; - if (pkg_id < MAX_PACKAGE_COUNT && die_id < MAX_DIE_PER_PACKAGE) { - int i; + for (i = 0; i < topo_max_cpus; ++i) + { + if (!CPU_ISSET_S(i, present_cpumask_size, present_cpumask)) + continue; - for (i = 0; i < sizeof(long long) * 8; ++i) { - if (core_mask[pkg_id][die_id] & (1ULL << i)) - cnt++; - } + if (cpu_map[i].pkg_id == pkg_id && + cpu_map[i].die_id == die_id && + cpu_map[i].punit_cpu_core > max_id) + max_id = cpu_map[i].punit_cpu_core; } - return cnt; + return max_id; } int get_cpu_count(int pkg_id, int die_id) diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c index a7f4337c5777..1d7ecb54352e 100644 --- a/tools/power/x86/intel-speed-select/isst-core.c +++ b/tools/power/x86/intel-speed-select/isst-core.c @@ -396,7 +396,7 @@ int isst_get_pbf_info(int cpu, int level, struct isst_pbf_info *pbf_info) { struct isst_pkg_ctdp_level_info ctdp_level; struct isst_pkg_ctdp pkg_dev; - int i, ret, core_cnt, max; + int i, ret, max_punit_core, max_mask_index; unsigned int req, resp; ret = isst_get_ctdp_levels(cpu, &pkg_dev); @@ -421,10 +421,10 @@ int isst_get_pbf_info(int cpu, int level, struct isst_pbf_info *pbf_info) pbf_info->core_cpumask_size = alloc_cpu_set(&pbf_info->core_cpumask); - core_cnt = get_core_count(get_physical_package_id(cpu), get_physical_die_id(cpu)); - max = core_cnt > 32 ? 2 : 1; + max_punit_core = get_max_punit_core_id(get_physical_package_id(cpu), get_physical_die_id(cpu)); + max_mask_index = max_punit_core > 32 ? 2 : 1; - for (i = 0; i < max; ++i) { + for (i = 0; i < max_mask_index; ++i) { unsigned long long mask; int count; diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h index 094ba4589a9c..29715e9c2e06 100644 --- a/tools/power/x86/intel-speed-select/isst.h +++ b/tools/power/x86/intel-speed-select/isst.h @@ -170,7 +170,7 @@ struct isst_pkg_ctdp { extern int get_topo_max_cpus(void); extern int get_cpu_count(int pkg_id, int die_id); -extern int get_core_count(int pkg_id, int die_id); +extern int get_max_punit_core_id(int pkg_id, int die_id); /* Common interfaces */ FILE *get_output_file(void); -- cgit v1.3.1 From e529412f3211071b4787a8e7e153c8ced3c22a28 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 6 Oct 2020 06:26:31 -0700 Subject: tools/power/x86/intel-speed-select: Update version for v5.10 Update version for changes released with v5.10 kernel release. Signed-off-by: Srinivas Pandruvada Signed-off-by: Hans de Goede --- tools/power/x86/intel-speed-select/isst-config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 0bba5be8d1fd..cd089a505859 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -15,7 +15,7 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.5"; +static const char *version_str = "v1.6"; static const int supported_api_ver = 1; static struct isst_if_platform_info isst_platform_info; static char *progname; -- cgit v1.3.1 From 47f7cf6325f7d38e33d3dc38fa55bbe605f5b335 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 7 Oct 2020 17:10:21 -0700 Subject: libbpf: Skip CO-RE relocations for not loaded BPF programs Bypass CO-RE relocations step for BPF programs that are not going to be loaded. This allows to have BPF programs compiled in and disabled dynamically if kernel is not supposed to provide enough relocation information. In such case, there won't be unnecessary warnings about failed relocations. Fixes: d929758101fc ("libbpf: Support disabling auto-loading BPF programs") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201008001025.292064-2-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index faec389c4849..07d62771472f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5765,6 +5765,11 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) err = -EINVAL; goto out; } + /* no need to apply CO-RE relocation if the program is + * not going to be loaded + */ + if (!prog->load) + continue; err = bpf_core_apply_relo(prog, rec, i, obj->btf, targ_btf, cand_cache); -- cgit v1.3.1 From a66345bcbdf0c5070479c26103f7cb33da9e4969 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 7 Oct 2020 17:10:22 -0700 Subject: libbpf: Support safe subset of load/store instruction resizing with CO-RE Add support for patching instructions of the following form: - rX = *(T *)(rY + ); - *(T *)(rX + ) = rY; - *(T *)(rX + ) = , where T is one of {u8, u16, u32, u64}. For such instructions, if the actual kernel field recorded in CO-RE relocation has a different size than the one recorded locally (e.g., from vmlinux.h), then libbpf will adjust T to an appropriate 1-, 2-, 4-, or 8-byte loads. In general, such transformation is not always correct and could lead to invalid final value being loaded or stored. But two classes of cases are always safe: - if both local and target (kernel) types are unsigned integers, but of different sizes, then it's OK to adjust load/store instruction according to the necessary memory size. Zero-extending nature of such instructions and unsignedness make sure that the final value is always correct; - pointer size mismatch between BPF target architecture (which is always 64-bit) and 32-bit host kernel architecture can be similarly resolved automatically, because pointer is essentially an unsigned integer. Loading 32-bit pointer into 64-bit BPF register with zero extension will leave correct pointer in the register. Both cases are necessary to support CO-RE on 32-bit kernels, as `unsigned long` in vmlinux.h generated from 32-bit kernel is 32-bit, but when compiled with BPF program for BPF target it will be treated by compiler as 64-bit integer. Similarly, pointers in vmlinux.h are 32-bit for kernel, but treated as 64-bit values by compiler for BPF target. Both problems are now resolved by libbpf for direct memory reads. But similar transformations are useful in general when kernel fields are "resized" from, e.g., unsigned int to unsigned long (or vice versa). Now, similar transformations for signed integers are not safe to perform as they will result in incorrect sign extension of the value. If such situation is detected, libbpf will emit helpful message and will poison the instruction. Not failing immediately means that it's possible to guard the instruction based on kernel version (or other conditions) and make sure it's not reachable. If there is a need to read signed integers that change sizes between different kernels, it's possible to use BPF_CORE_READ_BITFIELD() macro, which works both with bitfields and non-bitfield integers of any signedness and handles sign-extension properly. Also, bpf_core_read() with proper size and/or use of bpf_core_field_size() relocation could allow to deal with such complicated situations explicitly, if not so conventiently as direct memory reads. Selftests added in a separate patch in progs/test_core_autosize.c demonstrate both direct memory and probed use cases. BPF_CORE_READ() is not changed and it won't deal with such situations as automatically as direct memory reads due to the signedness integer limitations, which are much harder to detect and control with compiler macro magic. So it's encouraged to utilize direct memory reads as much as possible. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201008001025.292064-3-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 136 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 07d62771472f..032cf0049ddb 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5040,16 +5040,19 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, static int bpf_core_calc_field_relo(const struct bpf_program *prog, const struct bpf_core_relo *relo, const struct bpf_core_spec *spec, - __u32 *val, bool *validate) + __u32 *val, __u32 *field_sz, __u32 *type_id, + bool *validate) { const struct bpf_core_accessor *acc; const struct btf_type *t; - __u32 byte_off, byte_sz, bit_off, bit_sz; + __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id; const struct btf_member *m; const struct btf_type *mt; bool bitfield; __s64 sz; + *field_sz = 0; + if (relo->kind == BPF_FIELD_EXISTS) { *val = spec ? 1 : 0; return 0; @@ -5065,6 +5068,12 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, if (!acc->name) { if (relo->kind == BPF_FIELD_BYTE_OFFSET) { *val = spec->bit_offset / 8; + /* remember field size for load/store mem size */ + sz = btf__resolve_size(spec->btf, acc->type_id); + if (sz < 0) + return -EINVAL; + *field_sz = sz; + *type_id = acc->type_id; } else if (relo->kind == BPF_FIELD_BYTE_SIZE) { sz = btf__resolve_size(spec->btf, acc->type_id); if (sz < 0) @@ -5081,7 +5090,7 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, } m = btf_members(t) + acc->idx; - mt = skip_mods_and_typedefs(spec->btf, m->type, NULL); + mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id); bit_off = spec->bit_offset; bit_sz = btf_member_bitfield_size(t, acc->idx); @@ -5101,7 +5110,7 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, byte_off = bit_off / 8 / byte_sz * byte_sz; } } else { - sz = btf__resolve_size(spec->btf, m->type); + sz = btf__resolve_size(spec->btf, field_type_id); if (sz < 0) return -EINVAL; byte_sz = sz; @@ -5119,6 +5128,10 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, switch (relo->kind) { case BPF_FIELD_BYTE_OFFSET: *val = byte_off; + if (!bitfield) { + *field_sz = byte_sz; + *type_id = field_type_id; + } break; case BPF_FIELD_BYTE_SIZE: *val = byte_sz; @@ -5219,6 +5232,19 @@ struct bpf_core_relo_res bool poison; /* some relocations can't be validated against orig_val */ bool validate; + /* for field byte offset relocations or the forms: + * *(T *)(rX + ) = rY + * rX = *(T *)(rY + ), + * we remember original and resolved field size to adjust direct + * memory loads of pointers and integers; this is necessary for 32-bit + * host kernel architectures, but also allows to automatically + * relocate fields that were resized from, e.g., u32 to u64, etc. + */ + bool fail_memsz_adjust; + __u32 orig_sz; + __u32 orig_type_id; + __u32 new_sz; + __u32 new_type_id; }; /* Calculate original and target relocation values, given local and target @@ -5240,10 +5266,56 @@ static int bpf_core_calc_relo(const struct bpf_program *prog, res->new_val = 0; res->poison = false; res->validate = true; + res->fail_memsz_adjust = false; + res->orig_sz = res->new_sz = 0; + res->orig_type_id = res->new_type_id = 0; if (core_relo_is_field_based(relo->kind)) { - err = bpf_core_calc_field_relo(prog, relo, local_spec, &res->orig_val, &res->validate); - err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec, &res->new_val, NULL); + err = bpf_core_calc_field_relo(prog, relo, local_spec, + &res->orig_val, &res->orig_sz, + &res->orig_type_id, &res->validate); + err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec, + &res->new_val, &res->new_sz, + &res->new_type_id, NULL); + if (err) + goto done; + /* Validate if it's safe to adjust load/store memory size. + * Adjustments are performed only if original and new memory + * sizes differ. + */ + res->fail_memsz_adjust = false; + if (res->orig_sz != res->new_sz) { + const struct btf_type *orig_t, *new_t; + + orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id); + new_t = btf__type_by_id(targ_spec->btf, res->new_type_id); + + /* There are two use cases in which it's safe to + * adjust load/store's mem size: + * - reading a 32-bit kernel pointer, while on BPF + * size pointers are always 64-bit; in this case + * it's safe to "downsize" instruction size due to + * pointer being treated as unsigned integer with + * zero-extended upper 32-bits; + * - reading unsigned integers, again due to + * zero-extension is preserving the value correctly. + * + * In all other cases it's incorrect to attempt to + * load/store field because read value will be + * incorrect, so we poison relocated instruction. + */ + if (btf_is_ptr(orig_t) && btf_is_ptr(new_t)) + goto done; + if (btf_is_int(orig_t) && btf_is_int(new_t) && + btf_int_encoding(orig_t) != BTF_INT_SIGNED && + btf_int_encoding(new_t) != BTF_INT_SIGNED) + goto done; + + /* mark as invalid mem size adjustment, but this will + * only be checked for LDX/STX/ST insns + */ + res->fail_memsz_adjust = true; + } } else if (core_relo_is_type_based(relo->kind)) { err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); @@ -5252,6 +5324,7 @@ static int bpf_core_calc_relo(const struct bpf_program *prog, err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val); } +done: if (err == -EUCLEAN) { /* EUCLEAN is used to signal instruction poisoning request */ res->poison = true; @@ -5291,6 +5364,28 @@ static bool is_ldimm64(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } +static int insn_bpf_size_to_bytes(struct bpf_insn *insn) +{ + switch (BPF_SIZE(insn->code)) { + case BPF_DW: return 8; + case BPF_W: return 4; + case BPF_H: return 2; + case BPF_B: return 1; + default: return -1; + } +} + +static int insn_bytes_to_bpf_size(__u32 sz) +{ + switch (sz) { + case 8: return BPF_DW; + case 4: return BPF_W; + case 2: return BPF_H; + case 1: return BPF_B; + default: return -1; + } +} + /* * Patch relocatable BPF instruction. * @@ -5300,10 +5395,13 @@ static bool is_ldimm64(struct bpf_insn *insn) * spec, and is checked before patching instruction. If actual insn->imm value * is wrong, bail out with error. * - * Currently three kinds of BPF instructions are supported: + * Currently supported classes of BPF instruction are: * 1. rX = (assignment with immediate operand); * 2. rX += (arithmetic operations with immediate operand); - * 3. rX = (load with 64-bit immediate value). + * 3. rX = (load with 64-bit immediate value); + * 4. rX = *(T *)(rY + ), where T is one of {u8, u16, u32, u64}; + * 5. *(T *)(rX + ) = rY, where T is one of {u8, u16, u32, u64}; + * 6. *(T *)(rX + ) = , where T is one of {u8, u16, u32, u64}. */ static int bpf_core_patch_insn(struct bpf_program *prog, const struct bpf_core_relo *relo, @@ -5327,6 +5425,7 @@ static int bpf_core_patch_insn(struct bpf_program *prog, class = BPF_CLASS(insn->code); if (res->poison) { +poison: /* poison second part of ldimm64 to avoid confusing error from * verifier about "unknown opcode 00" */ @@ -5369,10 +5468,39 @@ static int bpf_core_patch_insn(struct bpf_program *prog, prog->name, relo_idx, insn_idx, new_val); return -ERANGE; } + if (res->fail_memsz_adjust) { + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. " + "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n", + prog->name, relo_idx, insn_idx); + goto poison; + } + orig_val = insn->off; insn->off = new_val; pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n", prog->name, relo_idx, insn_idx, orig_val, new_val); + + if (res->new_sz != res->orig_sz) { + int insn_bytes_sz, insn_bpf_sz; + + insn_bytes_sz = insn_bpf_size_to_bytes(insn); + if (insn_bytes_sz != res->orig_sz) { + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n", + prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz); + return -EINVAL; + } + + insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz); + if (insn_bpf_sz < 0) { + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n", + prog->name, relo_idx, insn_idx, res->new_sz); + return -EINVAL; + } + + insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code); + pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n", + prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz); + } break; case BPF_LD: { __u64 imm; -- cgit v1.3.1 From 2b7d88c2b582c659a6567e851d96873d0209a501 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 7 Oct 2020 17:10:23 -0700 Subject: libbpf: Allow specifying both ELF and raw BTF for CO-RE BTF override Use generalized BTF parsing logic, making it possible to parse BTF both from ELF file, as well as a raw BTF dump. This makes it easier to write custom tests with manually generated BTFs. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201008001025.292064-4-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 032cf0049ddb..105cb8ad1d75 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5842,7 +5842,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) return 0; if (targ_btf_path) - targ_btf = btf__parse_elf(targ_btf_path, NULL); + targ_btf = btf__parse(targ_btf_path, NULL); else targ_btf = obj->btf_vmlinux; if (IS_ERR_OR_NULL(targ_btf)) { -- cgit v1.3.1 From 888d83b961f6057aa377065b60c5206fd8bff97c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 7 Oct 2020 17:10:24 -0700 Subject: selftests/bpf: Validate libbpf's auto-sizing of LD/ST/STX instructions Add selftests validating libbpf's auto-resizing of load/store instructions when used with CO-RE relocations. An explicit and manual approach with using bpf_core_read() is also demonstrated and tested. Separate BPF program is supposed to fail due to using signed integers of sizes that differ from kernel's sizes. To reliably simulate 32-bit BTF (i.e., the one with sizeof(long) == sizeof(void *) == 4), selftest generates its own custom BTF and passes it as a replacement for real kernel BTF. This allows to test 32/64-bitness mix on all architectures. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201008001025.292064-5-andrii@kernel.org --- .../selftests/bpf/prog_tests/core_autosize.c | 225 +++++++++++++++++++++ .../selftests/bpf/progs/test_core_autosize.c | 172 ++++++++++++++++ 2 files changed, 397 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/core_autosize.c create mode 100644 tools/testing/selftests/bpf/progs/test_core_autosize.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c new file mode 100644 index 000000000000..981c251453d9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include +#include + +/* real layout and sizes according to test's (32-bit) BTF + * needs to be defined before skeleton is included */ +struct test_struct___real { + unsigned int ptr; /* can't use `void *`, it is always 8 byte in BPF target */ + unsigned int val2; + unsigned long long val1; + unsigned short val3; + unsigned char val4; + unsigned char _pad; +}; + +#include "test_core_autosize.skel.h" + +static int duration = 0; + +static struct { + unsigned long long ptr_samesized; + unsigned long long val1_samesized; + unsigned long long val2_samesized; + unsigned long long val3_samesized; + unsigned long long val4_samesized; + struct test_struct___real output_samesized; + + unsigned long long ptr_downsized; + unsigned long long val1_downsized; + unsigned long long val2_downsized; + unsigned long long val3_downsized; + unsigned long long val4_downsized; + struct test_struct___real output_downsized; + + unsigned long long ptr_probed; + unsigned long long val1_probed; + unsigned long long val2_probed; + unsigned long long val3_probed; + unsigned long long val4_probed; + + unsigned long long ptr_signed; + unsigned long long val1_signed; + unsigned long long val2_signed; + unsigned long long val3_signed; + unsigned long long val4_signed; + struct test_struct___real output_signed; +} out; + +void test_core_autosize(void) +{ + char btf_file[] = "/tmp/core_autosize.btf.XXXXXX"; + int err, fd = -1, zero = 0; + int char_id, short_id, int_id, long_long_id, void_ptr_id, id; + struct test_core_autosize* skel = NULL; + struct bpf_object_load_attr load_attr = {}; + struct bpf_program *prog; + struct bpf_map *bss_map; + struct btf *btf = NULL; + size_t written; + const void *raw_data; + __u32 raw_sz; + FILE *f = NULL; + + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "empty_btf")) + return; + /* Emit the following struct with 32-bit pointer size: + * + * struct test_struct { + * void *ptr; + * unsigned long val2; + * unsigned long long val1; + * unsigned short val3; + * unsigned char val4; + * char: 8; + * }; + * + * This struct is going to be used as the "kernel BTF" for this test. + * It's equivalent memory-layout-wise to test_struct__real above. + */ + + /* force 32-bit pointer size */ + btf__set_pointer_size(btf, 4); + + char_id = btf__add_int(btf, "unsigned char", 1, 0); + ASSERT_EQ(char_id, 1, "char_id"); + short_id = btf__add_int(btf, "unsigned short", 2, 0); + ASSERT_EQ(short_id, 2, "short_id"); + /* "long unsigned int" of 4 byte size tells BTF that sizeof(void *) == 4 */ + int_id = btf__add_int(btf, "long unsigned int", 4, 0); + ASSERT_EQ(int_id, 3, "int_id"); + long_long_id = btf__add_int(btf, "unsigned long long", 8, 0); + ASSERT_EQ(long_long_id, 4, "long_long_id"); + void_ptr_id = btf__add_ptr(btf, 0); + ASSERT_EQ(void_ptr_id, 5, "void_ptr_id"); + + id = btf__add_struct(btf, "test_struct", 20 /* bytes */); + ASSERT_EQ(id, 6, "struct_id"); + err = btf__add_field(btf, "ptr", void_ptr_id, 0, 0); + err = err ?: btf__add_field(btf, "val2", int_id, 32, 0); + err = err ?: btf__add_field(btf, "val1", long_long_id, 64, 0); + err = err ?: btf__add_field(btf, "val3", short_id, 128, 0); + err = err ?: btf__add_field(btf, "val4", char_id, 144, 0); + ASSERT_OK(err, "struct_fields"); + + fd = mkstemp(btf_file); + if (CHECK(fd < 0, "btf_tmp", "failed to create file: %d\n", fd)) + goto cleanup; + f = fdopen(fd, "w"); + if (!ASSERT_OK_PTR(f, "btf_fdopen")) + goto cleanup; + + raw_data = btf__get_raw_data(btf, &raw_sz); + if (!ASSERT_OK_PTR(raw_data, "raw_data")) + goto cleanup; + written = fwrite(raw_data, 1, raw_sz, f); + if (CHECK(written != raw_sz, "btf_write", "written: %zu, errno: %d\n", written, errno)) + goto cleanup; + fflush(f); + fclose(f); + f = NULL; + close(fd); + fd = -1; + + /* open and load BPF program with custom BTF as the kernel BTF */ + skel = test_core_autosize__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + /* disable handle_signed() for now */ + prog = bpf_object__find_program_by_name(skel->obj, "handle_signed"); + if (!ASSERT_OK_PTR(prog, "prog_find")) + goto cleanup; + bpf_program__set_autoload(prog, false); + + load_attr.obj = skel->obj; + load_attr.target_btf_path = btf_file; + err = bpf_object__load_xattr(&load_attr); + if (!ASSERT_OK(err, "prog_load")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, "handle_samesize"); + if (!ASSERT_OK_PTR(prog, "prog_find")) + goto cleanup; + skel->links.handle_samesize = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(skel->links.handle_samesize, "prog_attach")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, "handle_downsize"); + if (!ASSERT_OK_PTR(prog, "prog_find")) + goto cleanup; + skel->links.handle_downsize = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(skel->links.handle_downsize, "prog_attach")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, "handle_probed"); + if (!ASSERT_OK_PTR(prog, "prog_find")) + goto cleanup; + skel->links.handle_probed = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(skel->links.handle_probed, "prog_attach")) + goto cleanup; + + usleep(1); + + bss_map = bpf_object__find_map_by_name(skel->obj, "test_cor.bss"); + if (!ASSERT_OK_PTR(bss_map, "bss_map_find")) + goto cleanup; + + err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &zero, (void *)&out); + if (!ASSERT_OK(err, "bss_lookup")) + goto cleanup; + + ASSERT_EQ(out.ptr_samesized, 0x01020304, "ptr_samesized"); + ASSERT_EQ(out.val1_samesized, 0x1020304050607080, "val1_samesized"); + ASSERT_EQ(out.val2_samesized, 0x0a0b0c0d, "val2_samesized"); + ASSERT_EQ(out.val3_samesized, 0xfeed, "val3_samesized"); + ASSERT_EQ(out.val4_samesized, 0xb9, "val4_samesized"); + ASSERT_EQ(out.output_samesized.ptr, 0x01020304, "ptr_samesized"); + ASSERT_EQ(out.output_samesized.val1, 0x1020304050607080, "val1_samesized"); + ASSERT_EQ(out.output_samesized.val2, 0x0a0b0c0d, "val2_samesized"); + ASSERT_EQ(out.output_samesized.val3, 0xfeed, "val3_samesized"); + ASSERT_EQ(out.output_samesized.val4, 0xb9, "val4_samesized"); + + ASSERT_EQ(out.ptr_downsized, 0x01020304, "ptr_downsized"); + ASSERT_EQ(out.val1_downsized, 0x1020304050607080, "val1_downsized"); + ASSERT_EQ(out.val2_downsized, 0x0a0b0c0d, "val2_downsized"); + ASSERT_EQ(out.val3_downsized, 0xfeed, "val3_downsized"); + ASSERT_EQ(out.val4_downsized, 0xb9, "val4_downsized"); + ASSERT_EQ(out.output_downsized.ptr, 0x01020304, "ptr_downsized"); + ASSERT_EQ(out.output_downsized.val1, 0x1020304050607080, "val1_downsized"); + ASSERT_EQ(out.output_downsized.val2, 0x0a0b0c0d, "val2_downsized"); + ASSERT_EQ(out.output_downsized.val3, 0xfeed, "val3_downsized"); + ASSERT_EQ(out.output_downsized.val4, 0xb9, "val4_downsized"); + + ASSERT_EQ(out.ptr_probed, 0x01020304, "ptr_probed"); + ASSERT_EQ(out.val1_probed, 0x1020304050607080, "val1_probed"); + ASSERT_EQ(out.val2_probed, 0x0a0b0c0d, "val2_probed"); + ASSERT_EQ(out.val3_probed, 0xfeed, "val3_probed"); + ASSERT_EQ(out.val4_probed, 0xb9, "val4_probed"); + + test_core_autosize__destroy(skel); + skel = NULL; + + /* now re-load with handle_signed() enabled, it should fail loading */ + skel = test_core_autosize__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + load_attr.obj = skel->obj; + load_attr.target_btf_path = btf_file; + err = bpf_object__load_xattr(&load_attr); + if (!ASSERT_ERR(err, "bad_prog_load")) + goto cleanup; + +cleanup: + if (f) + fclose(f); + if (fd >= 0) + close(fd); + remove(btf_file); + btf__free(btf); + test_core_autosize__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_core_autosize.c b/tools/testing/selftests/bpf/progs/test_core_autosize.c new file mode 100644 index 000000000000..44f5aa2e8956 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_autosize.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +/* fields of exactly the same size */ +struct test_struct___samesize { + void *ptr; + unsigned long long val1; + unsigned int val2; + unsigned short val3; + unsigned char val4; +} __attribute((preserve_access_index)); + +/* unsigned fields that have to be downsized by libbpf */ +struct test_struct___downsize { + void *ptr; + unsigned long val1; + unsigned long val2; + unsigned long val3; + unsigned long val4; + /* total sz: 40 */ +} __attribute__((preserve_access_index)); + +/* fields with signed integers of wrong size, should be rejected */ +struct test_struct___signed { + void *ptr; + long val1; + long val2; + long val3; + long val4; +} __attribute((preserve_access_index)); + +/* real layout and sizes according to test's (32-bit) BTF */ +struct test_struct___real { + unsigned int ptr; /* can't use `void *`, it is always 8 byte in BPF target */ + unsigned int val2; + unsigned long long val1; + unsigned short val3; + unsigned char val4; + unsigned char _pad; + /* total sz: 20 */ +}; + +struct test_struct___real input = { + .ptr = 0x01020304, + .val1 = 0x1020304050607080, + .val2 = 0x0a0b0c0d, + .val3 = 0xfeed, + .val4 = 0xb9, + ._pad = 0xff, /* make sure no accidental zeros are present */ +}; + +unsigned long long ptr_samesized = 0; +unsigned long long val1_samesized = 0; +unsigned long long val2_samesized = 0; +unsigned long long val3_samesized = 0; +unsigned long long val4_samesized = 0; +struct test_struct___real output_samesized = {}; + +unsigned long long ptr_downsized = 0; +unsigned long long val1_downsized = 0; +unsigned long long val2_downsized = 0; +unsigned long long val3_downsized = 0; +unsigned long long val4_downsized = 0; +struct test_struct___real output_downsized = {}; + +unsigned long long ptr_probed = 0; +unsigned long long val1_probed = 0; +unsigned long long val2_probed = 0; +unsigned long long val3_probed = 0; +unsigned long long val4_probed = 0; + +unsigned long long ptr_signed = 0; +unsigned long long val1_signed = 0; +unsigned long long val2_signed = 0; +unsigned long long val3_signed = 0; +unsigned long long val4_signed = 0; +struct test_struct___real output_signed = {}; + +SEC("raw_tp/sys_exit") +int handle_samesize(void *ctx) +{ + struct test_struct___samesize *in = (void *)&input; + struct test_struct___samesize *out = (void *)&output_samesized; + + ptr_samesized = (unsigned long long)in->ptr; + val1_samesized = in->val1; + val2_samesized = in->val2; + val3_samesized = in->val3; + val4_samesized = in->val4; + + out->ptr = in->ptr; + out->val1 = in->val1; + out->val2 = in->val2; + out->val3 = in->val3; + out->val4 = in->val4; + + return 0; +} + +SEC("raw_tp/sys_exit") +int handle_downsize(void *ctx) +{ + struct test_struct___downsize *in = (void *)&input; + struct test_struct___downsize *out = (void *)&output_downsized; + + ptr_downsized = (unsigned long long)in->ptr; + val1_downsized = in->val1; + val2_downsized = in->val2; + val3_downsized = in->val3; + val4_downsized = in->val4; + + out->ptr = in->ptr; + out->val1 = in->val1; + out->val2 = in->val2; + out->val3 = in->val3; + out->val4 = in->val4; + + return 0; +} + +SEC("raw_tp/sys_enter") +int handle_probed(void *ctx) +{ + struct test_struct___downsize *in = (void *)&input; + __u64 tmp; + + tmp = 0; + bpf_core_read(&tmp, bpf_core_field_size(in->ptr), &in->ptr); + ptr_probed = tmp; + + tmp = 0; + bpf_core_read(&tmp, bpf_core_field_size(in->val1), &in->val1); + val1_probed = tmp; + + tmp = 0; + bpf_core_read(&tmp, bpf_core_field_size(in->val2), &in->val2); + val2_probed = tmp; + + tmp = 0; + bpf_core_read(&tmp, bpf_core_field_size(in->val3), &in->val3); + val3_probed = tmp; + + tmp = 0; + bpf_core_read(&tmp, bpf_core_field_size(in->val4), &in->val4); + val4_probed = tmp; + + return 0; +} + +SEC("raw_tp/sys_enter") +int handle_signed(void *ctx) +{ + struct test_struct___signed *in = (void *)&input; + struct test_struct___signed *out = (void *)&output_signed; + + val2_signed = in->val2; + val3_signed = in->val3; + val4_signed = in->val4; + + out->val2= in->val2; + out->val3= in->val3; + out->val4= in->val4; + + return 0; +} -- cgit v1.3.1 From 465e490d290b9670c3eef7406915facd58946244 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 7 Oct 2020 19:53:57 -0700 Subject: ACPICA: Tree-wide: fix various typos and spelling mistakes ACPICA commit 6648a6ac8410813bcfedb5c8345259dd155ea851 Fix spelling issues found using the codespell checker Link: https://github.com/acpica/acpica/commit/6648a6ac Signed-off-by: Colin Ian King Signed-off-by: Bob Moore Signed-off-by: Erik Kaneda Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/dbinput.c | 2 +- drivers/acpi/acpica/nsxfobj.c | 3 ++- include/acpi/acconfig.h | 2 +- tools/power/acpi/os_specific/service_layers/oslinuxtbl.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/drivers/acpi/acpica/dbinput.c b/drivers/acpi/acpica/dbinput.c index ee6a1b77af3f..568d1b0b1f5d 100644 --- a/drivers/acpi/acpica/dbinput.c +++ b/drivers/acpi/acpica/dbinput.c @@ -436,7 +436,7 @@ static void acpi_db_display_help(char *command) acpi_os_printf("\n"); } else { - /* Display help for all commands that match the subtring */ + /* Display help for all commands that match the substring */ acpi_db_display_command_info(command, TRUE); } diff --git a/drivers/acpi/acpica/nsxfobj.c b/drivers/acpi/acpica/nsxfobj.c index c022bef263e5..324269481160 100644 --- a/drivers/acpi/acpica/nsxfobj.c +++ b/drivers/acpi/acpica/nsxfobj.c @@ -24,7 +24,8 @@ ACPI_MODULE_NAME("nsxfobj") * * RETURN: Status * - * DESCRIPTION: This routine returns the type associatd with a particular handle + * DESCRIPTION: This routine returns the type associated with a particular + * handle * ******************************************************************************/ acpi_status acpi_get_type(acpi_handle handle, acpi_object_type *ret_type) diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h index 5940a3c68a96..a225eff499c8 100644 --- a/include/acpi/acconfig.h +++ b/include/acpi/acconfig.h @@ -121,7 +121,7 @@ * *****************************************************************************/ -/* Method info (in WALK_STATE), containing local variables and argumetns */ +/* Method info (in WALK_STATE), containing local variables and arguments */ #define ACPI_METHOD_NUM_LOCALS 8 #define ACPI_METHOD_MAX_LOCAL 7 diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index dd38c2b2e1b4..11c5046dce16 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -110,7 +110,7 @@ u32 gbl_table_count = 0; * * RETURN: Status; Converted from errno. * - * DESCRIPTION: Get last errno and conver it to acpi_status. + * DESCRIPTION: Get last errno and convert it to acpi_status. * *****************************************************************************/ -- cgit v1.3.1 From cbcd9c83695e0f24c0497e6db8b73c3819ab08d4 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 4 Oct 2020 17:14:08 -0500 Subject: selftests/ftrace: Add test case for synthetic event dynamic strings Add a selftest that defines and traces a synthetic event that uses a dynamic string event field. Link: https://lkml.kernel.org/r/74445afb005046d76d59fb06696a2ceaa164dec9.1601848695.git.zanussi@kernel.org Acked-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- .../trigger-synthetic-event-dynstring.tc | 31 ++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc new file mode 100644 index 000000000000..3d65c856eca3 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc @@ -0,0 +1,31 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event trigger - test inter-event histogram trigger trace action with dynamic string param +# requires: set_event synthetic_events events/sched/sched_process_exec/hist "char name[]' >> synthetic_events":README + +fail() { #msg + echo $1 + exit_fail +} + +echo "Test create synthetic event" + +echo 'ping_test_latency u64 lat; char filename[]' > synthetic_events +if [ ! -d events/synthetic/ping_test_latency ]; then + fail "Failed to create ping_test_latency synthetic event" +fi + +echo "Test create histogram for synthetic event using trace action and dynamic strings" +echo "Test histogram dynamic string variables,simple expression support and trace action" + +echo 'hist:key=pid:filenamevar=filename:ts0=common_timestamp.usecs' > events/sched/sched_process_exec/trigger +echo 'hist:key=pid:lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_process_exec).ping_test_latency($lat,$filenamevar) if comm == "ping"' > events/sched/sched_process_exit/trigger +echo 'hist:keys=filename,lat:sort=filename,lat' > events/synthetic/ping_test_latency/trigger + +ping $LOCALHOST -c 5 + +if ! grep -q "ping" events/synthetic/ping_test_latency/hist; then + fail "Failed to create dynamic string trace action inter-event histogram" +fi + +exit 0 -- cgit v1.3.1 From 71c87fbe720038007543abff7540ef0376c519f5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 19 Sep 2020 00:14:31 -0700 Subject: selftests/seccomp: Record syscall during ptrace entry In preparation for performing actions during ptrace syscall exit, save the syscall number during ptrace syscall entry. Some architectures do no have the syscall number available during ptrace syscall exit. Suggested-by: Thadeu Lima de Souza Cascardo Link: https://lore.kernel.org/linux-kselftest/20200911181012.171027-1-cascardo@canonical.com/ Acked-by: Christian Brauner Link: https://lore.kernel.org/lkml/20200921074354.6shkt2e5yhzhj3sn@wittgenstein/ Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 40 ++++++++++++++++++--------- 1 file changed, 27 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index bc0fb463c709..c0311b4c736b 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1949,12 +1949,19 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, } +FIXTURE(TRACE_syscall) { + struct sock_fprog prog; + pid_t tracer, mytid, mypid, parent; + long syscall_nr; +}; + void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, int status, void *args) { - int ret, nr; + int ret; unsigned long msg; static bool entry; + FIXTURE_DATA(TRACE_syscall) *self = args; /* * The traditional way to tell PTRACE_SYSCALL entry/exit @@ -1968,24 +1975,31 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY : PTRACE_EVENTMSG_SYSCALL_EXIT, msg); - if (!entry) + /* + * Some architectures only support setting return values during + * syscall exit under ptrace, and on exit the syscall number may + * no longer be available. Therefore, save the initial sycall + * number here, so it can be examined during both entry and exit + * phases. + */ + if (entry) + self->syscall_nr = get_syscall(_metadata, tracee); + else return; - nr = get_syscall(_metadata, tracee); - - if (nr == __NR_getpid) + switch (self->syscall_nr) { + case __NR_getpid: change_syscall(_metadata, tracee, __NR_getppid, 0); - if (nr == __NR_gettid) + break; + case __NR_gettid: change_syscall(_metadata, tracee, -1, 45000); - if (nr == __NR_openat) + break; + case __NR_openat: change_syscall(_metadata, tracee, -1, -ESRCH); + break; + } } -FIXTURE(TRACE_syscall) { - struct sock_fprog prog; - pid_t tracer, mytid, mypid, parent; -}; - FIXTURE_VARIANT(TRACE_syscall) { /* * All of the SECCOMP_RET_TRACE behaviors can be tested with either @@ -2044,7 +2058,7 @@ FIXTURE_SETUP(TRACE_syscall) self->tracer = setup_trace_fixture(_metadata, variant->use_ptrace ? tracer_ptrace : tracer_seccomp, - NULL, variant->use_ptrace); + self, variant->use_ptrace); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret); -- cgit v1.3.1 From bef71f86b64de8b2eb5b2f26e1cbd7735e3551da Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 19 Sep 2020 00:21:34 -0700 Subject: selftests/seccomp: Allow syscall nr and ret value to be set separately In preparation for setting syscall nr and ret values separately, refactor the helpers to take a pointer to a value, so that a NULL can indicate "do not change this respective value". This is done to keep the regset read/write happening once and in one code path. Acked-by: Christian Brauner Link: https://lore.kernel.org/lkml/20200921075031.j4gruygeugkp2zwd@wittgenstein/ Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 59 +++++++++++++++++++++------ 1 file changed, 47 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index c0311b4c736b..98ce5e8a6398 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1888,27 +1888,47 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) } /* Architecture-specific syscall changing routine. */ -void change_syscall(struct __test_metadata *_metadata, - pid_t tracee, int syscall, int result) +void __change_syscall(struct __test_metadata *_metadata, + pid_t tracee, long *syscall, long *ret) { ARCH_REGS orig, regs; + /* Do not get/set registers if we have nothing to do. */ + if (!syscall && !ret) + return; + EXPECT_EQ(0, ARCH_GETREGS(regs)) { return; } orig = regs; - SYSCALL_NUM_SET(regs, syscall); + if (syscall) + SYSCALL_NUM_SET(regs, *syscall); - /* If syscall is skipped, change return value. */ - if (syscall == -1) - SYSCALL_RET_SET(regs, result); + if (ret) + SYSCALL_RET_SET(regs, *ret); /* Flush any register changes made. */ if (memcmp(&orig, ®s, sizeof(orig)) != 0) EXPECT_EQ(0, ARCH_SETREGS(regs)); } +/* Change only syscall number. */ +void change_syscall_nr(struct __test_metadata *_metadata, + pid_t tracee, long syscall) +{ + __change_syscall(_metadata, tracee, &syscall, NULL); +} + +/* Change syscall return value (and set syscall number to -1). */ +void change_syscall_ret(struct __test_metadata *_metadata, + pid_t tracee, long ret) +{ + long syscall = -1; + + __change_syscall(_metadata, tracee, &syscall, &ret); +} + void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, int status, void *args) { @@ -1924,17 +1944,17 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, case 0x1002: /* change getpid to getppid. */ EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, __NR_getppid, 0); + change_syscall_nr(_metadata, tracee, __NR_getppid); break; case 0x1003: /* skip gettid with valid return code. */ EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, -1, 45000); + change_syscall_ret(_metadata, tracee, 45000); break; case 0x1004: /* skip openat with error. */ EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, -1, -ESRCH); + change_syscall_ret(_metadata, tracee, -ESRCH); break; case 0x1005: /* do nothing (allow getppid) */ @@ -1961,6 +1981,8 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, int ret; unsigned long msg; static bool entry; + long syscall_nr_val, syscall_ret_val; + long *syscall_nr = NULL, *syscall_ret = NULL; FIXTURE_DATA(TRACE_syscall) *self = args; /* @@ -1987,17 +2009,30 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, else return; + syscall_nr = &syscall_nr_val; + syscall_ret = &syscall_ret_val; + + /* Now handle the actual rewriting cases. */ switch (self->syscall_nr) { case __NR_getpid: - change_syscall(_metadata, tracee, __NR_getppid, 0); + syscall_nr_val = __NR_getppid; + /* Never change syscall return for this case. */ + syscall_ret = NULL; break; case __NR_gettid: - change_syscall(_metadata, tracee, -1, 45000); + syscall_nr_val = -1; + syscall_ret_val = 45000; break; case __NR_openat: - change_syscall(_metadata, tracee, -1, -ESRCH); + syscall_nr_val = -1; + syscall_ret_val = -ESRCH; break; + default: + /* Unhandled, do nothing. */ + return; } + + __change_syscall(_metadata, tracee, syscall_nr, syscall_ret); } FIXTURE_VARIANT(TRACE_syscall) { -- cgit v1.3.1 From a39caac02f2f5819d39f37d7987babe19fcafe21 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 11 Sep 2020 19:49:50 -0700 Subject: selftests/seccomp: powerpc: Set syscall return during ptrace syscall exit Some archs (like powerpc) only support changing the return code during syscall exit when ptrace is used. Test entry vs exit phases for which portions of the syscall number and return values need to be set at which different phases. For non-powerpc, all changes are made during ptrace syscall entry, as before. For powerpc, the syscall number is changed at ptrace syscall entry and the syscall return value is changed on ptrace syscall exit. Reported-by: Thadeu Lima de Souza Cascardo Suggested-by: Thadeu Lima de Souza Cascardo Link: https://lore.kernel.org/linux-kselftest/20200911181012.171027-1-cascardo@canonical.com/ Fixes: 58d0a862f573 ("seccomp: add tests for ptrace hole") Acked-by: Christian Brauner Link: https://lore.kernel.org/lkml/20200921075300.7iylzof2w5vrutah@wittgenstein/ Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 98ce5e8a6398..894c2404d321 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1765,6 +1765,7 @@ TEST_F(TRACE_poke, getpid_runs_normally) (_regs).ccr &= ~0x10000000; \ } \ } while (0) +# define SYSCALL_RET_SET_ON_PTRACE_EXIT #elif defined(__s390__) # define ARCH_REGS s390_regs # define SYSCALL_NUM(_regs) (_regs).gprs[2] @@ -1853,6 +1854,18 @@ TEST_F(TRACE_poke, getpid_runs_normally) } while (0) #endif +/* + * Some architectures (e.g. powerpc) can only set syscall + * return values on syscall exit during ptrace. + */ +const bool ptrace_entry_set_syscall_nr = true; +const bool ptrace_entry_set_syscall_ret = +#ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT + true; +#else + false; +#endif + /* * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). @@ -2006,11 +2019,15 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, */ if (entry) self->syscall_nr = get_syscall(_metadata, tracee); - else - return; - syscall_nr = &syscall_nr_val; - syscall_ret = &syscall_ret_val; + /* + * Depending on the architecture's syscall setting abilities, we + * pick which things to set during this phase (entry or exit). + */ + if (entry == ptrace_entry_set_syscall_nr) + syscall_nr = &syscall_nr_val; + if (entry == ptrace_entry_set_syscall_ret) + syscall_ret = &syscall_ret_val; /* Now handle the actual rewriting cases. */ switch (self->syscall_nr) { -- cgit v1.3.1 From e953aeaa913bedcdabc168276ef41c83ae75f161 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:19 -0700 Subject: selftests/clone3: Avoid OS-defined clone_args As the UAPI headers start to appear in distros, we need to avoid outdated versions of struct clone_args to be able to test modern features, named "struct __clone_args". Additionally update the struct size macro names to match UAPI names. Acked-by: Christian Brauner Link: https://lore.kernel.org/lkml/20200921075432.u4gis3s2o5qrsb5g@wittgenstein/ Signed-off-by: Kees Cook --- tools/testing/selftests/clone3/clone3.c | 45 +++++++++------------- .../clone3/clone3_cap_checkpoint_restore.c | 4 +- .../selftests/clone3/clone3_clear_sighand.c | 2 +- tools/testing/selftests/clone3/clone3_selftests.h | 24 +++++++----- tools/testing/selftests/clone3/clone3_set_tid.c | 4 +- tools/testing/selftests/pidfd/pidfd_setns_test.c | 2 +- tools/testing/selftests/seccomp/seccomp_bpf.c | 4 +- 7 files changed, 41 insertions(+), 44 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c index b7e6dec36173..42be3b925830 100644 --- a/tools/testing/selftests/clone3/clone3.c +++ b/tools/testing/selftests/clone3/clone3.c @@ -20,13 +20,6 @@ #include "../kselftest.h" #include "clone3_selftests.h" -/* - * Different sizes of struct clone_args - */ -#ifndef CLONE3_ARGS_SIZE_V0 -#define CLONE3_ARGS_SIZE_V0 64 -#endif - enum test_mode { CLONE3_ARGS_NO_TEST, CLONE3_ARGS_ALL_0, @@ -38,13 +31,13 @@ enum test_mode { static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) { - struct clone_args args = { + struct __clone_args args = { .flags = flags, .exit_signal = SIGCHLD, }; struct clone_args_extended { - struct clone_args args; + struct __clone_args args; __aligned_u64 excess_space[2]; } args_ext; @@ -52,11 +45,11 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) int status; memset(&args_ext, 0, sizeof(args_ext)); - if (size > sizeof(struct clone_args)) + if (size > sizeof(struct __clone_args)) args_ext.excess_space[1] = 1; if (size == 0) - size = sizeof(struct clone_args); + size = sizeof(struct __clone_args); switch (test_mode) { case CLONE3_ARGS_ALL_0: @@ -77,9 +70,9 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) break; } - memcpy(&args_ext.args, &args, sizeof(struct clone_args)); + memcpy(&args_ext.args, &args, sizeof(struct __clone_args)); - pid = sys_clone3((struct clone_args *)&args_ext, size); + pid = sys_clone3((struct __clone_args *)&args_ext, size); if (pid < 0) { ksft_print_msg("%s - Failed to create new process\n", strerror(errno)); @@ -144,14 +137,14 @@ int main(int argc, char *argv[]) else ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); - /* Do a clone3() with CLONE3_ARGS_SIZE_V0. */ - test_clone3(0, CLONE3_ARGS_SIZE_V0, 0, CLONE3_ARGS_NO_TEST); + /* Do a clone3() with CLONE_ARGS_SIZE_VER0. */ + test_clone3(0, CLONE_ARGS_SIZE_VER0, 0, CLONE3_ARGS_NO_TEST); - /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 */ - test_clone3(0, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST); + /* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 */ + test_clone3(0, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST); /* Do a clone3() with sizeof(struct clone_args) + 8 */ - test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_NO_TEST); + test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_NO_TEST); /* Do a clone3() with exit_signal having highest 32 bits non-zero */ test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG); @@ -165,31 +158,31 @@ int main(int argc, char *argv[]) /* Do a clone3() with NSIG < exit_signal < CSIG */ test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG); - test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_ALL_0); + test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_ALL_0); - test_clone3(0, sizeof(struct clone_args) + 16, -E2BIG, + test_clone3(0, sizeof(struct __clone_args) + 16, -E2BIG, CLONE3_ARGS_ALL_0); - test_clone3(0, sizeof(struct clone_args) * 2, -E2BIG, + test_clone3(0, sizeof(struct __clone_args) * 2, -E2BIG, CLONE3_ARGS_ALL_0); /* Do a clone3() with > page size */ test_clone3(0, getpagesize() + 8, -E2BIG, CLONE3_ARGS_NO_TEST); - /* Do a clone3() with CLONE3_ARGS_SIZE_V0 in a new PID NS. */ + /* Do a clone3() with CLONE_ARGS_SIZE_VER0 in a new PID NS. */ if (uid == 0) - test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0, 0, + test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0, 0, CLONE3_ARGS_NO_TEST); else ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); - /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 in a new PID NS */ - test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, + /* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS */ + test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST); /* Do a clone3() with sizeof(struct clone_args) + 8 in a new PID NS */ if (uid == 0) - test_clone3(CLONE_NEWPID, sizeof(struct clone_args) + 8, 0, + test_clone3(CLONE_NEWPID, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_NO_TEST); else ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c index 9562425aa0a9..55bd387ce7ec 100644 --- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c +++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c @@ -44,13 +44,13 @@ static int call_clone3_set_tid(struct __test_metadata *_metadata, int status; pid_t pid = -1; - struct clone_args args = { + struct __clone_args args = { .exit_signal = SIGCHLD, .set_tid = ptr_to_u64(set_tid), .set_tid_size = set_tid_size, }; - pid = sys_clone3(&args, sizeof(struct clone_args)); + pid = sys_clone3(&args, sizeof(args)); if (pid < 0) { TH_LOG("%s - Failed to create new process", strerror(errno)); return -errno; diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c index db5fc9c5edcf..47a8c0fc3676 100644 --- a/tools/testing/selftests/clone3/clone3_clear_sighand.c +++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c @@ -47,7 +47,7 @@ static void test_clone3_clear_sighand(void) { int ret; pid_t pid; - struct clone_args args = {}; + struct __clone_args args = {}; struct sigaction act; /* diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h index 91c1a78ddb39..e81ffaaee02b 100644 --- a/tools/testing/selftests/clone3/clone3_selftests.h +++ b/tools/testing/selftests/clone3/clone3_selftests.h @@ -19,13 +19,11 @@ #define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */ #endif -#ifndef CLONE_ARGS_SIZE_VER0 -#define CLONE_ARGS_SIZE_VER0 64 -#endif - #ifndef __NR_clone3 #define __NR_clone3 -1 -struct clone_args { +#endif + +struct __clone_args { __aligned_u64 flags; __aligned_u64 pidfd; __aligned_u64 child_tid; @@ -34,15 +32,21 @@ struct clone_args { __aligned_u64 stack; __aligned_u64 stack_size; __aligned_u64 tls; -#define CLONE_ARGS_SIZE_VER1 80 +#ifndef CLONE_ARGS_SIZE_VER0 +#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */ +#endif __aligned_u64 set_tid; __aligned_u64 set_tid_size; -#define CLONE_ARGS_SIZE_VER2 88 +#ifndef CLONE_ARGS_SIZE_VER1 +#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */ +#endif __aligned_u64 cgroup; +#ifndef CLONE_ARGS_SIZE_VER2 +#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */ +#endif }; -#endif /* __NR_clone3 */ -static pid_t sys_clone3(struct clone_args *args, size_t size) +static pid_t sys_clone3(struct __clone_args *args, size_t size) { fflush(stdout); fflush(stderr); @@ -52,7 +56,7 @@ static pid_t sys_clone3(struct clone_args *args, size_t size) static inline void test_clone3_supported(void) { pid_t pid; - struct clone_args args = {}; + struct __clone_args args = {}; if (__NR_clone3 < 0) ksft_exit_skip("clone3() syscall is not supported\n"); diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c index 5831c1082d6d..0229e9ebb995 100644 --- a/tools/testing/selftests/clone3/clone3_set_tid.c +++ b/tools/testing/selftests/clone3/clone3_set_tid.c @@ -46,14 +46,14 @@ static int call_clone3_set_tid(pid_t *set_tid, int status; pid_t pid = -1; - struct clone_args args = { + struct __clone_args args = { .flags = flags, .exit_signal = SIGCHLD, .set_tid = ptr_to_u64(set_tid), .set_tid_size = set_tid_size, }; - pid = sys_clone3(&args, sizeof(struct clone_args)); + pid = sys_clone3(&args, sizeof(args)); if (pid < 0) { ksft_print_msg("%s - Failed to create new process\n", strerror(errno)); diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c index 7dca1aa4672d..1f085b922c6e 100644 --- a/tools/testing/selftests/pidfd/pidfd_setns_test.c +++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c @@ -75,7 +75,7 @@ static int sys_waitid(int which, pid_t pid, int options) pid_t create_child(int *pidfd, unsigned flags) { - struct clone_args args = { + struct __clone_args args = { .flags = CLONE_PIDFD | flags, .exit_signal = SIGCHLD, .pidfd = ptr_to_u64(pidfd), diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 894c2404d321..4a180439ee9e 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -3817,7 +3817,7 @@ TEST(user_notification_filter_empty) long ret; int status; struct pollfd pollfd; - struct clone_args args = { + struct __clone_args args = { .flags = CLONE_FILES, .exit_signal = SIGCHLD, }; @@ -3871,7 +3871,7 @@ TEST(user_notification_filter_empty_threaded) long ret; int status; struct pollfd pollfd; - struct clone_args args = { + struct __clone_args args = { .flags = CLONE_FILES, .exit_signal = SIGCHLD, }; -- cgit v1.3.1 From 036dfd8322be5214cbc19a0bdcab5a72763c6dcd Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 6 Oct 2020 18:06:30 +0200 Subject: selftests: mptcp: interpret \n as a new line In case of errors, this message was printed: (...) balanced bwidth with unbalanced delay 5233 max 5005 [ fail ] client exit code 0, server 0 \nnetns ns3-0-EwnkPH socket stat for 10003: (...) Obviously, the idea was to add a new line before the socket stat and not print "\nnetns". The commit 8b974778f998 ("selftests: mptcp: interpret \n as a new line") is very similar to this one. But the modification in simult_flows.sh was missed because this commit above was done in parallel to one here below. Fixes: 1a418cb8e888 ("mptcp: simult flow self-tests") Signed-off-by: Matthieu Baerts Acked-by: Paolo Abeni Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/simult_flows.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 0d88225daa02..2f649b431456 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -200,9 +200,9 @@ do_transfer() echo " [ fail ]" echo "client exit code $retc, server $rets" 1>&2 - echo "\nnetns ${ns3} socket stat for $port:" 1>&2 + echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2 ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port" - echo "\nnetns ${ns1} socket stat for $port:" 1>&2 + echo -e "\nnetns ${ns1} socket stat for $port:" 1>&2 ip netns exec ${ns1} ss -nita 1>&2 -o "dport = :$port" ls -l $sin $cout ls -l $cin $sout -- cgit v1.3.1 From 01361b665a26ef0c087e53f14cf310d1cfe0cf98 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 9 Oct 2020 11:52:22 +0200 Subject: tests: remove O_NONBLOCK before waiting for WSTOPPED Naresh reported that selftests: pidfd: pidfd_wait hangs on linux next kernel on x86_64, i386 and arm64 Juno-r2 These devices are using NFS mounted rootfs. I have tested pidfd testcases independently and all test PASS. The Hang or exit from test run noticed when run by run_kselftest.sh pidfd_wait.c:208:wait_nonblock:Expected sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL) (-1) == 0 (0) wait_nonblock: Test terminated by assertion metadata: git branch: master git repo: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git git commit: e64997027d5f171148687e58b78c8b3c869a6158 git describe: next-20200922 make_kernelversion: 5.9.0-rc6 kernel-config: http://snapshots.linaro.org/openembedded/lkft/lkft/sumo/intel-core2-32/lkft/linux-next/865/config The reason for this is a simple race in the selftests, that I overlooked and which is more likely to hit when there's a lot of processes running on the system. Basically the child process hasn't SIGSTOPed itself yet but the parent is already calling waitid() on a O_NONBLOCK pidfd. Since it doesn't find a WSTOPPED process it returns -EAGAIN correctly. The fix for this is to move the line where we're removing the O_NONBLOCK property from the fd before the waitid() WSTOPPED call so we hang until the child becomes stopped. Fixes: cd89597bbe5a ("tests: add waitid() tests for non-blocking pidfds") Reported-by: Naresh Kamboju Tested-by: Naresh Kamboju Link: https://lkft.validation.linaro.org/scheduler/job/1813223 Signed-off-by: Christian Brauner --- tools/testing/selftests/pidfd/pidfd_wait.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index 4063d6f31fa4..be2943f072f6 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -205,6 +205,8 @@ TEST(wait_nonblock) ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG, NULL); ASSERT_EQ(ret, 0); + ASSERT_EQ(fcntl(pidfd, F_SETFL, (flags & ~O_NONBLOCK)), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_STOPPED); @@ -212,8 +214,6 @@ TEST(wait_nonblock) ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0); - ASSERT_EQ(fcntl(pidfd, F_SETFL, (flags & ~O_NONBLOCK)), 0); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_EXITED); -- cgit v1.3.1 From a968433723310f35898b4a2f635a7991aeef66b1 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 22 Sep 2020 16:21:40 -0700 Subject: kbuild: explicitly specify the build id style ld's --build-id defaults to "sha1" style, while lld defaults to "fast". The build IDs are very different between the two, which may confuse programs that reference them. Signed-off-by: Bill Wendling Acked-by: David S. Miller Signed-off-by: Masahiro Yamada --- Makefile | 4 ++-- arch/arm/vdso/Makefile | 2 +- arch/arm64/kernel/vdso/Makefile | 2 +- arch/arm64/kernel/vdso32/Makefile | 2 +- arch/mips/vdso/Makefile | 2 +- arch/riscv/kernel/vdso/Makefile | 2 +- arch/s390/kernel/vdso64/Makefile | 2 +- arch/sparc/vdso/Makefile | 2 +- arch/x86/entry/vdso/Makefile | 2 +- tools/testing/selftests/bpf/Makefile | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/Makefile b/Makefile index e39ed7967cd6..088fb7d49dd7 100644 --- a/Makefile +++ b/Makefile @@ -982,8 +982,8 @@ KBUILD_CPPFLAGS += $(KCPPFLAGS) KBUILD_AFLAGS += $(KAFLAGS) KBUILD_CFLAGS += $(KCFLAGS) -KBUILD_LDFLAGS_MODULE += --build-id -LDFLAGS_vmlinux += --build-id +KBUILD_LDFLAGS_MODULE += --build-id=sha1 +LDFLAGS_vmlinux += --build-id=sha1 ifeq ($(CONFIG_STRIP_ASM_SYMS),y) LDFLAGS_vmlinux += $(call ld-option, -X,) diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index a54f70731d9f..150ce6e6a5d3 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -19,7 +19,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO32 ldflags-$(CONFIG_CPU_ENDIAN_BE8) := --be8 ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ -z max-page-size=4096 -nostdlib -shared $(ldflags-y) \ - --hash-style=sysv --build-id \ + --hash-style=sysv --build-id=sha1 \ -T obj-$(CONFIG_VDSO) += vdso.o diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 7cd8aafbe96e..3dc4b65da99d 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -24,7 +24,7 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti # routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so # preparation in build-time C")). ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ - -Bsymbolic $(call ld-option, --no-eh-frame-hdr) --build-id -n \ + -Bsymbolic $(call ld-option, --no-eh-frame-hdr) --build-id=sha1 -n \ $(btildflags-y) -T ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 1259e5bd0fab..7f96a1a9f68c 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -128,7 +128,7 @@ VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 VDSO_LDFLAGS += -nostdlib -shared -mfloat-abi=soft VDSO_LDFLAGS += -Wl,--hash-style=sysv -VDSO_LDFLAGS += -Wl,--build-id +VDSO_LDFLAGS += -Wl,--build-id=sha1 VDSO_LDFLAGS += $(call cc32-ldoption,-fuse-ld=bfd) diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 57fe83235281..5810cc12bc1d 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -61,7 +61,7 @@ endif # VDSO linker flags. ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ $(filter -E%,$(KBUILD_CFLAGS)) -nostdlib -shared \ - -G 0 --eh-frame-hdr --hash-style=sysv --build-id -T + -G 0 --eh-frame-hdr --hash-style=sysv --build-id=sha1 -T CFLAGS_REMOVE_vdso.o = -pg diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 478e7338ddc1..7d6a94d45ec9 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -49,7 +49,7 @@ $(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE # refer to these symbols in the kernel code rather than hand-coded addresses. SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ - -Wl,--build-id -Wl,--hash-style=both + -Wl,--build-id=sha1 -Wl,--hash-style=both $(obj)/vdso-dummy.o: $(src)/vdso.lds $(obj)/rt_sigreturn.o FORCE $(call if_changed,vdsold) diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index d0d406cfffa9..9acad762456d 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -19,7 +19,7 @@ KBUILD_AFLAGS_64 += -m64 -s KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin ldflags-y := -fPIC -shared -nostdlib -soname=linux-vdso64.so.1 \ - --hash-style=both --build-id -T + --hash-style=both --build-id=sha1 -T $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64) diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile index f44355e46f31..469dd23887ab 100644 --- a/arch/sparc/vdso/Makefile +++ b/arch/sparc/vdso/Makefile @@ -115,7 +115,7 @@ quiet_cmd_vdso = VDSO $@ -T $(filter %.lds,$^) $(filter %.o,$^) && \ sh $(srctree)/$(src)/checkundef.sh '$(OBJDUMP)' '$@' -VDSO_LDFLAGS = -shared --hash-style=both --build-id -Bsymbolic +VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 -Bsymbolic GCOV_PROFILE := n # diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 215376d975a2..ebba25ed9a38 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -176,7 +176,7 @@ quiet_cmd_vdso = VDSO $@ -T $(filter %.lds,$^) $(filter %.o,$^) && \ sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' -VDSO_LDFLAGS = -shared --hash-style=both --build-id \ +VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 \ $(call ld-option, --eh-frame-hdr) -Bsymbolic GCOV_PROFILE := n diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index fc946b7ac288..daf186f88a63 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -133,7 +133,7 @@ $(OUTPUT)/%:%.c $(OUTPUT)/urandom_read: urandom_read.c $(call msg,BINARY,,$@) - $(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id + $(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id=sha1 $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) $(call msg,CC,,$@) -- cgit v1.3.1 From eca43ee6c46db92dd850ce659316b0680d70e137 Mon Sep 17 00:00:00 2001 From: "Nikita V. Shirokov" Date: Fri, 9 Oct 2020 07:03:25 +0000 Subject: bpf: Add tcp_notsent_lowat bpf setsockopt Adding support for TCP_NOTSENT_LOWAT sockoption (https://lwn.net/Articles/560082/) in tcp bpf programs. Signed-off-by: Nikita V. Shirokov Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201009070325.226855-1-tehnerd@tehnerd.com --- include/uapi/linux/bpf.h | 2 +- net/core/filter.c | 4 ++++ tools/include/uapi/linux/bpf.h | 2 +- tools/testing/selftests/bpf/progs/connect4_prog.c | 19 +++++++++++++++++++ 4 files changed, 25 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d83561e8cd2c..42d2df799397 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1698,7 +1698,7 @@ union bpf_attr { * **TCP_CONGESTION**, **TCP_BPF_IW**, * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, - * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**. + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. * Return diff --git a/net/core/filter.c b/net/core/filter.c index 05df73780dd3..5da44b11e1ec 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4827,6 +4827,10 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, else icsk->icsk_user_timeout = val; break; + case TCP_NOTSENT_LOWAT: + tp->notsent_lowat = val; + sk->sk_write_space(sk); + break; default: ret = -EINVAL; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d83561e8cd2c..42d2df799397 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1698,7 +1698,7 @@ union bpf_attr { * **TCP_CONGESTION**, **TCP_BPF_IW**, * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, - * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**. + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. * Return diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index b1b2773c0b9d..a943d394fd3a 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -23,6 +23,10 @@ #define TCP_CA_NAME_MAX 16 #endif +#ifndef TCP_NOTSENT_LOWAT +#define TCP_NOTSENT_LOWAT 25 +#endif + #ifndef IFNAMSIZ #define IFNAMSIZ 16 #endif @@ -128,6 +132,18 @@ static __inline int set_keepalive(struct bpf_sock_addr *ctx) return 0; } +static __inline int set_notsent_lowat(struct bpf_sock_addr *ctx) +{ + int lowat = 65535; + + if (ctx->type == SOCK_STREAM) { + if (bpf_setsockopt(ctx, SOL_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat))) + return 1; + } + + return 0; +} + SEC("cgroup/connect4") int connect_v4_prog(struct bpf_sock_addr *ctx) { @@ -148,6 +164,9 @@ int connect_v4_prog(struct bpf_sock_addr *ctx) if (set_keepalive(ctx)) return 0; + if (set_notsent_lowat(ctx)) + return 0; + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) return 0; else if (ctx->type == SOCK_STREAM) -- cgit v1.3.1 From 75748837b7e56919679e02163f45d5818c644d03 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 8 Oct 2020 18:12:37 -0700 Subject: bpf: Propagate scalar ranges through register assignments. The llvm register allocator may use two different registers representing the same virtual register. In such case the following pattern can be observed: 1047: (bf) r9 = r6 1048: (a5) if r6 < 0x1000 goto pc+1 1050: ... 1051: (a5) if r9 < 0x2 goto pc+66 1052: ... 1053: (bf) r2 = r9 /* r2 needs to have upper and lower bounds */ This is normal behavior of greedy register allocator. The slides 137+ explain why regalloc introduces such register copy: http://llvm.org/devmtg/2018-04/slides/Yatsina-LLVM%20Greedy%20Register%20Allocator.pdf There is no way to tell llvm 'not to do this'. Hence the verifier has to recognize such patterns. In order to track this information without backtracking allocate ID for scalars in a similar way as it's done for find_good_pkt_pointers(). When the verifier encounters r9 = r6 assignment it will assign the same ID to both registers. Later if either register range is narrowed via conditional jump propagate the register state into the other register. Clear register ID in adjust_reg_min_max_vals() for any alu instruction. The register ID is ignored for scalars in regsafe() and doesn't affect state pruning. mark_reg_unknown() clears the ID. It's used to process call, endian and other instructions. Hence ID is explicitly cleared only in adjust_reg_min_max_vals() and in 32-bit mov. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20201009011240.48506-2-alexei.starovoitov@gmail.com --- kernel/bpf/verifier.c | 50 ++++++++++++++++++++++ tools/testing/selftests/bpf/prog_tests/align.c | 16 +++---- .../selftests/bpf/verifier/direct_packet_access.c | 2 +- 3 files changed, 59 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 62b804651a48..ba96f7e9bbc0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6436,6 +6436,11 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, src_reg = NULL; if (dst_reg->type != SCALAR_VALUE) ptr_reg = dst_reg; + else + /* Make sure ID is cleared otherwise dst_reg min/max could be + * incorrectly propagated into other registers by find_equal_scalars() + */ + dst_reg->id = 0; if (BPF_SRC(insn->code) == BPF_X) { src_reg = ®s[insn->src_reg]; if (src_reg->type != SCALAR_VALUE) { @@ -6569,6 +6574,12 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) /* case: R1 = R2 * copy register state to dest reg */ + if (src_reg->type == SCALAR_VALUE && !src_reg->id) + /* Assign src and dst registers the same ID + * that will be used by find_equal_scalars() + * to propagate min/max range. + */ + src_reg->id = ++env->id_gen; *dst_reg = *src_reg; dst_reg->live |= REG_LIVE_WRITTEN; dst_reg->subreg_def = DEF_NOT_SUBREG; @@ -6581,6 +6592,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EACCES; } else if (src_reg->type == SCALAR_VALUE) { *dst_reg = *src_reg; + /* Make sure ID is cleared otherwise + * dst_reg min/max could be incorrectly + * propagated into src_reg by find_equal_scalars() + */ + dst_reg->id = 0; dst_reg->live |= REG_LIVE_WRITTEN; dst_reg->subreg_def = env->insn_idx + 1; } else { @@ -7369,6 +7385,30 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, return true; } +static void find_equal_scalars(struct bpf_verifier_state *vstate, + struct bpf_reg_state *known_reg) +{ + struct bpf_func_state *state; + struct bpf_reg_state *reg; + int i, j; + + for (i = 0; i <= vstate->curframe; i++) { + state = vstate->frame[i]; + for (j = 0; j < MAX_BPF_REG; j++) { + reg = &state->regs[j]; + if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) + *reg = *known_reg; + } + + bpf_for_each_spilled_reg(j, state, reg) { + if (!reg) + continue; + if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) + *reg = *known_reg; + } + } +} + static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx) { @@ -7497,6 +7537,11 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, reg_combine_min_max(&other_branch_regs[insn->src_reg], &other_branch_regs[insn->dst_reg], src_reg, dst_reg, opcode); + if (src_reg->id) { + find_equal_scalars(this_branch, src_reg); + find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]); + } + } } else if (dst_reg->type == SCALAR_VALUE) { reg_set_min_max(&other_branch_regs[insn->dst_reg], @@ -7504,6 +7549,11 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, opcode, is_jmp32); } + if (dst_reg->type == SCALAR_VALUE && dst_reg->id) { + find_equal_scalars(this_branch, dst_reg); + find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]); + } + /* detect if R == 0 where R is returned from bpf_map_lookup_elem(). * NOTE: these optimizations below are related with pointer comparison * which will never be JMP32. diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index c548aded6585..52414058a627 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -195,13 +195,13 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, {9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {10, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, {11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, - {12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {12, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, {13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {14, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, {15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, {16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, }, @@ -518,7 +518,7 @@ static struct bpf_align_test tests[] = { * the total offset is 4-byte aligned and meets the * load's requirements. */ - {20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"}, + {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"}, }, }, @@ -561,18 +561,18 @@ static struct bpf_align_test tests[] = { /* Adding 14 makes R6 be (4n+2) */ {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, /* Subtracting from packet pointer overflows ubounds */ - {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"}, + {13, "R5_w=pkt(id=2,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"}, /* New unknown value in R7 is (4n), >= 76 */ {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, /* Adding it to packet pointer gives nice bounds again */ - {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, + {16, "R5_w=pkt(id=3,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, + {20, "R5=pkt(id=3,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, }, }, }; diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c index 2c5fbe7bcd27..ae72536603fe 100644 --- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c +++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c @@ -529,7 +529,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)", + .errstr = "invalid access to packet, off=0 size=8, R5(id=2,off=0,r=0)", .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { -- cgit v1.3.1 From 03d4d13fab3fa75fbcf09bced5e3c8acf1622969 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 8 Oct 2020 18:12:39 -0700 Subject: selftests/bpf: Add profiler test The main purpose of the profiler test to check different llvm generation patterns to make sure the verifier can load these large programs. Note that profiler.inc.h test doesn't follow strict kernel coding style. The code was formatted in the kernel style, but variable declarations are kept as-is to preserve original llvm IR pattern. profiler1.c should pass with older and newer llvm profiler[23].c may fail on older llvm that don't have: https://reviews.llvm.org/D85570 because llvm may do speculative code motion optimization that will generate code like this: // r9 is a pointer to map_value // r7 is a scalar 17: bf 96 00 00 00 00 00 00 r6 = r9 18: 0f 76 00 00 00 00 00 00 r6 += r7 19: a5 07 01 00 01 01 00 00 if r7 < 257 goto +1 20: bf 96 00 00 00 00 00 00 r6 = r9 // r6 is used here The verifier will reject such code with the error: "math between map_value pointer and register with unbounded min value is not allowed" At insn 18 the r7 is indeed unbounded. The later insn 19 checks the bounds and the insn 20 undoes map_value addition. It is currently impossible for the verifier to understand such speculative pointer arithmetic. Hence llvm D85570 addresses it on the compiler side. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201009011240.48506-4-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/README.rst | 38 + .../selftests/bpf/prog_tests/test_profiler.c | 72 ++ tools/testing/selftests/bpf/progs/profiler.h | 177 ++++ tools/testing/selftests/bpf/progs/profiler.inc.h | 969 +++++++++++++++++++++ tools/testing/selftests/bpf/progs/profiler1.c | 6 + tools/testing/selftests/bpf/progs/profiler2.c | 6 + tools/testing/selftests/bpf/progs/profiler3.c | 6 + 7 files changed, 1274 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/test_profiler.c create mode 100644 tools/testing/selftests/bpf/progs/profiler.h create mode 100644 tools/testing/selftests/bpf/progs/profiler.inc.h create mode 100644 tools/testing/selftests/bpf/progs/profiler1.c create mode 100644 tools/testing/selftests/bpf/progs/profiler2.c create mode 100644 tools/testing/selftests/bpf/progs/profiler3.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 66acfcf15ff2..ac9eda830187 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -7,6 +7,44 @@ General instructions on running selftests can be found in Additional information about selftest failures are documented here. +profiler[23] test failures with clang/llvm <12.0.0 +================================================== + +With clang/llvm <12.0.0, the profiler[23] test may fail. +The symptom looks like + +.. code-block:: c + + // r9 is a pointer to map_value + // r7 is a scalar + 17: bf 96 00 00 00 00 00 00 r6 = r9 + 18: 0f 76 00 00 00 00 00 00 r6 += r7 + math between map_value pointer and register with unbounded min value is not allowed + + // the instructions below will not be seen in the verifier log + 19: a5 07 01 00 01 01 00 00 if r7 < 257 goto +1 + 20: bf 96 00 00 00 00 00 00 r6 = r9 + // r6 is used here + +The verifier will reject such code with above error. +At insn 18 the r7 is indeed unbounded. The later insn 19 checks the bounds and +the insn 20 undoes map_value addition. It is currently impossible for the +verifier to understand such speculative pointer arithmetic. +Hence + https://reviews.llvm.org/D85570 +addresses it on the compiler side. It was committed on llvm 12. + +The corresponding C code +.. code-block:: c + + for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) { + filepart_length = bpf_probe_read_str(payload, ...); + if (filepart_length <= MAX_PATH) { + barrier_var(filepart_length); // workaround + payload += filepart_length; + } + } + bpf_iter test failures with clang/llvm 10.0.0 ============================================= diff --git a/tools/testing/selftests/bpf/prog_tests/test_profiler.c b/tools/testing/selftests/bpf/prog_tests/test_profiler.c new file mode 100644 index 000000000000..4ca275101ee0 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_profiler.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include +#include "progs/profiler.h" +#include "profiler1.skel.h" +#include "profiler2.skel.h" +#include "profiler3.skel.h" + +static int sanity_run(struct bpf_program *prog) +{ + struct bpf_prog_test_run_attr test_attr = {}; + __u64 args[] = {1, 2, 3}; + __u32 duration = 0; + int err, prog_fd; + + prog_fd = bpf_program__fd(prog); + test_attr.prog_fd = prog_fd; + test_attr.ctx_in = args; + test_attr.ctx_size_in = sizeof(args); + err = bpf_prog_test_run_xattr(&test_attr); + if (CHECK(err || test_attr.retval, "test_run", + "err %d errno %d retval %d duration %d\n", + err, errno, test_attr.retval, duration)) + return -1; + return 0; +} + +void test_test_profiler(void) +{ + struct profiler1 *profiler1_skel = NULL; + struct profiler2 *profiler2_skel = NULL; + struct profiler3 *profiler3_skel = NULL; + __u32 duration = 0; + int err; + + profiler1_skel = profiler1__open_and_load(); + if (CHECK(!profiler1_skel, "profiler1_skel_load", "profiler1 skeleton failed\n")) + goto cleanup; + + err = profiler1__attach(profiler1_skel); + if (CHECK(err, "profiler1_attach", "profiler1 attach failed: %d\n", err)) + goto cleanup; + + if (sanity_run(profiler1_skel->progs.raw_tracepoint__sched_process_exec)) + goto cleanup; + + profiler2_skel = profiler2__open_and_load(); + if (CHECK(!profiler2_skel, "profiler2_skel_load", "profiler2 skeleton failed\n")) + goto cleanup; + + err = profiler2__attach(profiler2_skel); + if (CHECK(err, "profiler2_attach", "profiler2 attach failed: %d\n", err)) + goto cleanup; + + if (sanity_run(profiler2_skel->progs.raw_tracepoint__sched_process_exec)) + goto cleanup; + + profiler3_skel = profiler3__open_and_load(); + if (CHECK(!profiler3_skel, "profiler3_skel_load", "profiler3 skeleton failed\n")) + goto cleanup; + + err = profiler3__attach(profiler3_skel); + if (CHECK(err, "profiler3_attach", "profiler3 attach failed: %d\n", err)) + goto cleanup; + + if (sanity_run(profiler3_skel->progs.raw_tracepoint__sched_process_exec)) + goto cleanup; +cleanup: + profiler1__destroy(profiler1_skel); + profiler2__destroy(profiler2_skel); + profiler3__destroy(profiler3_skel); +} diff --git a/tools/testing/selftests/bpf/progs/profiler.h b/tools/testing/selftests/bpf/progs/profiler.h new file mode 100644 index 000000000000..3bac4fdd4bdf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/profiler.h @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#pragma once + +#define TASK_COMM_LEN 16 +#define MAX_ANCESTORS 4 +#define MAX_PATH 256 +#define KILL_TARGET_LEN 64 +#define CTL_MAXNAME 10 +#define MAX_ARGS_LEN 4096 +#define MAX_FILENAME_LEN 512 +#define MAX_ENVIRON_LEN 8192 +#define MAX_PATH_DEPTH 32 +#define MAX_FILEPATH_LENGTH (MAX_PATH_DEPTH * MAX_PATH) +#define MAX_CGROUPS_PATH_DEPTH 8 + +#define MAX_METADATA_PAYLOAD_LEN TASK_COMM_LEN + +#define MAX_CGROUP_PAYLOAD_LEN \ + (MAX_PATH * 2 + (MAX_PATH * MAX_CGROUPS_PATH_DEPTH)) + +#define MAX_CAP_PAYLOAD_LEN (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN) + +#define MAX_SYSCTL_PAYLOAD_LEN \ + (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + CTL_MAXNAME + MAX_PATH) + +#define MAX_KILL_PAYLOAD_LEN \ + (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + TASK_COMM_LEN + \ + KILL_TARGET_LEN) + +#define MAX_EXEC_PAYLOAD_LEN \ + (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + MAX_FILENAME_LEN + \ + MAX_ARGS_LEN + MAX_ENVIRON_LEN) + +#define MAX_FILEMOD_PAYLOAD_LEN \ + (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + MAX_FILEPATH_LENGTH + \ + MAX_FILEPATH_LENGTH) + +enum data_type { + INVALID_EVENT, + EXEC_EVENT, + FORK_EVENT, + KILL_EVENT, + SYSCTL_EVENT, + FILEMOD_EVENT, + MAX_DATA_TYPE_EVENT +}; + +enum filemod_type { + FMOD_OPEN, + FMOD_LINK, + FMOD_SYMLINK, +}; + +struct ancestors_data_t { + pid_t ancestor_pids[MAX_ANCESTORS]; + uint32_t ancestor_exec_ids[MAX_ANCESTORS]; + uint64_t ancestor_start_times[MAX_ANCESTORS]; + uint32_t num_ancestors; +}; + +struct var_metadata_t { + enum data_type type; + pid_t pid; + uint32_t exec_id; + uid_t uid; + gid_t gid; + uint64_t start_time; + uint32_t cpu_id; + uint64_t bpf_stats_num_perf_events; + uint64_t bpf_stats_start_ktime_ns; + uint8_t comm_length; +}; + +struct cgroup_data_t { + ino_t cgroup_root_inode; + ino_t cgroup_proc_inode; + uint64_t cgroup_root_mtime; + uint64_t cgroup_proc_mtime; + uint16_t cgroup_root_length; + uint16_t cgroup_proc_length; + uint16_t cgroup_full_length; + int cgroup_full_path_root_pos; +}; + +struct var_sysctl_data_t { + struct var_metadata_t meta; + struct cgroup_data_t cgroup_data; + struct ancestors_data_t ancestors_info; + uint8_t sysctl_val_length; + uint16_t sysctl_path_length; + char payload[MAX_SYSCTL_PAYLOAD_LEN]; +}; + +struct var_kill_data_t { + struct var_metadata_t meta; + struct cgroup_data_t cgroup_data; + struct ancestors_data_t ancestors_info; + pid_t kill_target_pid; + int kill_sig; + uint32_t kill_count; + uint64_t last_kill_time; + uint8_t kill_target_name_length; + uint8_t kill_target_cgroup_proc_length; + char payload[MAX_KILL_PAYLOAD_LEN]; + size_t payload_length; +}; + +struct var_exec_data_t { + struct var_metadata_t meta; + struct cgroup_data_t cgroup_data; + pid_t parent_pid; + uint32_t parent_exec_id; + uid_t parent_uid; + uint64_t parent_start_time; + uint16_t bin_path_length; + uint16_t cmdline_length; + uint16_t environment_length; + char payload[MAX_EXEC_PAYLOAD_LEN]; +}; + +struct var_fork_data_t { + struct var_metadata_t meta; + pid_t parent_pid; + uint32_t parent_exec_id; + uint64_t parent_start_time; + char payload[MAX_METADATA_PAYLOAD_LEN]; +}; + +struct var_filemod_data_t { + struct var_metadata_t meta; + struct cgroup_data_t cgroup_data; + enum filemod_type fmod_type; + unsigned int dst_flags; + uint32_t src_device_id; + uint32_t dst_device_id; + ino_t src_inode; + ino_t dst_inode; + uint16_t src_filepath_length; + uint16_t dst_filepath_length; + char payload[MAX_FILEMOD_PAYLOAD_LEN]; +}; + +struct profiler_config_struct { + bool fetch_cgroups_from_bpf; + ino_t cgroup_fs_inode; + ino_t cgroup_login_session_inode; + uint64_t kill_signals_mask; + ino_t inode_filter; + uint32_t stale_info_secs; + bool use_variable_buffers; + bool read_environ_from_exec; + bool enable_cgroup_v1_resolver; +}; + +struct bpf_func_stats_data { + uint64_t time_elapsed_ns; + uint64_t num_executions; + uint64_t num_perf_events; +}; + +struct bpf_func_stats_ctx { + uint64_t start_time_ns; + struct bpf_func_stats_data* bpf_func_stats_data_val; +}; + +enum bpf_function_id { + profiler_bpf_proc_sys_write, + profiler_bpf_sched_process_exec, + profiler_bpf_sched_process_exit, + profiler_bpf_sys_enter_kill, + profiler_bpf_do_filp_open_ret, + profiler_bpf_sched_process_fork, + profiler_bpf_vfs_link, + profiler_bpf_vfs_symlink, + profiler_bpf_max_function_id +}; diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h new file mode 100644 index 000000000000..00578311a423 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -0,0 +1,969 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include +#include +#include +#include + +#include "profiler.h" + +#ifndef NULL +#define NULL 0 +#endif + +#define O_WRONLY 00000001 +#define O_RDWR 00000002 +#define O_DIRECTORY 00200000 +#define __O_TMPFILE 020000000 +#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) +#define MAX_ERRNO 4095 +#define S_IFMT 00170000 +#define S_IFSOCK 0140000 +#define S_IFLNK 0120000 +#define S_IFREG 0100000 +#define S_IFBLK 0060000 +#define S_IFDIR 0040000 +#define S_IFCHR 0020000 +#define S_IFIFO 0010000 +#define S_ISUID 0004000 +#define S_ISGID 0002000 +#define S_ISVTX 0001000 +#define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK) +#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR) +#define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR) +#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK) +#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO) +#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK) +#define IS_ERR_VALUE(x) (unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO + +#define KILL_DATA_ARRAY_SIZE 8 + +struct var_kill_data_arr_t { + struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE]; +}; + +union any_profiler_data_t { + struct var_exec_data_t var_exec; + struct var_kill_data_t var_kill; + struct var_sysctl_data_t var_sysctl; + struct var_filemod_data_t var_filemod; + struct var_fork_data_t var_fork; + struct var_kill_data_arr_t var_kill_data_arr; +}; + +volatile struct profiler_config_struct bpf_config = {}; + +#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf) +#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode) +#define CGROUP_LOGIN_SESSION_INODE \ + (bpf_config.cgroup_login_session_inode) +#define KILL_SIGNALS (bpf_config.kill_signals_mask) +#define STALE_INFO (bpf_config.stale_info_secs) +#define INODE_FILTER (bpf_config.inode_filter) +#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec) +#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver) + +struct kernfs_iattrs___52 { + struct iattr ia_iattr; +}; + +struct kernfs_node___52 { + union /* kernfs_node_id */ { + struct { + u32 ino; + u32 generation; + }; + u64 id; + } id; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + __type(value, union any_profiler_data_t); +} data_heap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} events SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, KILL_DATA_ARRAY_SIZE); + __type(key, u32); + __type(value, struct var_kill_data_arr_t); +} var_tpid_to_data SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, profiler_bpf_max_function_id); + __type(key, u32); + __type(value, struct bpf_func_stats_data); +} bpf_func_stats SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, bool); + __uint(max_entries, 16); +} allowed_devices SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u64); + __type(value, bool); + __uint(max_entries, 1024); +} allowed_file_inodes SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u64); + __type(value, bool); + __uint(max_entries, 1024); +} allowed_directory_inodes SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, bool); + __uint(max_entries, 16); +} disallowed_exec_inodes SEC(".maps"); + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) +#endif + +static INLINE bool IS_ERR(const void* ptr) +{ + return IS_ERR_VALUE((unsigned long)ptr); +} + +static INLINE u32 get_userspace_pid() +{ + return bpf_get_current_pid_tgid() >> 32; +} + +static INLINE bool is_init_process(u32 tgid) +{ + return tgid == 1 || tgid == 0; +} + +static INLINE unsigned long +probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max) +{ + len = len < max ? len : max; + if (len > 1) { + if (bpf_probe_read(dst, len, src)) + return 0; + } else if (len == 1) { + if (bpf_probe_read(dst, 1, src)) + return 0; + } + return len; +} + +static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct, + int spid) +{ +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) + if (arr_struct->array[i].meta.pid == spid) + return i; + return -1; +} + +static INLINE void populate_ancestors(struct task_struct* task, + struct ancestors_data_t* ancestors_data) +{ + struct task_struct* parent = task; + u32 num_ancestors, ppid; + + ancestors_data->num_ancestors = 0; +#ifdef UNROLL +#pragma unroll +#endif + for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) { + parent = BPF_CORE_READ(parent, real_parent); + if (parent == NULL) + break; + ppid = BPF_CORE_READ(parent, tgid); + if (is_init_process(ppid)) + break; + ancestors_data->ancestor_pids[num_ancestors] = ppid; + ancestors_data->ancestor_exec_ids[num_ancestors] = + BPF_CORE_READ(parent, self_exec_id); + ancestors_data->ancestor_start_times[num_ancestors] = + BPF_CORE_READ(parent, start_time); + ancestors_data->num_ancestors = num_ancestors; + } +} + +static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node, + struct kernfs_node* cgroup_root_node, + void* payload, + int* root_pos) +{ + void* payload_start = payload; + size_t filepart_length; + +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) { + filepart_length = + bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(cgroup_node, name)); + if (!cgroup_node) + return payload; + if (cgroup_node == cgroup_root_node) + *root_pos = payload - payload_start; + if (filepart_length <= MAX_PATH) { + barrier_var(filepart_length); + payload += filepart_length; + } + cgroup_node = BPF_CORE_READ(cgroup_node, parent); + } + return payload; +} + +static ino_t get_inode_from_kernfs(struct kernfs_node* node) +{ + struct kernfs_node___52* node52 = (void*)node; + + if (bpf_core_field_exists(node52->id.ino)) { + barrier_var(node52); + return BPF_CORE_READ(node52, id.ino); + } else { + barrier_var(node); + return (u64)BPF_CORE_READ(node, id); + } +} + +int pids_cgrp_id = 1; + +static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, + struct task_struct* task, + void* payload) +{ + struct kernfs_node* root_kernfs = + BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn); + struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); + + if (ENABLE_CGROUP_V1_RESOLVER) { +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys_state* subsys = + BPF_CORE_READ(task, cgroups, subsys[i]); + if (subsys != NULL) { + int subsys_id = BPF_CORE_READ(subsys, ss, id); + if (subsys_id == pids_cgrp_id) { + proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn); + root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn); + break; + } + } + } + } + + cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs); + cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs); + + if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) { + cgroup_data->cgroup_root_mtime = + BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec); + cgroup_data->cgroup_proc_mtime = + BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec); + } else { + struct kernfs_iattrs___52* root_iattr = + (struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr); + cgroup_data->cgroup_root_mtime = + BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec); + + struct kernfs_iattrs___52* proc_iattr = + (struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr); + cgroup_data->cgroup_proc_mtime = + BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec); + } + + cgroup_data->cgroup_root_length = 0; + cgroup_data->cgroup_proc_length = 0; + cgroup_data->cgroup_full_length = 0; + + size_t cgroup_root_length = + bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name)); + barrier_var(cgroup_root_length); + if (cgroup_root_length <= MAX_PATH) { + barrier_var(cgroup_root_length); + cgroup_data->cgroup_root_length = cgroup_root_length; + payload += cgroup_root_length; + } + + size_t cgroup_proc_length = + bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name)); + barrier_var(cgroup_proc_length); + if (cgroup_proc_length <= MAX_PATH) { + barrier_var(cgroup_proc_length); + cgroup_data->cgroup_proc_length = cgroup_proc_length; + payload += cgroup_proc_length; + } + + if (FETCH_CGROUPS_FROM_BPF) { + cgroup_data->cgroup_full_path_root_pos = -1; + void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload, + &cgroup_data->cgroup_full_path_root_pos); + cgroup_data->cgroup_full_length = payload_end_pos - payload; + payload = payload_end_pos; + } + + return (void*)payload; +} + +static INLINE void* populate_var_metadata(struct var_metadata_t* metadata, + struct task_struct* task, + u32 pid, void* payload) +{ + u64 uid_gid = bpf_get_current_uid_gid(); + + metadata->uid = (u32)uid_gid; + metadata->gid = uid_gid >> 32; + metadata->pid = pid; + metadata->exec_id = BPF_CORE_READ(task, self_exec_id); + metadata->start_time = BPF_CORE_READ(task, start_time); + metadata->comm_length = 0; + + size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm); + barrier_var(comm_length); + if (comm_length <= TASK_COMM_LEN) { + barrier_var(comm_length); + metadata->comm_length = comm_length; + payload += comm_length; + } + + return (void*)payload; +} + +static INLINE struct var_kill_data_t* +get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig) +{ + int zero = 0; + struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero); + + if (kill_data == NULL) + return NULL; + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + + void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload); + payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload); + size_t payload_length = payload - (void*)kill_data->payload; + kill_data->payload_length = payload_length; + populate_ancestors(task, &kill_data->ancestors_info); + kill_data->meta.type = KILL_EVENT; + kill_data->kill_target_pid = tpid; + kill_data->kill_sig = sig; + kill_data->kill_count = 1; + kill_data->last_kill_time = bpf_ktime_get_ns(); + return kill_data; +} + +static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig) +{ + if ((KILL_SIGNALS & (1ULL << sig)) == 0) + return 0; + + u32 spid = get_userspace_pid(); + struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid); + + if (arr_struct == NULL) { + struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig); + int zero = 0; + + if (kill_data == NULL) + return 0; + arr_struct = bpf_map_lookup_elem(&data_heap, &zero); + if (arr_struct == NULL) + return 0; + bpf_probe_read(&arr_struct->array[0], sizeof(arr_struct->array[0]), kill_data); + } else { + int index = get_var_spid_index(arr_struct, spid); + + if (index == -1) { + struct var_kill_data_t* kill_data = + get_var_kill_data(ctx, spid, tpid, sig); + if (kill_data == NULL) + return 0; +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) + if (arr_struct->array[i].meta.pid == 0) { + bpf_probe_read(&arr_struct->array[i], + sizeof(arr_struct->array[i]), kill_data); + bpf_map_update_elem(&var_tpid_to_data, &tpid, + arr_struct, 0); + + return 0; + } + return 0; + } + + struct var_kill_data_t* kill_data = &arr_struct->array[index]; + + u64 delta_sec = + (bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000; + + if (delta_sec < STALE_INFO) { + kill_data->kill_count++; + kill_data->last_kill_time = bpf_ktime_get_ns(); + bpf_probe_read(&arr_struct->array[index], + sizeof(arr_struct->array[index]), + kill_data); + } else { + struct var_kill_data_t* kill_data = + get_var_kill_data(ctx, spid, tpid, sig); + if (kill_data == NULL) + return 0; + bpf_probe_read(&arr_struct->array[index], + sizeof(arr_struct->array[index]), + kill_data); + } + } + bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0); + return 0; +} + +static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx, + enum bpf_function_id func_id) +{ + int func_id_key = func_id; + + bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns(); + bpf_stat_ctx->bpf_func_stats_data_val = + bpf_map_lookup_elem(&bpf_func_stats, &func_id_key); + if (bpf_stat_ctx->bpf_func_stats_data_val) + bpf_stat_ctx->bpf_func_stats_data_val->num_executions++; +} + +static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx) +{ + if (bpf_stat_ctx->bpf_func_stats_data_val) + bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns += + bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns; +} + +static INLINE void +bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx, + struct var_metadata_t* meta) +{ + if (bpf_stat_ctx->bpf_func_stats_data_val) { + bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++; + meta->bpf_stats_num_perf_events = + bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events; + } + meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns; + meta->cpu_id = bpf_get_smp_processor_id(); +} + +static INLINE size_t +read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload) +{ + size_t length = 0; + size_t filepart_length; + struct dentry* parent_dentry; + +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < MAX_PATH_DEPTH; i++) { + filepart_length = bpf_probe_read_str(payload, MAX_PATH, + BPF_CORE_READ(filp_dentry, d_name.name)); + barrier_var(filepart_length); + if (filepart_length > MAX_PATH) + break; + barrier_var(filepart_length); + payload += filepart_length; + length += filepart_length; + + parent_dentry = BPF_CORE_READ(filp_dentry, d_parent); + if (filp_dentry == parent_dentry) + break; + filp_dentry = parent_dentry; + } + + return length; +} + +static INLINE bool +is_ancestor_in_allowed_inodes(struct dentry* filp_dentry) +{ + struct dentry* parent_dentry; +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < MAX_PATH_DEPTH; i++) { + u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino); + bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino); + + if (allowed_dir != NULL) + return true; + parent_dentry = BPF_CORE_READ(filp_dentry, d_parent); + if (filp_dentry == parent_dentry) + break; + filp_dentry = parent_dentry; + } + return false; +} + +static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry, + u32* device_id, + u64* file_ino) +{ + u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev); + *device_id = dev_id; + bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id); + + if (allowed_device == NULL) + return false; + + u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino); + *file_ino = ino; + bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino); + + if (allowed_file == NULL) + if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent))) + return false; + return true; +} + +SEC("kprobe/proc_sys_write") +ssize_t BPF_KPROBE(kprobe__proc_sys_write, + struct file* filp, const char* buf, + size_t count, loff_t* ppos) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write); + + u32 pid = get_userspace_pid(); + int zero = 0; + struct var_sysctl_data_t* sysctl_data = + bpf_map_lookup_elem(&data_heap, &zero); + if (!sysctl_data) + goto out; + + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + sysctl_data->meta.type = SYSCTL_EVENT; + void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload); + payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload); + + populate_ancestors(task, &sysctl_data->ancestors_info); + + sysctl_data->sysctl_val_length = 0; + sysctl_data->sysctl_path_length = 0; + + size_t sysctl_val_length = bpf_probe_read_str(payload, CTL_MAXNAME, buf); + barrier_var(sysctl_val_length); + if (sysctl_val_length <= CTL_MAXNAME) { + barrier_var(sysctl_val_length); + sysctl_data->sysctl_val_length = sysctl_val_length; + payload += sysctl_val_length; + } + + size_t sysctl_path_length = bpf_probe_read_str(payload, MAX_PATH, + BPF_CORE_READ(filp, f_path.dentry, d_name.name)); + barrier_var(sysctl_path_length); + if (sysctl_path_length <= MAX_PATH) { + barrier_var(sysctl_path_length); + sysctl_data->sysctl_path_length = sysctl_path_length; + payload += sysctl_path_length; + } + + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta); + unsigned long data_len = payload - (void*)sysctl_data; + data_len = data_len > sizeof(struct var_sysctl_data_t) + ? sizeof(struct var_sysctl_data_t) + : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("tracepoint/syscalls/sys_enter_kill") +int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx) +{ + struct bpf_func_stats_ctx stats_ctx; + + bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill); + int pid = ctx->args[0]; + int sig = ctx->args[1]; + int ret = trace_var_sys_kill(ctx, pid, sig); + bpf_stats_exit(&stats_ctx); + return ret; +}; + +SEC("raw_tracepoint/sched_process_exit") +int raw_tracepoint__sched_process_exit(void* ctx) +{ + int zero = 0; + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit); + + u32 tpid = get_userspace_pid(); + + struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid); + struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero); + + if (arr_struct == NULL || kill_data == NULL) + goto out; + + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); + +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) { + struct var_kill_data_t* past_kill_data = &arr_struct->array[i]; + + if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) { + bpf_probe_read(kill_data, sizeof(*past_kill_data), past_kill_data); + void* payload = kill_data->payload; + size_t offset = kill_data->payload_length; + if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN) + return 0; + payload += offset; + + kill_data->kill_target_name_length = 0; + kill_data->kill_target_cgroup_proc_length = 0; + + size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm); + barrier_var(comm_length); + if (comm_length <= TASK_COMM_LEN) { + barrier_var(comm_length); + kill_data->kill_target_name_length = comm_length; + payload += comm_length; + } + + size_t cgroup_proc_length = bpf_probe_read_str(payload, KILL_TARGET_LEN, + BPF_CORE_READ(proc_kernfs, name)); + barrier_var(cgroup_proc_length); + if (cgroup_proc_length <= KILL_TARGET_LEN) { + barrier_var(cgroup_proc_length); + kill_data->kill_target_cgroup_proc_length = cgroup_proc_length; + payload += cgroup_proc_length; + } + + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta); + unsigned long data_len = (void*)payload - (void*)kill_data; + data_len = data_len > sizeof(struct var_kill_data_t) + ? sizeof(struct var_kill_data_t) + : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len); + } + } + bpf_map_delete_elem(&var_tpid_to_data, &tpid); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("raw_tracepoint/sched_process_exec") +int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec); + + struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2]; + u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino); + + bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode); + if (should_filter_binprm != NULL) + goto out; + + int zero = 0; + struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero); + if (!proc_exec_data) + goto out; + + if (INODE_FILTER && inode != INODE_FILTER) + return 0; + + u32 pid = get_userspace_pid(); + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + + proc_exec_data->meta.type = EXEC_EVENT; + proc_exec_data->bin_path_length = 0; + proc_exec_data->cmdline_length = 0; + proc_exec_data->environment_length = 0; + void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid, + proc_exec_data->payload); + payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload); + + struct task_struct* parent_task = BPF_CORE_READ(task, real_parent); + proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid); + proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val); + proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id); + proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time); + + const char* filename = BPF_CORE_READ(bprm, filename); + size_t bin_path_length = bpf_probe_read_str(payload, MAX_FILENAME_LEN, filename); + barrier_var(bin_path_length); + if (bin_path_length <= MAX_FILENAME_LEN) { + barrier_var(bin_path_length); + proc_exec_data->bin_path_length = bin_path_length; + payload += bin_path_length; + } + + void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start); + void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end); + unsigned int cmdline_length = probe_read_lim(payload, arg_start, + arg_end - arg_start, MAX_ARGS_LEN); + + if (cmdline_length <= MAX_ARGS_LEN) { + barrier_var(cmdline_length); + proc_exec_data->cmdline_length = cmdline_length; + payload += cmdline_length; + } + + if (READ_ENVIRON_FROM_EXEC) { + void* env_start = (void*)BPF_CORE_READ(task, mm, env_start); + void* env_end = (void*)BPF_CORE_READ(task, mm, env_end); + unsigned long env_len = probe_read_lim(payload, env_start, + env_end - env_start, MAX_ENVIRON_LEN); + if (cmdline_length <= MAX_ENVIRON_LEN) { + proc_exec_data->environment_length = env_len; + payload += env_len; + } + } + + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta); + unsigned long data_len = payload - (void*)proc_exec_data; + data_len = data_len > sizeof(struct var_exec_data_t) + ? sizeof(struct var_exec_data_t) + : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("kretprobe/do_filp_open") +int kprobe_ret__do_filp_open(struct pt_regs* ctx) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret); + + struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx); + + if (filp == NULL || IS_ERR(filp)) + goto out; + unsigned int flags = BPF_CORE_READ(filp, f_flags); + if ((flags & (O_RDWR | O_WRONLY)) == 0) + goto out; + if ((flags & O_TMPFILE) > 0) + goto out; + struct inode* file_inode = BPF_CORE_READ(filp, f_inode); + umode_t mode = BPF_CORE_READ(file_inode, i_mode); + if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) || + S_ISSOCK(mode)) + goto out; + + struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry); + u32 device_id = 0; + u64 file_ino = 0; + if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino)) + goto out; + + int zero = 0; + struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); + if (!filemod_data) + goto out; + + u32 pid = get_userspace_pid(); + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + + filemod_data->meta.type = FILEMOD_EVENT; + filemod_data->fmod_type = FMOD_OPEN; + filemod_data->dst_flags = flags; + filemod_data->src_inode = 0; + filemod_data->dst_inode = file_ino; + filemod_data->src_device_id = 0; + filemod_data->dst_device_id = device_id; + filemod_data->src_filepath_length = 0; + filemod_data->dst_filepath_length = 0; + + void* payload = populate_var_metadata(&filemod_data->meta, task, pid, + filemod_data->payload); + payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); + + size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload); + barrier_var(len); + if (len <= MAX_FILEPATH_LENGTH) { + barrier_var(len); + payload += len; + filemod_data->dst_filepath_length = len; + } + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta); + unsigned long data_len = payload - (void*)filemod_data; + data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("kprobe/vfs_link") +int BPF_KPROBE(kprobe__vfs_link, + struct dentry* old_dentry, struct inode* dir, + struct dentry* new_dentry, struct inode** delegated_inode) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link); + + u32 src_device_id = 0; + u64 src_file_ino = 0; + u32 dst_device_id = 0; + u64 dst_file_ino = 0; + if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) && + !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino)) + goto out; + + int zero = 0; + struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); + if (!filemod_data) + goto out; + + u32 pid = get_userspace_pid(); + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + + filemod_data->meta.type = FILEMOD_EVENT; + filemod_data->fmod_type = FMOD_LINK; + filemod_data->dst_flags = 0; + filemod_data->src_inode = src_file_ino; + filemod_data->dst_inode = dst_file_ino; + filemod_data->src_device_id = src_device_id; + filemod_data->dst_device_id = dst_device_id; + filemod_data->src_filepath_length = 0; + filemod_data->dst_filepath_length = 0; + + void* payload = populate_var_metadata(&filemod_data->meta, task, pid, + filemod_data->payload); + payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); + + size_t len = read_absolute_file_path_from_dentry(old_dentry, payload); + barrier_var(len); + if (len <= MAX_FILEPATH_LENGTH) { + barrier_var(len); + payload += len; + filemod_data->src_filepath_length = len; + } + + len = read_absolute_file_path_from_dentry(new_dentry, payload); + barrier_var(len); + if (len <= MAX_FILEPATH_LENGTH) { + barrier_var(len); + payload += len; + filemod_data->dst_filepath_length = len; + } + + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta); + unsigned long data_len = payload - (void*)filemod_data; + data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("kprobe/vfs_symlink") +int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry, + const char* oldname) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink); + + u32 dst_device_id = 0; + u64 dst_file_ino = 0; + if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino)) + goto out; + + int zero = 0; + struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); + if (!filemod_data) + goto out; + + u32 pid = get_userspace_pid(); + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + + filemod_data->meta.type = FILEMOD_EVENT; + filemod_data->fmod_type = FMOD_SYMLINK; + filemod_data->dst_flags = 0; + filemod_data->src_inode = 0; + filemod_data->dst_inode = dst_file_ino; + filemod_data->src_device_id = 0; + filemod_data->dst_device_id = dst_device_id; + filemod_data->src_filepath_length = 0; + filemod_data->dst_filepath_length = 0; + + void* payload = populate_var_metadata(&filemod_data->meta, task, pid, + filemod_data->payload); + payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); + + size_t len = bpf_probe_read_str(payload, MAX_FILEPATH_LENGTH, oldname); + barrier_var(len); + if (len <= MAX_FILEPATH_LENGTH) { + barrier_var(len); + payload += len; + filemod_data->src_filepath_length = len; + } + len = read_absolute_file_path_from_dentry(dentry, payload); + barrier_var(len); + if (len <= MAX_FILEPATH_LENGTH) { + barrier_var(len); + payload += len; + filemod_data->dst_filepath_length = len; + } + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta); + unsigned long data_len = payload - (void*)filemod_data; + data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("raw_tracepoint/sched_process_fork") +int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork); + + int zero = 0; + struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero); + if (!fork_data) + goto out; + + struct task_struct* parent = (struct task_struct*)ctx->args[0]; + struct task_struct* child = (struct task_struct*)ctx->args[1]; + fork_data->meta.type = FORK_EVENT; + + void* payload = populate_var_metadata(&fork_data->meta, child, + BPF_CORE_READ(child, pid), fork_data->payload); + fork_data->parent_pid = BPF_CORE_READ(parent, pid); + fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id); + fork_data->parent_start_time = BPF_CORE_READ(parent, start_time); + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta); + + unsigned long data_len = payload - (void*)fork_data; + data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/profiler1.c b/tools/testing/selftests/bpf/progs/profiler1.c new file mode 100644 index 000000000000..4df9088bfc00 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/profiler1.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) +#define UNROLL +#define INLINE __always_inline +#include "profiler.inc.h" diff --git a/tools/testing/selftests/bpf/progs/profiler2.c b/tools/testing/selftests/bpf/progs/profiler2.c new file mode 100644 index 000000000000..0f32a3cbf556 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/profiler2.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define barrier_var(var) /**/ +/* undef #define UNROLL */ +#define INLINE /**/ +#include "profiler.inc.h" diff --git a/tools/testing/selftests/bpf/progs/profiler3.c b/tools/testing/selftests/bpf/progs/profiler3.c new file mode 100644 index 000000000000..6249fc31ccb0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/profiler3.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define barrier_var(var) /**/ +#define UNROLL +#define INLINE __noinline +#include "profiler.inc.h" -- cgit v1.3.1 From 54fada41e8a162a45ab13ac4f76af609733d4679 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 8 Oct 2020 18:12:40 -0700 Subject: selftests/bpf: Asm tests for the verifier regalloc tracking. Add asm tests for register allocator tracking logic. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201009011240.48506-5-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/verifier/regalloc.c | 243 ++++++++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 tools/testing/selftests/bpf/verifier/regalloc.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/regalloc.c b/tools/testing/selftests/bpf/verifier/regalloc.c new file mode 100644 index 000000000000..ac71b824f97a --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/regalloc.c @@ -0,0 +1,243 @@ +{ + "regalloc basic", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 4), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc negative", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 24, 4), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=48 off=48 size=1", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc src_reg mark", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 5), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_JMP_REG(BPF_JSGE, BPF_REG_3, BPF_REG_2, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc src_reg negative", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 22, 5), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_JMP_REG(BPF_JSGE, BPF_REG_3, BPF_REG_2, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=48 off=44 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc and spill", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 7), + /* r0 has upper bound that should propagate into r2 */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), /* spill r2 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_2, 0), /* clear r0 and r2 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 */ + BPF_JMP_REG(BPF_JSGE, BPF_REG_0, BPF_REG_3, 2), + /* r3 has lower and upper bounds */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_3), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc and spill negative", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 48, 7), + /* r0 has upper bound that should propagate into r2 */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), /* spill r2 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_2, 0), /* clear r0 and r2 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 */ + BPF_JMP_REG(BPF_JSGE, BPF_REG_0, BPF_REG_3, 2), + /* r3 has lower and upper bounds */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_3), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=48 off=48 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc three regs", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 12, 5), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_4), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc after call", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 20, 4), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_9, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_8), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_9), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc in callee", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 20, 5), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, -- cgit v1.3.1 From 1abdd39f14b25dd2d69096b624a4f86f158a9feb Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Wed, 30 Sep 2020 11:31:51 -0700 Subject: kunit: tool: fix display of make errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CalledProcessError stores the output of the failed process as `bytes`, not a `str`. So when we log it on build error, the make output is all crammed into one line with "\n" instead of actually printing new lines. After this change, we get readable output with new lines, e.g. > CC lib/kunit/kunit-example-test.o > In file included from ../lib/kunit/test.c:9: > ../include/kunit/test.h:22:1: error: unknown type name ‘invalid_type_that_causes_compile’ > 22 | invalid_type_that_causes_compile errors; > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > make[3]: *** [../scripts/Makefile.build:283: lib/kunit/test.o] Error 1 Secondly, trying to concat exceptions to strings will fail with > TypeError: can only concatenate str (not "OSError") to str so fix this with an explicit cast to str. Signed-off-by: Daniel Latypov Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_kernel.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 1b1826500f61..b557b1e93f98 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -36,9 +36,9 @@ class LinuxSourceTreeOperations(object): try: subprocess.check_output(['make', 'mrproper'], stderr=subprocess.STDOUT) except OSError as e: - raise ConfigError('Could not call make command: ' + e) + raise ConfigError('Could not call make command: ' + str(e)) except subprocess.CalledProcessError as e: - raise ConfigError(e.output) + raise ConfigError(e.output.decode()) def make_olddefconfig(self, build_dir, make_options): command = ['make', 'ARCH=um', 'olddefconfig'] @@ -49,9 +49,9 @@ class LinuxSourceTreeOperations(object): try: subprocess.check_output(command, stderr=subprocess.STDOUT) except OSError as e: - raise ConfigError('Could not call make command: ' + e) + raise ConfigError('Could not call make command: ' + str(e)) except subprocess.CalledProcessError as e: - raise ConfigError(e.output) + raise ConfigError(e.output.decode()) def make_allyesconfig(self, build_dir, make_options): kunit_parser.print_with_timestamp( @@ -84,9 +84,9 @@ class LinuxSourceTreeOperations(object): try: subprocess.check_output(command, stderr=subprocess.STDOUT) except OSError as e: - raise BuildError('Could not call execute make: ' + e) + raise BuildError('Could not call execute make: ' + str(e)) except subprocess.CalledProcessError as e: - raise BuildError(e.output) + raise BuildError(e.output.decode()) def linux_bin(self, params, timeout, build_dir, outfile): """Runs the Linux UML binary. Must be named 'linux'.""" -- cgit v1.3.1 From 45dcbb6f5ef78b0a9c1b91bea2f6f227642a65aa Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Tue, 4 Aug 2020 13:47:44 -0700 Subject: kunit: test: add test plan to KUnit TAP format TAP 14 allows an optional test plan to be emitted before the start of the start of testing[1]; this is valuable because it makes it possible for a test harness to detect whether the number of tests run matches the number of tests expected to be run, ensuring that no tests silently failed. Link[1]: https://github.com/isaacs/testanything.github.io/blob/tap14/tap-version-14-specification.md#the-plan Signed-off-by: Brendan Higgins Reviewed-by: Stephen Boyd Signed-off-by: Shuah Khan --- lib/kunit/executor.c | 17 +++++ lib/kunit/test.c | 11 ---- tools/testing/kunit/kunit_parser.py | 76 ++++++++++++++++++---- .../test_data/test_is_test_passed-all_passed.log | 1 + .../kunit/test_data/test_is_test_passed-crash.log | 1 + .../test_data/test_is_test_passed-failure.log | 1 + 6 files changed, 82 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c index 4aab7f70a88c..a95742a4ece7 100644 --- a/lib/kunit/executor.c +++ b/lib/kunit/executor.c @@ -11,10 +11,27 @@ extern struct kunit_suite * const * const __kunit_suites_end[]; #if IS_BUILTIN(CONFIG_KUNIT) +static void kunit_print_tap_header(void) +{ + struct kunit_suite * const * const *suites, * const *subsuite; + int num_of_suites = 0; + + for (suites = __kunit_suites_start; + suites < __kunit_suites_end; + suites++) + for (subsuite = *suites; *subsuite != NULL; subsuite++) + num_of_suites++; + + pr_info("TAP version 14\n"); + pr_info("1..%d\n", num_of_suites); +} + int kunit_run_all_tests(void) { struct kunit_suite * const * const *suites; + kunit_print_tap_header(); + for (suites = __kunit_suites_start; suites < __kunit_suites_end; suites++) diff --git a/lib/kunit/test.c b/lib/kunit/test.c index 3fd89f91937f..de07876b6601 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -20,16 +20,6 @@ static void kunit_set_failure(struct kunit *test) WRITE_ONCE(test->success, false); } -static void kunit_print_tap_version(void) -{ - static bool kunit_has_printed_tap_version; - - if (!kunit_has_printed_tap_version) { - pr_info("TAP version 14\n"); - kunit_has_printed_tap_version = true; - } -} - /* * Append formatted message to log, size of which is limited to * KUNIT_LOG_SIZE bytes (including null terminating byte). @@ -69,7 +59,6 @@ EXPORT_SYMBOL_GPL(kunit_suite_num_test_cases); static void kunit_print_subtest_start(struct kunit_suite *suite) { - kunit_print_tap_version(); kunit_log(KERN_INFO, suite, KUNIT_SUBTEST_INDENT "# Subtest: %s", suite->name); kunit_log(KERN_INFO, suite, KUNIT_SUBTEST_INDENT "1..%zd", diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index f13e0c0d6663..8019e3dd4c32 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -45,10 +45,11 @@ class TestStatus(Enum): FAILURE = auto() TEST_CRASHED = auto() NO_TESTS = auto() + FAILURE_TO_PARSE_TESTS = auto() kunit_start_re = re.compile(r'TAP version [0-9]+$') kunit_end_re = re.compile('(List of all partitions:|' - 'Kernel panic - not syncing: VFS:|reboot: System halted)') + 'Kernel panic - not syncing: VFS:)') def isolate_kunit_output(kernel_output): started = False @@ -109,7 +110,7 @@ OkNotOkResult = namedtuple('OkNotOkResult', ['is_ok','description', 'text']) OK_NOT_OK_SUBTEST = re.compile(r'^[\s]+(ok|not ok) [0-9]+ - (.*)$') -OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) [0-9]+ - (.*)$') +OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) ([0-9]+) - (.*)$') def parse_ok_not_ok_test_case(lines: List[str], test_case: TestCase) -> bool: save_non_diagnositic(lines, test_case) @@ -197,7 +198,9 @@ def max_status(left: TestStatus, right: TestStatus) -> TestStatus: else: return TestStatus.SUCCESS -def parse_ok_not_ok_test_suite(lines: List[str], test_suite: TestSuite) -> bool: +def parse_ok_not_ok_test_suite(lines: List[str], + test_suite: TestSuite, + expected_suite_index: int) -> bool: consume_non_diagnositic(lines) if not lines: test_suite.status = TestStatus.TEST_CRASHED @@ -210,6 +213,12 @@ def parse_ok_not_ok_test_suite(lines: List[str], test_suite: TestSuite) -> bool: test_suite.status = TestStatus.SUCCESS else: test_suite.status = TestStatus.FAILURE + suite_index = int(match.group(2)) + if suite_index != expected_suite_index: + print_with_timestamp( + red('[ERROR] ') + 'expected_suite_index ' + + str(expected_suite_index) + ', but got ' + + str(suite_index)) return True else: return False @@ -222,7 +231,7 @@ def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus: max_test_case_status = bubble_up_errors(lambda x: x.status, test_suite.cases) return max_status(max_test_case_status, test_suite.status) -def parse_test_suite(lines: List[str]) -> TestSuite: +def parse_test_suite(lines: List[str], expected_suite_index: int) -> TestSuite: if not lines: return None consume_non_diagnositic(lines) @@ -241,7 +250,7 @@ def parse_test_suite(lines: List[str]) -> TestSuite: break test_suite.cases.append(test_case) expected_test_case_num -= 1 - if parse_ok_not_ok_test_suite(lines, test_suite): + if parse_ok_not_ok_test_suite(lines, test_suite, expected_suite_index): test_suite.status = bubble_up_test_case_errors(test_suite) return test_suite elif not lines: @@ -261,6 +270,17 @@ def parse_tap_header(lines: List[str]) -> bool: else: return False +TEST_PLAN = re.compile(r'[0-9]+\.\.([0-9]+)') + +def parse_test_plan(lines: List[str]) -> int: + consume_non_diagnositic(lines) + match = TEST_PLAN.match(lines[0]) + if match: + lines.pop(0) + return int(match.group(1)) + else: + return None + def bubble_up_suite_errors(test_suite_list: List[TestSuite]) -> TestStatus: return bubble_up_errors(lambda x: x.status, test_suite_list) @@ -268,20 +288,33 @@ def parse_test_result(lines: List[str]) -> TestResult: consume_non_diagnositic(lines) if not lines or not parse_tap_header(lines): return TestResult(TestStatus.NO_TESTS, [], lines) + expected_test_suite_num = parse_test_plan(lines) + if not expected_test_suite_num: + return TestResult(TestStatus.FAILURE_TO_PARSE_TESTS, [], lines) test_suites = [] - test_suite = parse_test_suite(lines) - while test_suite: - test_suites.append(test_suite) - test_suite = parse_test_suite(lines) - return TestResult(bubble_up_suite_errors(test_suites), test_suites, lines) + for i in range(1, expected_test_suite_num + 1): + test_suite = parse_test_suite(lines, i) + if test_suite: + test_suites.append(test_suite) + else: + print_with_timestamp( + red('[ERROR] ') + ' expected ' + + str(expected_test_suite_num) + + ' test suites, but got ' + str(i - 2)) + break + test_suite = parse_test_suite(lines, -1) + if test_suite: + print_with_timestamp(red('[ERROR] ') + + 'got unexpected test suite: ' + test_suite.name) + if test_suites: + return TestResult(bubble_up_suite_errors(test_suites), test_suites, lines) + else: + return TestResult(TestStatus.NO_TESTS, [], lines) -def parse_run_tests(kernel_output) -> TestResult: +def print_and_count_results(test_result: TestResult) -> None: total_tests = 0 failed_tests = 0 crashed_tests = 0 - test_result = parse_test_result(list(isolate_kunit_output(kernel_output))) - if test_result.status == TestStatus.NO_TESTS: - print_with_timestamp(red('[ERROR] ') + 'no kunit output detected') for test_suite in test_result.suites: if test_suite.status == TestStatus.SUCCESS: print_suite_divider(green('[PASSED] ') + test_suite.name) @@ -303,6 +336,21 @@ def parse_run_tests(kernel_output) -> TestResult: print_with_timestamp(red('[FAILED] ') + test_case.name) print_log(map(yellow, test_case.log)) print_with_timestamp('') + return total_tests, failed_tests, crashed_tests + +def parse_run_tests(kernel_output) -> TestResult: + total_tests = 0 + failed_tests = 0 + crashed_tests = 0 + test_result = parse_test_result(list(isolate_kunit_output(kernel_output))) + if test_result.status == TestStatus.NO_TESTS: + print(red('[ERROR] ') + yellow('no tests run!')) + elif test_result.status == TestStatus.FAILURE_TO_PARSE_TESTS: + print(red('[ERROR] ') + yellow('could not parse test results!')) + else: + (total_tests, + failed_tests, + crashed_tests) = print_and_count_results(test_result) print_with_timestamp(DIVIDER) fmt = green if test_result.status == TestStatus.SUCCESS else red print_with_timestamp( diff --git a/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log b/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log index 62ebc0288355..bc0dc8fe35b7 100644 --- a/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log +++ b/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log @@ -1,4 +1,5 @@ TAP version 14 +1..2 # Subtest: sysctl_test 1..8 # sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed diff --git a/tools/testing/kunit/test_data/test_is_test_passed-crash.log b/tools/testing/kunit/test_data/test_is_test_passed-crash.log index 0b249870c8be..4d97f6708c4a 100644 --- a/tools/testing/kunit/test_data/test_is_test_passed-crash.log +++ b/tools/testing/kunit/test_data/test_is_test_passed-crash.log @@ -1,6 +1,7 @@ printk: console [tty0] enabled printk: console [mc-1] enabled TAP version 14 +1..2 # Subtest: sysctl_test 1..8 # sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed diff --git a/tools/testing/kunit/test_data/test_is_test_passed-failure.log b/tools/testing/kunit/test_data/test_is_test_passed-failure.log index 9e89d32d5667..7a416497e3be 100644 --- a/tools/testing/kunit/test_data/test_is_test_passed-failure.log +++ b/tools/testing/kunit/test_data/test_is_test_passed-failure.log @@ -1,4 +1,5 @@ TAP version 14 +1..2 # Subtest: sysctl_test 1..8 # sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed -- cgit v1.3.1 From dd2ce6a5373c6f5c830be54be10775458a8bd312 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 11 Oct 2020 01:40:01 +0200 Subject: bpf: Improve bpf_redirect_neigh helper description Follow-up to address David's feedback that we should better describe internals of the bpf_redirect_neigh() helper. Suggested-by: David Ahern Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Reviewed-by: David Ahern Link: https://lore.kernel.org/bpf/20201010234006.7075-2-daniel@iogearbox.net --- include/uapi/linux/bpf.h | 10 +++++++--- tools/include/uapi/linux/bpf.h | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 42d2df799397..4272cc53d478 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3679,10 +3679,14 @@ union bpf_attr { * Redirect the packet to another net device of index *ifindex* * and fill in L2 addresses from neighboring subsystem. This helper * is somewhat similar to **bpf_redirect**\ (), except that it - * fills in e.g. MAC addresses based on the L3 information from - * the packet. This helper is supported for IPv4 and IPv6 protocols. + * populates L2 addresses as well, meaning, internally, the helper + * performs a FIB lookup based on the skb's networking header to + * get the address of the next hop and then relies on the neighbor + * lookup for the L2 address of the nexthop. + * * The *flags* argument is reserved and must be 0. The helper is - * currently only supported for tc BPF program types. + * currently only supported for tc BPF program types, and enabled + * for IPv4 and IPv6 protocols. * Return * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 42d2df799397..4272cc53d478 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3679,10 +3679,14 @@ union bpf_attr { * Redirect the packet to another net device of index *ifindex* * and fill in L2 addresses from neighboring subsystem. This helper * is somewhat similar to **bpf_redirect**\ (), except that it - * fills in e.g. MAC addresses based on the L3 information from - * the packet. This helper is supported for IPv4 and IPv6 protocols. + * populates L2 addresses as well, meaning, internally, the helper + * performs a FIB lookup based on the skb's networking header to + * get the address of the next hop and then relies on the neighbor + * lookup for the L2 address of the nexthop. + * * The *flags* argument is reserved and must be 0. The helper is - * currently only supported for tc BPF program types. + * currently only supported for tc BPF program types, and enabled + * for IPv4 and IPv6 protocols. * Return * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. -- cgit v1.3.1 From 9aa1206e8f48222f35a0c809f33b2f4aaa1e2661 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 11 Oct 2020 01:40:02 +0200 Subject: bpf: Add redirect_peer helper Add an efficient ingress to ingress netns switch that can be used out of tc BPF programs in order to redirect traffic from host ns ingress into a container veth device ingress without having to go via CPU backlog queue [0]. For local containers this can also be utilized and path via CPU backlog queue only needs to be taken once, not twice. On a high level this borrows from ipvlan which does similar switch in __netif_receive_skb_core() and then iterates via another_round. This helps to reduce latency for mentioned use cases. Pod to remote pod with redirect(), TCP_RR [1]: # percpu_netperf 10.217.1.33 RT_LATENCY: 122.450 (per CPU: 122.666 122.401 122.333 122.401 ) MEAN_LATENCY: 121.210 (per CPU: 121.100 121.260 121.320 121.160 ) STDDEV_LATENCY: 120.040 (per CPU: 119.420 119.910 125.460 115.370 ) MIN_LATENCY: 46.500 (per CPU: 47.000 47.000 47.000 45.000 ) P50_LATENCY: 118.500 (per CPU: 118.000 119.000 118.000 119.000 ) P90_LATENCY: 127.500 (per CPU: 127.000 128.000 127.000 128.000 ) P99_LATENCY: 130.750 (per CPU: 131.000 131.000 129.000 132.000 ) TRANSACTION_RATE: 32666.400 (per CPU: 8152.200 8169.842 8174.439 8169.897 ) Pod to remote pod with redirect_peer(), TCP_RR: # percpu_netperf 10.217.1.33 RT_LATENCY: 44.449 (per CPU: 43.767 43.127 45.279 45.622 ) MEAN_LATENCY: 45.065 (per CPU: 44.030 45.530 45.190 45.510 ) STDDEV_LATENCY: 84.823 (per CPU: 66.770 97.290 84.380 90.850 ) MIN_LATENCY: 33.500 (per CPU: 33.000 33.000 34.000 34.000 ) P50_LATENCY: 43.250 (per CPU: 43.000 43.000 43.000 44.000 ) P90_LATENCY: 46.750 (per CPU: 46.000 47.000 47.000 47.000 ) P99_LATENCY: 52.750 (per CPU: 51.000 54.000 53.000 53.000 ) TRANSACTION_RATE: 90039.500 (per CPU: 22848.186 23187.089 22085.077 21919.130 ) [0] https://linuxplumbersconf.org/event/7/contributions/674/attachments/568/1002/plumbers_2020_cilium_load_balancer.pdf [1] https://github.com/borkmann/netperf_scripts/blob/master/percpu_netperf Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201010234006.7075-3-daniel@iogearbox.net --- drivers/net/veth.c | 9 +++++++ include/linux/netdevice.h | 4 ++++ include/uapi/linux/bpf.h | 17 +++++++++++++ net/core/dev.c | 15 +++++++++--- net/core/filter.c | 54 ++++++++++++++++++++++++++++++++++++------ tools/include/uapi/linux/bpf.h | 17 +++++++++++++ 6 files changed, 106 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 091e5b4ba042..8c737668008a 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -420,6 +420,14 @@ static int veth_select_rxq(struct net_device *dev) return smp_processor_id() % dev->real_num_rx_queues; } +static struct net_device *veth_peer_dev(struct net_device *dev) +{ + struct veth_priv *priv = netdev_priv(dev); + + /* Callers must be under RCU read side. */ + return rcu_dereference(priv->peer); +} + static int veth_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags, bool ndo_xmit) @@ -1224,6 +1232,7 @@ static const struct net_device_ops veth_netdev_ops = { .ndo_set_rx_headroom = veth_set_rx_headroom, .ndo_bpf = veth_xdp, .ndo_xdp_xmit = veth_ndo_xdp_xmit, + .ndo_get_peer_dev = veth_peer_dev, }; #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 28cfa53daf72..0533f86018dd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1277,6 +1277,9 @@ struct netdev_net_notifier { * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, * int cmd); * Add, change, delete or get information on an IPv4 tunnel. + * struct net_device *(*ndo_get_peer_dev)(struct net_device *dev); + * If a device is paired with a peer device, return the peer instance. + * The caller must be under RCU read context. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1484,6 +1487,7 @@ struct net_device_ops { struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev); int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); + struct net_device * (*ndo_get_peer_dev)(struct net_device *dev); }; /** diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4272cc53d478..b97bc5abb3b8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3719,6 +3719,22 @@ union bpf_attr { * never return NULL. * Return * A pointer pointing to the kernel percpu variable on this cpu. + * + * long bpf_redirect_peer(u32 ifindex, u64 flags) + * Description + * Redirect the packet to another net device of index *ifindex*. + * This helper is somewhat similar to **bpf_redirect**\ (), except + * that the redirection happens to the *ifindex*' peer device and + * the netns switch takes place from ingress to ingress without + * going through the CPU's backlog queue. + * + * The *flags* argument is reserved and must be 0. The helper is + * currently only supported for tc BPF program types at the ingress + * hook and for veth device types. The peer device must reside in a + * different network namespace. + * Return + * The helper returns **TC_ACT_REDIRECT** on success or + * **TC_ACT_SHOT** on error. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3876,6 +3892,7 @@ union bpf_attr { FN(redirect_neigh), \ FN(bpf_per_cpu_ptr), \ FN(bpf_this_cpu_ptr), \ + FN(redirect_peer), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/net/core/dev.c b/net/core/dev.c index 9d55bf5d1a65..7dd015823593 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4930,7 +4930,7 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); static inline struct sk_buff * sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, - struct net_device *orig_dev) + struct net_device *orig_dev, bool *another) { #ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress); @@ -4974,7 +4974,11 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, * redirecting to another netdev */ __skb_push(skb, skb->mac_len); - skb_do_redirect(skb); + if (skb_do_redirect(skb) == -EAGAIN) { + __skb_pull(skb, skb->mac_len); + *another = true; + break; + } return NULL; case TC_ACT_CONSUMED: return NULL; @@ -5163,7 +5167,12 @@ another_round: skip_taps: #ifdef CONFIG_NET_INGRESS if (static_branch_unlikely(&ingress_needed_key)) { - skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev); + bool another = false; + + skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev, + &another); + if (another) + goto another_round; if (!skb) goto out; diff --git a/net/core/filter.c b/net/core/filter.c index 5da44b11e1ec..fab951c6be57 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2380,8 +2380,9 @@ out: /* Internal, non-exposed redirect flags. */ enum { - BPF_F_NEIGH = (1ULL << 1), -#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH) + BPF_F_NEIGH = (1ULL << 1), + BPF_F_PEER = (1ULL << 2), +#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER) }; BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) @@ -2430,19 +2431,35 @@ EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info); int skb_do_redirect(struct sk_buff *skb) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct net *net = dev_net(skb->dev); struct net_device *dev; u32 flags = ri->flags; - dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->tgt_index); + dev = dev_get_by_index_rcu(net, ri->tgt_index); ri->tgt_index = 0; - if (unlikely(!dev)) { - kfree_skb(skb); - return -EINVAL; + ri->flags = 0; + if (unlikely(!dev)) + goto out_drop; + if (flags & BPF_F_PEER) { + const struct net_device_ops *ops = dev->netdev_ops; + + if (unlikely(!ops->ndo_get_peer_dev || + !skb_at_tc_ingress(skb))) + goto out_drop; + dev = ops->ndo_get_peer_dev(dev); + if (unlikely(!dev || + !is_skb_forwardable(dev, skb) || + net_eq(net, dev_net(dev)))) + goto out_drop; + skb->dev = dev; + return -EAGAIN; } - return flags & BPF_F_NEIGH ? __bpf_redirect_neigh(skb, dev) : __bpf_redirect(skb, dev, flags); +out_drop: + kfree_skb(skb); + return -EINVAL; } BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags) @@ -2466,6 +2483,27 @@ static const struct bpf_func_proto bpf_redirect_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_2(bpf_redirect_peer, u32, ifindex, u64, flags) +{ + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + + if (unlikely(flags)) + return TC_ACT_SHOT; + + ri->flags = BPF_F_PEER; + ri->tgt_index = ifindex; + + return TC_ACT_REDIRECT; +} + +static const struct bpf_func_proto bpf_redirect_peer_proto = { + .func = bpf_redirect_peer, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, +}; + BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); @@ -7053,6 +7091,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_redirect_proto; case BPF_FUNC_redirect_neigh: return &bpf_redirect_neigh_proto; + case BPF_FUNC_redirect_peer: + return &bpf_redirect_peer_proto; case BPF_FUNC_get_route_realm: return &bpf_get_route_realm_proto; case BPF_FUNC_get_hash_recalc: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4272cc53d478..b97bc5abb3b8 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3719,6 +3719,22 @@ union bpf_attr { * never return NULL. * Return * A pointer pointing to the kernel percpu variable on this cpu. + * + * long bpf_redirect_peer(u32 ifindex, u64 flags) + * Description + * Redirect the packet to another net device of index *ifindex*. + * This helper is somewhat similar to **bpf_redirect**\ (), except + * that the redirection happens to the *ifindex*' peer device and + * the netns switch takes place from ingress to ingress without + * going through the CPU's backlog queue. + * + * The *flags* argument is reserved and must be 0. The helper is + * currently only supported for tc BPF program types at the ingress + * hook and for veth device types. The peer device must reside in a + * different network namespace. + * Return + * The helper returns **TC_ACT_REDIRECT** on success or + * **TC_ACT_SHOT** on error. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3876,6 +3892,7 @@ union bpf_attr { FN(redirect_neigh), \ FN(bpf_per_cpu_ptr), \ FN(bpf_this_cpu_ptr), \ + FN(redirect_peer), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.3.1 From 4a8f87e60f6db40e640f1db555d063b2c4dea5f1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 11 Oct 2020 01:40:03 +0200 Subject: bpf: Allow for map-in-map with dynamic inner array map entries Recent work in f4d05259213f ("bpf: Add map_meta_equal map ops") and 134fede4eecf ("bpf: Relax max_entries check for most of the inner map types") added support for dynamic inner max elements for most map-in-map types. Exceptions were maps like array or prog array where the map_gen_lookup() callback uses the maps' max_entries field as a constant when emitting instructions. We recently implemented Maglev consistent hashing into Cilium's load balancer which uses map-in-map with an outer map being hash and inner being array holding the Maglev backend table for each service. This has been designed this way in order to reduce overall memory consumption given the outer hash map allows to avoid preallocating a large, flat memory area for all services. Also, the number of service mappings is not always known a-priori. The use case for dynamic inner array map entries is to further reduce memory overhead, for example, some services might just have a small number of back ends while others could have a large number. Right now the Maglev backend table for small and large number of backends would need to have the same inner array map entries which adds a lot of unneeded overhead. Dynamic inner array map entries can be realized by avoiding the inlined code generation for their lookup. The lookup will still be efficient since it will be calling into array_map_lookup_elem() directly and thus avoiding retpoline. The patch adds a BPF_F_INNER_MAP flag to map creation which therefore skips inline code generation and relaxes array_map_meta_equal() check to ignore both maps' max_entries. This also still allows to have faster lookups for map-in-map when BPF_F_INNER_MAP is not specified and hence dynamic max_entries not needed. Example code generation where inner map is dynamic sized array: # bpftool p d x i 125 int handle__sys_enter(void * ctx): ; int handle__sys_enter(void *ctx) 0: (b4) w1 = 0 ; int key = 0; 1: (63) *(u32 *)(r10 -4) = r1 2: (bf) r2 = r10 ; 3: (07) r2 += -4 ; inner_map = bpf_map_lookup_elem(&outer_arr_dyn, &key); 4: (18) r1 = map[id:468] 6: (07) r1 += 272 7: (61) r0 = *(u32 *)(r2 +0) 8: (35) if r0 >= 0x3 goto pc+5 9: (67) r0 <<= 3 10: (0f) r0 += r1 11: (79) r0 = *(u64 *)(r0 +0) 12: (15) if r0 == 0x0 goto pc+1 13: (05) goto pc+1 14: (b7) r0 = 0 15: (b4) w6 = -1 ; if (!inner_map) 16: (15) if r0 == 0x0 goto pc+6 17: (bf) r2 = r10 ; 18: (07) r2 += -4 ; val = bpf_map_lookup_elem(inner_map, &key); 19: (bf) r1 = r0 | No inlining but instead 20: (85) call array_map_lookup_elem#149280 | call to array_map_lookup_elem() ; return val ? *val : -1; | for inner array lookup. 21: (15) if r0 == 0x0 goto pc+1 ; return val ? *val : -1; 22: (61) r6 = *(u32 *)(r0 +0) ; } 23: (bc) w0 = w6 24: (95) exit Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201010234006.7075-4-daniel@iogearbox.net --- include/linux/bpf.h | 2 +- include/uapi/linux/bpf.h | 3 +++ kernel/bpf/arraymap.c | 17 +++++++++++------ kernel/bpf/hashtab.c | 6 +++--- kernel/bpf/verifier.c | 6 ++++-- net/xdp/xskmap.c | 2 +- tools/include/uapi/linux/bpf.h | 3 +++ 7 files changed, 26 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index dc63eeed4fd9..2b16bf48aab6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -82,7 +82,7 @@ struct bpf_map_ops { void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, int fd); void (*map_fd_put_ptr)(void *ptr); - u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); + int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); u32 (*map_fd_sys_lookup_elem)(void *ptr); void (*map_seq_show_elem)(struct bpf_map *map, void *key, struct seq_file *m); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b97bc5abb3b8..bf5a99d803e4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -435,6 +435,9 @@ enum { /* Share perf_event among processes */ BPF_F_PRESERVE_ELEMS = (1U << 11), + +/* Create a map that is suitable to be an inner map with dynamic max entries */ + BPF_F_INNER_MAP = (1U << 12), }; /* Flags for BPF_PROG_QUERY. */ diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index bd777dd6f967..c6c81eceb68f 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -16,7 +16,7 @@ #define ARRAY_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \ - BPF_F_PRESERVE_ELEMS) + BPF_F_PRESERVE_ELEMS | BPF_F_INNER_MAP) static void bpf_array_free_percpu(struct bpf_array *array) { @@ -62,7 +62,7 @@ int array_map_alloc_check(union bpf_attr *attr) return -EINVAL; if (attr->map_type != BPF_MAP_TYPE_ARRAY && - attr->map_flags & BPF_F_MMAPABLE) + attr->map_flags & (BPF_F_MMAPABLE | BPF_F_INNER_MAP)) return -EINVAL; if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY && @@ -214,7 +214,7 @@ static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm, } /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */ -static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) +static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_insn *insn = insn_buf; @@ -223,6 +223,9 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) const int map_ptr = BPF_REG_1; const int index = BPF_REG_2; + if (map->map_flags & BPF_F_INNER_MAP) + return -EOPNOTSUPP; + *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); if (!map->bypass_spec_v1) { @@ -496,8 +499,10 @@ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) static bool array_map_meta_equal(const struct bpf_map *meta0, const struct bpf_map *meta1) { - return meta0->max_entries == meta1->max_entries && - bpf_map_meta_equal(meta0, meta1); + if (!bpf_map_meta_equal(meta0, meta1)) + return false; + return meta0->map_flags & BPF_F_INNER_MAP ? true : + meta0->max_entries == meta1->max_entries; } struct bpf_iter_seq_array_map_info { @@ -1251,7 +1256,7 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key) return READ_ONCE(*inner_map); } -static u32 array_of_map_gen_lookup(struct bpf_map *map, +static int array_of_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { struct bpf_array *array = container_of(map, struct bpf_array, map); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 3395cf140d22..1815e97d4c9c 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -612,7 +612,7 @@ static void *htab_map_lookup_elem(struct bpf_map *map, void *key) * bpf_prog * __htab_map_lookup_elem */ -static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) +static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { struct bpf_insn *insn = insn_buf; const int ret = BPF_REG_0; @@ -651,7 +651,7 @@ static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key) return __htab_lru_map_lookup_elem(map, key, false); } -static u32 htab_lru_map_gen_lookup(struct bpf_map *map, +static int htab_lru_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { struct bpf_insn *insn = insn_buf; @@ -2070,7 +2070,7 @@ static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key) return READ_ONCE(*inner_map); } -static u32 htab_of_map_gen_lookup(struct bpf_map *map, +static int htab_of_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { struct bpf_insn *insn = insn_buf; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f3e36eade3d4..fa5badc9279a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11049,7 +11049,9 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) if (insn->imm == BPF_FUNC_map_lookup_elem && ops->map_gen_lookup) { cnt = ops->map_gen_lookup(map_ptr, insn_buf); - if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { + if (cnt == -EOPNOTSUPP) + goto patch_map_ops_generic; + if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) { verbose(env, "bpf verifier is misconfigured\n"); return -EINVAL; } @@ -11079,7 +11081,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) (int (*)(struct bpf_map *map, void *value))NULL)); BUILD_BUG_ON(!__same_type(ops->map_peek_elem, (int (*)(struct bpf_map *map, void *value))NULL)); - +patch_map_ops_generic: switch (insn->imm) { case BPF_FUNC_map_lookup_elem: insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) - diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index 0c5df593bc56..49da2b8ace8b 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -132,7 +132,7 @@ static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key) return 0; } -static u32 xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) +static int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2; struct bpf_insn *insn = insn_buf; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b97bc5abb3b8..bf5a99d803e4 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -435,6 +435,9 @@ enum { /* Share perf_event among processes */ BPF_F_PRESERVE_ELEMS = (1U << 11), + +/* Create a map that is suitable to be an inner map with dynamic max entries */ + BPF_F_INNER_MAP = (1U << 12), }; /* Flags for BPF_PROG_QUERY. */ -- cgit v1.3.1 From 6775dab73bdc17df0f19d9f9d7470dc54536a909 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 11 Oct 2020 01:40:04 +0200 Subject: bpf, selftests: Add test for different array inner map size Extend the "diff_size" subtest to also include a non-inlined array map variant where dynamic inner #elems are possible. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201010234006.7075-5-daniel@iogearbox.net --- .../selftests/bpf/prog_tests/btf_map_in_map.c | 39 +++++++++++++++----- .../selftests/bpf/progs/test_btf_map_in_map.c | 43 ++++++++++++++++++++++ 2 files changed, 72 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c index 540fea4c91a5..76ebe4c250f1 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c @@ -55,10 +55,10 @@ static int kern_sync_rcu(void) static void test_lookup_update(void) { - int err, key = 0, val, i; + int map1_fd, map2_fd, map3_fd, map4_fd, map5_fd, map1_id, map2_id; + int outer_arr_fd, outer_hash_fd, outer_arr_dyn_fd; struct test_btf_map_in_map *skel; - int outer_arr_fd, outer_hash_fd; - int fd, map1_fd, map2_fd, map1_id, map2_id; + int err, key = 0, val, i, fd; skel = test_btf_map_in_map__open_and_load(); if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n")) @@ -70,32 +70,45 @@ static void test_lookup_update(void) map1_fd = bpf_map__fd(skel->maps.inner_map1); map2_fd = bpf_map__fd(skel->maps.inner_map2); + map3_fd = bpf_map__fd(skel->maps.inner_map3); + map4_fd = bpf_map__fd(skel->maps.inner_map4); + map5_fd = bpf_map__fd(skel->maps.inner_map5); + outer_arr_dyn_fd = bpf_map__fd(skel->maps.outer_arr_dyn); outer_arr_fd = bpf_map__fd(skel->maps.outer_arr); outer_hash_fd = bpf_map__fd(skel->maps.outer_hash); - /* inner1 = input, inner2 = input + 1 */ - map1_fd = bpf_map__fd(skel->maps.inner_map1); + /* inner1 = input, inner2 = input + 1, inner3 = input + 2 */ bpf_map_update_elem(outer_arr_fd, &key, &map1_fd, 0); - map2_fd = bpf_map__fd(skel->maps.inner_map2); bpf_map_update_elem(outer_hash_fd, &key, &map2_fd, 0); + bpf_map_update_elem(outer_arr_dyn_fd, &key, &map3_fd, 0); skel->bss->input = 1; usleep(1); - bpf_map_lookup_elem(map1_fd, &key, &val); CHECK(val != 1, "inner1", "got %d != exp %d\n", val, 1); bpf_map_lookup_elem(map2_fd, &key, &val); CHECK(val != 2, "inner2", "got %d != exp %d\n", val, 2); + bpf_map_lookup_elem(map3_fd, &key, &val); + CHECK(val != 3, "inner3", "got %d != exp %d\n", val, 3); - /* inner1 = input + 1, inner2 = input */ + /* inner2 = input, inner1 = input + 1, inner4 = input + 2 */ bpf_map_update_elem(outer_arr_fd, &key, &map2_fd, 0); bpf_map_update_elem(outer_hash_fd, &key, &map1_fd, 0); + bpf_map_update_elem(outer_arr_dyn_fd, &key, &map4_fd, 0); skel->bss->input = 3; usleep(1); - bpf_map_lookup_elem(map1_fd, &key, &val); CHECK(val != 4, "inner1", "got %d != exp %d\n", val, 4); bpf_map_lookup_elem(map2_fd, &key, &val); CHECK(val != 3, "inner2", "got %d != exp %d\n", val, 3); + bpf_map_lookup_elem(map4_fd, &key, &val); + CHECK(val != 5, "inner4", "got %d != exp %d\n", val, 5); + + /* inner5 = input + 2 */ + bpf_map_update_elem(outer_arr_dyn_fd, &key, &map5_fd, 0); + skel->bss->input = 5; + usleep(1); + bpf_map_lookup_elem(map5_fd, &key, &val); + CHECK(val != 7, "inner5", "got %d != exp %d\n", val, 7); for (i = 0; i < 5; i++) { val = i % 2 ? map1_fd : map2_fd; @@ -106,7 +119,13 @@ static void test_lookup_update(void) } err = bpf_map_update_elem(outer_arr_fd, &key, &val, 0); if (CHECK_FAIL(err)) { - printf("failed to update hash_of_maps on iter #%d\n", i); + printf("failed to update array_of_maps on iter #%d\n", i); + goto cleanup; + } + val = i % 2 ? map4_fd : map5_fd; + err = bpf_map_update_elem(outer_arr_dyn_fd, &key, &val, 0); + if (CHECK_FAIL(err)) { + printf("failed to update array_of_maps (dyn) on iter #%d\n", i); goto cleanup; } } diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c index 193fe0198b21..c1e0c8c7c55f 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c @@ -41,6 +41,43 @@ struct outer_arr { .values = { (void *)&inner_map1, 0, (void *)&inner_map2 }, }; +struct inner_map_sz3 { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(map_flags, BPF_F_INNER_MAP); + __uint(max_entries, 3); + __type(key, int); + __type(value, int); +} inner_map3 SEC(".maps"), + inner_map4 SEC(".maps"); + +struct inner_map_sz4 { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(map_flags, BPF_F_INNER_MAP); + __uint(max_entries, 5); + __type(key, int); + __type(value, int); +} inner_map5 SEC(".maps"); + +struct outer_arr_dyn { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 3); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(map_flags, BPF_F_INNER_MAP); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); + }); +} outer_arr_dyn SEC(".maps") = { + .values = { + [0] = (void *)&inner_map3, + [1] = (void *)&inner_map4, + [2] = (void *)&inner_map5, + }, +}; + struct outer_hash { __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); __uint(max_entries, 5); @@ -101,6 +138,12 @@ int handle__sys_enter(void *ctx) val = input + 1; bpf_map_update_elem(inner_map, &key, &val, 0); + inner_map = bpf_map_lookup_elem(&outer_arr_dyn, &key); + if (!inner_map) + return 1; + val = input + 2; + bpf_map_update_elem(inner_map, &key, &val, 0); + return 0; } -- cgit v1.3.1 From 57a73fe7c1988ae726de23ee7c3c49ca82221751 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 11 Oct 2020 01:40:05 +0200 Subject: bpf, selftests: Make redirect_neigh test more extensible Rename into test_tc_redirect.sh and move setup and test code into separate functions so they can be reused for newly added tests in here. Also remove the crude hack to override ifindex inside the object file via xxd and sed and just use a simple map instead. Map given iproute2 does not support BTF fully and therefore neither global data at this point. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201010234006.7075-6-daniel@iogearbox.net --- tools/testing/selftests/bpf/progs/test_tc_neigh.c | 40 +++-- tools/testing/selftests/bpf/test_tc_neigh.sh | 168 ------------------ tools/testing/selftests/bpf/test_tc_redirect.sh | 197 ++++++++++++++++++++++ 3 files changed, 219 insertions(+), 186 deletions(-) delete mode 100755 tools/testing/selftests/bpf/test_tc_neigh.sh create mode 100755 tools/testing/selftests/bpf/test_tc_redirect.sh (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c index 889a72c3024f..fe182616b112 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c @@ -13,17 +13,10 @@ #include #include -#ifndef barrier_data -# define barrier_data(ptr) asm volatile("": :"r"(ptr) :"memory") -#endif - #ifndef ctx_ptr # define ctx_ptr(field) (void *)(long)(field) #endif -#define dst_to_src_tmp 0xeeddddeeU -#define src_to_dst_tmp 0xeeffffeeU - #define ip4_src 0xac100164 /* 172.16.1.100 */ #define ip4_dst 0xac100264 /* 172.16.2.100 */ @@ -39,6 +32,18 @@ a.s6_addr32[3] == b.s6_addr32[3]) #endif +enum { + dev_src, + dev_dst, +}; + +struct bpf_map_def SEC("maps") ifindex_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 2, +}; + static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb, __be32 addr) { @@ -73,7 +78,14 @@ static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb, return v6_equal(ip6h->daddr, addr); } -SEC("chk_neigh") int tc_chk(struct __sk_buff *skb) +static __always_inline int get_dev_ifindex(int which) +{ + int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which); + + return ifindex ? *ifindex : 0; +} + +SEC("chk_egress") int tc_chk(struct __sk_buff *skb) { void *data_end = ctx_ptr(skb->data_end); void *data = ctx_ptr(skb->data); @@ -87,7 +99,6 @@ SEC("chk_neigh") int tc_chk(struct __sk_buff *skb) SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) { - int idx = dst_to_src_tmp; __u8 zero[ETH_ALEN * 2]; bool redirect = false; @@ -103,19 +114,15 @@ SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) if (!redirect) return TC_ACT_OK; - barrier_data(&idx); - idx = bpf_ntohl(idx); - __builtin_memset(&zero, 0, sizeof(zero)); if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) return TC_ACT_SHOT; - return bpf_redirect_neigh(idx, 0); + return bpf_redirect_neigh(get_dev_ifindex(dev_src), 0); } SEC("src_ingress") int tc_src(struct __sk_buff *skb) { - int idx = src_to_dst_tmp; __u8 zero[ETH_ALEN * 2]; bool redirect = false; @@ -131,14 +138,11 @@ SEC("src_ingress") int tc_src(struct __sk_buff *skb) if (!redirect) return TC_ACT_OK; - barrier_data(&idx); - idx = bpf_ntohl(idx); - __builtin_memset(&zero, 0, sizeof(zero)); if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) return TC_ACT_SHOT; - return bpf_redirect_neigh(idx, 0); + return bpf_redirect_neigh(get_dev_ifindex(dev_dst), 0); } char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tc_neigh.sh b/tools/testing/selftests/bpf/test_tc_neigh.sh deleted file mode 100755 index 31d8c3df8b24..000000000000 --- a/tools/testing/selftests/bpf/test_tc_neigh.sh +++ /dev/null @@ -1,168 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link -# between src and dst. The netns fwd has veth links to each src and dst. The -# client is in src and server in dst. The test installs a TC BPF program to each -# host facing veth in fwd which calls into bpf_redirect_peer() to perform the -# neigh addr population and redirect; it also installs a dropper prog on the -# egress side to drop skbs if neigh addrs were not populated. - -if [[ $EUID -ne 0 ]]; then - echo "This script must be run as root" - echo "FAIL" - exit 1 -fi - -# check that nc, dd, ping, ping6 and timeout are present -command -v nc >/dev/null 2>&1 || \ - { echo >&2 "nc is not available"; exit 1; } -command -v dd >/dev/null 2>&1 || \ - { echo >&2 "dd is not available"; exit 1; } -command -v timeout >/dev/null 2>&1 || \ - { echo >&2 "timeout is not available"; exit 1; } -command -v ping >/dev/null 2>&1 || \ - { echo >&2 "ping is not available"; exit 1; } -command -v ping6 >/dev/null 2>&1 || \ - { echo >&2 "ping6 is not available"; exit 1; } - -readonly GREEN='\033[0;92m' -readonly RED='\033[0;31m' -readonly NC='\033[0m' # No Color - -readonly PING_ARG="-c 3 -w 10 -q" - -readonly TIMEOUT=10 - -readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)" -readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)" -readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)" - -readonly IP4_SRC="172.16.1.100" -readonly IP4_DST="172.16.2.100" - -readonly IP6_SRC="::1:dead:beef:cafe" -readonly IP6_DST="::2:dead:beef:cafe" - -readonly IP4_SLL="169.254.0.1" -readonly IP4_DLL="169.254.0.2" -readonly IP4_NET="169.254.0.0" - -cleanup() -{ - ip netns del ${NS_SRC} - ip netns del ${NS_FWD} - ip netns del ${NS_DST} -} - -trap cleanup EXIT - -set -e - -ip netns add "${NS_SRC}" -ip netns add "${NS_FWD}" -ip netns add "${NS_DST}" - -ip link add veth_src type veth peer name veth_src_fwd -ip link add veth_dst type veth peer name veth_dst_fwd - -ip link set veth_src netns ${NS_SRC} -ip link set veth_src_fwd netns ${NS_FWD} - -ip link set veth_dst netns ${NS_DST} -ip link set veth_dst_fwd netns ${NS_FWD} - -ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src -ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst - -# The fwd netns automatically get a v6 LL address / routes, but also needs v4 -# one in order to start ARP probing. IP4_NET route is added to the endpoints -# so that the ARP processing will reply. - -ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd -ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd - -ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad -ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad - -ip -netns ${NS_SRC} link set dev veth_src up -ip -netns ${NS_FWD} link set dev veth_src_fwd up - -ip -netns ${NS_DST} link set dev veth_dst up -ip -netns ${NS_FWD} link set dev veth_dst_fwd up - -ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global -ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global -ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global - -ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global -ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global - -ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global -ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global -ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global - -ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global -ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global - -fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address) -fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address) - -ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src -ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst - -ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src -ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst - -veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex | awk '{printf "%08x\n", $1}') -veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex | awk '{printf "%08x\n", $1}') - -xxd -p < test_tc_neigh.o | sed "s/eeddddee/$veth_src/g" | xxd -r -p > test_tc_neigh.x.o -xxd -p < test_tc_neigh.x.o | sed "s/eeffffee/$veth_dst/g" | xxd -r -p > test_tc_neigh.y.o - -ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact -ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj test_tc_neigh.y.o sec src_ingress -ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj test_tc_neigh.y.o sec chk_neigh - -ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact -ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj test_tc_neigh.y.o sec dst_ingress -ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj test_tc_neigh.y.o sec chk_neigh - -rm -f test_tc_neigh.x.o test_tc_neigh.y.o - -ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &" -ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &" - -set +e - -TEST="TCPv4 connectivity test" -ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004" -if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 -fi -echo -e "${TEST}: ${GREEN}PASS${NC}" - -TEST="TCPv6 connectivity test" -ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006" -if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 -fi -echo -e "${TEST}: ${GREEN}PASS${NC}" - -TEST="ICMPv4 connectivity test" -ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST} -if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 -fi -echo -e "${TEST}: ${GREEN}PASS${NC}" - -TEST="ICMPv6 connectivity test" -ip netns exec ${NS_SRC} ping6 $PING_ARG ${IP6_DST} -if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 -fi -echo -e "${TEST}: ${GREEN}PASS${NC}" diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh new file mode 100755 index 000000000000..6ad441405132 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tc_redirect.sh @@ -0,0 +1,197 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link +# between src and dst. The netns fwd has veth links to each src and dst. The +# client is in src and server in dst. The test installs a TC BPF program to each +# host facing veth in fwd which calls into bpf_redirect_peer() to perform the +# neigh addr population and redirect; it also installs a dropper prog on the +# egress side to drop skbs if neigh addrs were not populated. + +if [[ $EUID -ne 0 ]]; then + echo "This script must be run as root" + echo "FAIL" + exit 1 +fi + +# check that needed tools are present +command -v nc >/dev/null 2>&1 || \ + { echo >&2 "nc is not available"; exit 1; } +command -v dd >/dev/null 2>&1 || \ + { echo >&2 "dd is not available"; exit 1; } +command -v timeout >/dev/null 2>&1 || \ + { echo >&2 "timeout is not available"; exit 1; } +command -v ping >/dev/null 2>&1 || \ + { echo >&2 "ping is not available"; exit 1; } +command -v ping6 >/dev/null 2>&1 || \ + { echo >&2 "ping6 is not available"; exit 1; } +command -v perl >/dev/null 2>&1 || \ + { echo >&2 "perl is not available"; exit 1; } +command -v jq >/dev/null 2>&1 || \ + { echo >&2 "jq is not available"; exit 1; } +command -v bpftool >/dev/null 2>&1 || \ + { echo >&2 "bpftool is not available"; exit 1; } + +readonly GREEN='\033[0;92m' +readonly RED='\033[0;31m' +readonly NC='\033[0m' # No Color + +readonly PING_ARG="-c 3 -w 10 -q" + +readonly TIMEOUT=10 + +readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)" +readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)" +readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)" + +readonly IP4_SRC="172.16.1.100" +readonly IP4_DST="172.16.2.100" + +readonly IP6_SRC="::1:dead:beef:cafe" +readonly IP6_DST="::2:dead:beef:cafe" + +readonly IP4_SLL="169.254.0.1" +readonly IP4_DLL="169.254.0.2" +readonly IP4_NET="169.254.0.0" + +netns_cleanup() +{ + ip netns del ${NS_SRC} + ip netns del ${NS_FWD} + ip netns del ${NS_DST} +} + +netns_setup() +{ + ip netns add "${NS_SRC}" + ip netns add "${NS_FWD}" + ip netns add "${NS_DST}" + + ip link add veth_src type veth peer name veth_src_fwd + ip link add veth_dst type veth peer name veth_dst_fwd + + ip link set veth_src netns ${NS_SRC} + ip link set veth_src_fwd netns ${NS_FWD} + + ip link set veth_dst netns ${NS_DST} + ip link set veth_dst_fwd netns ${NS_FWD} + + ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src + ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst + + # The fwd netns automatically get a v6 LL address / routes, but also + # needs v4 one in order to start ARP probing. IP4_NET route is added + # to the endpoints so that the ARP processing will reply. + + ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd + ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd + + ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad + ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad + + ip -netns ${NS_SRC} link set dev veth_src up + ip -netns ${NS_FWD} link set dev veth_src_fwd up + + ip -netns ${NS_DST} link set dev veth_dst up + ip -netns ${NS_FWD} link set dev veth_dst_fwd up + + ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global + ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global + ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global + + ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global + ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global + + ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global + ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global + ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global + + ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global + ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global + + fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address) + fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address) + + ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src + ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst + + ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src + ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst +} + +netns_test_connectivity() +{ + set +e + + ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &" + ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &" + + TEST="TCPv4 connectivity test" + ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004" + if [ $? -ne 0 ]; then + echo -e "${TEST}: ${RED}FAIL${NC}" + exit 1 + fi + echo -e "${TEST}: ${GREEN}PASS${NC}" + + TEST="TCPv6 connectivity test" + ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006" + if [ $? -ne 0 ]; then + echo -e "${TEST}: ${RED}FAIL${NC}" + exit 1 + fi + echo -e "${TEST}: ${GREEN}PASS${NC}" + + TEST="ICMPv4 connectivity test" + ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST} + if [ $? -ne 0 ]; then + echo -e "${TEST}: ${RED}FAIL${NC}" + exit 1 + fi + echo -e "${TEST}: ${GREEN}PASS${NC}" + + TEST="ICMPv6 connectivity test" + ip netns exec ${NS_SRC} ping6 $PING_ARG ${IP6_DST} + if [ $? -ne 0 ]; then + echo -e "${TEST}: ${RED}FAIL${NC}" + exit 1 + fi + echo -e "${TEST}: ${GREEN}PASS${NC}" + + set -e +} + +hex_mem_str() +{ + perl -e 'print join(" ", unpack("(H2)8", pack("L", @ARGV)))' $1 +} + +netns_setup_neigh() +{ + ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact + ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj test_tc_neigh.o sec src_ingress + ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj test_tc_neigh.o sec chk_egress + + ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact + ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj test_tc_neigh.o sec dst_ingress + ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj test_tc_neigh.o sec chk_egress + + veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex) + veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex) + + progs=$(ip netns exec ${NS_FWD} bpftool net --json | jq -r '.[] | .tc | map(.id) | .[]') + for prog in $progs; do + map=$(bpftool prog show id $prog --json | jq -r '.map_ids | .? | .[]') + if [ ! -z "$map" ]; then + bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src) + bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst) + fi + done +} + +trap netns_cleanup EXIT +set -e + +netns_setup +netns_setup_neigh +netns_test_connectivity -- cgit v1.3.1 From 9f4c53ca23a28c891d2bd3ff4738f7d95ba0303b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 11 Oct 2020 01:40:06 +0200 Subject: bpf, selftests: Add redirect_peer selftest Extend the test_tc_redirect test and add a small test that exercises the new redirect_peer() helper for the IPv4 and IPv6 case. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201010234006.7075-7-daniel@iogearbox.net --- tools/testing/selftests/bpf/progs/test_tc_peer.c | 45 ++++++++++++++++++++++++ tools/testing/selftests/bpf/test_tc_redirect.sh | 25 ++++++++----- 2 files changed, 61 insertions(+), 9 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/test_tc_peer.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c new file mode 100644 index 000000000000..fc84a7685aa2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +#include +#include +#include + +#include + +enum { + dev_src, + dev_dst, +}; + +struct bpf_map_def SEC("maps") ifindex_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 2, +}; + +static __always_inline int get_dev_ifindex(int which) +{ + int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which); + + return ifindex ? *ifindex : 0; +} + +SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +{ + return TC_ACT_SHOT; +} + +SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +{ + return bpf_redirect_peer(get_dev_ifindex(dev_src), 0); +} + +SEC("src_ingress") int tc_src(struct __sk_buff *skb) +{ + return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0); +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh index 6ad441405132..6d7482562140 100755 --- a/tools/testing/selftests/bpf/test_tc_redirect.sh +++ b/tools/testing/selftests/bpf/test_tc_redirect.sh @@ -4,9 +4,10 @@ # This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link # between src and dst. The netns fwd has veth links to each src and dst. The # client is in src and server in dst. The test installs a TC BPF program to each -# host facing veth in fwd which calls into bpf_redirect_peer() to perform the -# neigh addr population and redirect; it also installs a dropper prog on the -# egress side to drop skbs if neigh addrs were not populated. +# host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the +# neigh addr population and redirect or ii) bpf_redirect_peer() for namespace +# switch from ingress side; it also installs a checker prog on the egress side +# to drop unexpected traffic. if [[ $EUID -ne 0 ]]; then echo "This script must be run as root" @@ -166,15 +167,17 @@ hex_mem_str() perl -e 'print join(" ", unpack("(H2)8", pack("L", @ARGV)))' $1 } -netns_setup_neigh() +netns_setup_bpf() { + local obj=$1 + ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact - ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj test_tc_neigh.o sec src_ingress - ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj test_tc_neigh.o sec chk_egress + ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress + ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj $obj sec chk_egress ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact - ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj test_tc_neigh.o sec dst_ingress - ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj test_tc_neigh.o sec chk_egress + ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress + ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex) veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex) @@ -193,5 +196,9 @@ trap netns_cleanup EXIT set -e netns_setup -netns_setup_neigh +netns_setup_bpf test_tc_neigh.o +netns_test_connectivity +netns_cleanup +netns_setup +netns_setup_bpf test_tc_peer.o netns_test_connectivity -- cgit v1.3.1 From 82c200be7c4363c1769b32ffea485407af178bb6 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 8 Oct 2020 14:57:00 +0300 Subject: selftests: net: mscc: ocelot: add test for VLAN modify action Create a test that changes a VLAN ID from 200 to 300. We also need to modify the preferences of the filters installed for the other rules so that they are unique, because we now install the "tc-vlan modify" filter in VCAP IS1 only temporarily, and we need to perform the deletion by filter preference number. Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- .../drivers/net/ocelot/tc_flower_chains.sh | 47 +++++++++++++++++++++- 1 file changed, 45 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh index 71a538add08a..beee0d5646a6 100755 --- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh +++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh @@ -166,6 +166,9 @@ setup_prepare() ip link add link $eth3 name $eth3.100 type vlan id 100 ip link set $eth3.100 up + ip link add link $eth3 name $eth3.200 type vlan id 200 + ip link set $eth3.200 up + tc filter add dev $eth0 ingress chain $(IS1 1) pref 1 \ protocol 802.1Q flower skip_sw vlan_id 100 \ action vlan pop \ @@ -175,12 +178,12 @@ setup_prepare() flower skip_sw indev $eth1 \ action vlan push protocol 802.1Q id 100 - tc filter add dev $eth0 ingress chain $(IS1 0) \ + tc filter add dev $eth0 ingress chain $(IS1 0) pref 2 \ protocol ipv4 flower skip_sw src_ip 10.1.1.2 \ action skbedit priority 7 \ action goto chain $(IS1 1) - tc filter add dev $eth0 ingress chain $(IS2 0 0) \ + tc filter add dev $eth0 ingress chain $(IS2 0 0) pref 1 \ protocol ipv4 flower skip_sw ip_proto udp dst_port 5201 \ action police rate 50mbit burst 64k \ action goto chain $(IS2 1 0) @@ -188,6 +191,7 @@ setup_prepare() cleanup() { + ip link del $eth3.200 ip link del $eth3.100 tc qdisc del dev $eth0 clsact ip link del br0 @@ -238,6 +242,44 @@ test_vlan_push() tcpdump_cleanup } +test_vlan_modify() +{ + printf "Testing VLAN modification.. " + + ip link set br0 type bridge vlan_filtering 1 + bridge vlan add dev $eth0 vid 200 + bridge vlan add dev $eth0 vid 300 + bridge vlan add dev $eth1 vid 300 + + tc filter add dev $eth0 ingress chain $(IS1 2) pref 3 \ + protocol 802.1Q flower skip_sw vlan_id 200 \ + action vlan modify id 300 \ + action goto chain $(IS2 0 0) + + tcpdump_start $eth2 + + $MZ $eth3.200 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip + + sleep 1 + + tcpdump_stop + + if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then + echo "OK" + else + echo "FAIL" + fi + + tcpdump_cleanup + + tc filter del dev $eth0 ingress chain $(IS1 2) pref 3 + + bridge vlan del dev $eth0 vid 200 + bridge vlan del dev $eth0 vid 300 + bridge vlan del dev $eth1 vid 300 + ip link set br0 type bridge vlan_filtering 0 +} + test_skbedit_priority() { local num_pkts=100 @@ -262,6 +304,7 @@ trap cleanup EXIT ALL_TESTS=" test_vlan_pop test_vlan_push + test_vlan_modify test_skbedit_priority " -- cgit v1.3.1 From ea2f7da1799bfc5a98b8e0c9e9cf1a7fedc32549 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Oct 2020 13:58:34 +0200 Subject: selftests: netfilter: extend nfqueue test case add a test with re-queueing: usespace doesn't pass accept verdict, but tells to re-queue to another nf_queue instance. Also, make the second nf-queue program use non-gso mode, kernel will have to perform software segmentation. Lastly, do not queue every packet, just one per second, and add delay when re-injecting the packet to the kernel. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nf-queue.c | 61 ++++++++++++++++++---- tools/testing/selftests/netfilter/nft_queue.sh | 70 +++++++++++++++++++++----- 2 files changed, 109 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/netfilter/nf-queue.c b/tools/testing/selftests/netfilter/nf-queue.c index 29c73bce38fa..9e56b9d47037 100644 --- a/tools/testing/selftests/netfilter/nf-queue.c +++ b/tools/testing/selftests/netfilter/nf-queue.c @@ -17,9 +17,12 @@ struct options { bool count_packets; + bool gso_enabled; int verbose; unsigned int queue_num; unsigned int timeout; + uint32_t verdict; + uint32_t delay_ms; }; static unsigned int queue_stats[5]; @@ -27,7 +30,7 @@ static struct options opts; static void help(const char *p) { - printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num]\n", p); + printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p); } static int parse_attr_cb(const struct nlattr *attr, void *data) @@ -162,7 +165,7 @@ nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num) } static struct nlmsghdr * -nfq_build_verdict(char *buf, int id, int queue_num, int verd) +nfq_build_verdict(char *buf, int id, int queue_num, uint32_t verd) { struct nfqnl_msg_verdict_hdr vh = { .verdict = htonl(verd), @@ -189,9 +192,6 @@ static void print_stats(void) unsigned int last, total; int i; - if (!opts.count_packets) - return; - total = 0; last = queue_stats[0]; @@ -234,7 +234,8 @@ struct mnl_socket *open_queue(void) nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num); - flags = NFQA_CFG_F_GSO | NFQA_CFG_F_UID_GID; + flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0; + flags |= NFQA_CFG_F_UID_GID; mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags)); mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags)); @@ -255,6 +256,17 @@ struct mnl_socket *open_queue(void) return nl; } +static void sleep_ms(uint32_t delay) +{ + struct timespec ts = { .tv_sec = delay / 1000 }; + + delay %= 1000; + + ts.tv_nsec = delay * 1000llu * 1000llu; + + nanosleep(&ts, NULL); +} + static int mainloop(void) { unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE; @@ -278,7 +290,7 @@ static int mainloop(void) ret = mnl_socket_recvfrom(nl, buf, buflen); if (ret == -1) { - if (errno == ENOBUFS) + if (errno == ENOBUFS || errno == EINTR) continue; if (errno == EAGAIN) { @@ -298,7 +310,10 @@ static int mainloop(void) } id = ret - MNL_CB_OK; - nlh = nfq_build_verdict(buf, id, opts.queue_num, NF_ACCEPT); + if (opts.delay_ms) + sleep_ms(opts.delay_ms); + + nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict); if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { perror("mnl_socket_sendto"); exit(EXIT_FAILURE); @@ -314,7 +329,7 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "chvt:q:")) != -1) { + while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) { switch (c) { case 'c': opts.count_packets = true; @@ -328,20 +343,48 @@ static void parse_opts(int argc, char **argv) if (opts.queue_num > 0xffff) opts.queue_num = 0; break; + case 'Q': + opts.verdict = atoi(optarg); + if (opts.verdict > 0xffff) { + fprintf(stderr, "Expected destination queue number\n"); + exit(1); + } + + opts.verdict <<= 16; + opts.verdict |= NF_QUEUE; + break; + case 'd': + opts.delay_ms = atoi(optarg); + if (opts.delay_ms == 0) { + fprintf(stderr, "Expected nonzero delay (in milliseconds)\n"); + exit(1); + } + break; case 't': opts.timeout = atoi(optarg); break; + case 'G': + opts.gso_enabled = false; + break; case 'v': opts.verbose++; break; } } + + if (opts.verdict != NF_ACCEPT && (opts.verdict >> 16 == opts.queue_num)) { + fprintf(stderr, "Cannot use same destination and source queue\n"); + exit(1); + } } int main(int argc, char *argv[]) { int ret; + opts.verdict = NF_ACCEPT; + opts.gso_enabled = true; + parse_opts(argc, argv); ret = mainloop(); diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh index 6898448b4266..3d202b90b33d 100755 --- a/tools/testing/selftests/netfilter/nft_queue.sh +++ b/tools/testing/selftests/netfilter/nft_queue.sh @@ -12,6 +12,7 @@ sfx=$(mktemp -u "XXXXXXXX") ns1="ns1-$sfx" ns2="ns2-$sfx" nsrouter="nsrouter-$sfx" +timeout=4 cleanup() { @@ -20,6 +21,7 @@ cleanup() ip netns del ${nsrouter} rm -f "$TMPFILE0" rm -f "$TMPFILE1" + rm -f "$TMPFILE2" "$TMPFILE3" } nft --version > /dev/null 2>&1 @@ -42,6 +44,8 @@ fi TMPFILE0=$(mktemp) TMPFILE1=$(mktemp) +TMPFILE2=$(mktemp) +TMPFILE3=$(mktemp) trap cleanup EXIT ip netns add ${ns1} @@ -83,7 +87,7 @@ load_ruleset() { local name=$1 local prio=$2 -ip netns exec ${nsrouter} nft -f - < /dev/null + ip netns exec ${ns1} ping -W 2 -c 1 -q 10.0.2.99 > /dev/null lret=$? elif [ $proto = "ip6" ]; then - ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null + ip netns exec ${ns1} ping -W 2 -c 1 -q dead:2::99 > /dev/null lret=$? else lret=111 @@ -214,8 +218,8 @@ test_queue() local last="" # spawn nf-queue listeners - ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t 3 > "$TMPFILE0" & - ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t 3 > "$TMPFILE1" & + ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t $timeout > "$TMPFILE0" & + ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE1" & sleep 1 test_ping ret=$? @@ -250,11 +254,11 @@ test_queue() test_tcp_forward() { - ip netns exec ${nsrouter} ./nf-queue -q 2 -t 10 & + ip netns exec ${nsrouter} ./nf-queue -q 2 -t $timeout & local nfqpid=$! tmpfile=$(mktemp) || exit 1 - dd conv=sparse status=none if=/dev/zero bs=1M count=100 of=$tmpfile + dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null & local rpid=$! @@ -270,15 +274,13 @@ test_tcp_forward() test_tcp_localhost() { - tc -net "${nsrouter}" qdisc add dev lo root netem loss random 1% - tmpfile=$(mktemp) || exit 1 - dd conv=sparse status=none if=/dev/zero bs=1M count=900 of=$tmpfile + dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null & local rpid=$! - ip netns exec ${nsrouter} ./nf-queue -q 3 -t 30 & + ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout & local nfqpid=$! sleep 1 @@ -287,6 +289,47 @@ test_tcp_localhost() wait $rpid [ $? -eq 0 ] && echo "PASS: tcp via loopback" + wait 2>/dev/null +} + +test_tcp_localhost_requeue() +{ +ip netns exec ${nsrouter} nft -f /dev/stdin </dev/null & + local rpid=$! + + ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE2" & + + # nfqueue 1 will be called via output hook. But this time, + # re-queue the packet to nfqueue program on queue 2. + ip netns exec ${nsrouter} ./nf-queue -G -d 150 -c -q 0 -Q 1 -t $timeout > "$TMPFILE3" & + + sleep 1 + ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null + rm -f "$tmpfile" + + wait + + if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then + echo "FAIL: lost packets during requeue?!" 1>&2 + return + fi + + echo "PASS: tcp via loopback and re-queueing" } ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null @@ -328,5 +371,6 @@ test_queue 20 test_tcp_forward test_tcp_localhost +test_tcp_localhost_requeue exit $ret -- cgit v1.3.1 From cdf43c4bfa1a465ba7996165b75c49b5c0e46b50 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sat, 10 Oct 2020 22:10:04 -0700 Subject: bpf, selftests: Add option to test_sockmap to omit adding parser program Add option to allow running without a parser program in place. To test with ping/pong program use, # test_sockmap -t ping --txmsg_omit_skb_parser this will send packets between two socket bouncing through a proxy socket that does not use a parser program. (ping) (pong) sender proxy_recv proxy_send recv | | | | verdict -----+ | | | | | +----------------+ +------------+ Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/160239300387.8495.11908295143121563076.stgit@john-Precision-5820-Tower --- tools/testing/selftests/bpf/test_sockmap.c | 35 ++++++++++++++++++------------ 1 file changed, 21 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 5cf45455de42..419c0b010d14 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -86,6 +86,7 @@ int txmsg_ktls_skb_redir; int ktls; int peek_flag; int skb_use_parser; +int txmsg_omit_skb_parser; static const struct option long_options[] = { {"help", no_argument, NULL, 'h' }, @@ -111,6 +112,7 @@ static const struct option long_options[] = { {"txmsg_redir_skb", no_argument, &txmsg_redir_skb, 1 }, {"ktls", no_argument, &ktls, 1 }, {"peek", no_argument, &peek_flag, 1 }, + {"txmsg_omit_skb_parser", no_argument, &txmsg_omit_skb_parser, 1}, {"whitelist", required_argument, NULL, 'n' }, {"blacklist", required_argument, NULL, 'b' }, {0, 0, NULL, 0 } @@ -175,6 +177,7 @@ static void test_reset(void) txmsg_apply = txmsg_cork = 0; txmsg_ingress = txmsg_redir_skb = 0; txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0; + txmsg_omit_skb_parser = 0; skb_use_parser = 0; } @@ -912,13 +915,15 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test) goto run; /* Attach programs to sockmap */ - err = bpf_prog_attach(prog_fd[0], map_fd[0], - BPF_SK_SKB_STREAM_PARSER, 0); - if (err) { - fprintf(stderr, - "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n", - prog_fd[0], map_fd[0], err, strerror(errno)); - return err; + if (!txmsg_omit_skb_parser) { + err = bpf_prog_attach(prog_fd[0], map_fd[0], + BPF_SK_SKB_STREAM_PARSER, 0); + if (err) { + fprintf(stderr, + "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n", + prog_fd[0], map_fd[0], err, strerror(errno)); + return err; + } } err = bpf_prog_attach(prog_fd[1], map_fd[0], @@ -931,13 +936,15 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test) /* Attach programs to TLS sockmap */ if (txmsg_ktls_skb) { - err = bpf_prog_attach(prog_fd[0], map_fd[8], - BPF_SK_SKB_STREAM_PARSER, 0); - if (err) { - fprintf(stderr, - "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n", - prog_fd[0], map_fd[8], err, strerror(errno)); - return err; + if (!txmsg_omit_skb_parser) { + err = bpf_prog_attach(prog_fd[0], map_fd[8], + BPF_SK_SKB_STREAM_PARSER, 0); + if (err) { + fprintf(stderr, + "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n", + prog_fd[0], map_fd[8], err, strerror(errno)); + return err; + } } err = bpf_prog_attach(prog_fd[2], map_fd[8], -- cgit v1.3.1 From a24fb420a5775581050026eba2264c87927debf7 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sat, 10 Oct 2020 22:10:26 -0700 Subject: bpf, selftests: Add three new sockmap tests for verdict only programs Here we add three new tests for sockmap to test having a verdict program without setting the parser program. The first test covers the most simply case, sender proxy_recv proxy_send recv | | | | verdict -----+ | | | | | +----------------+ +------------+ We load the verdict program on the proxy_recv socket without a parser program. It then does a redirect into the send path of the proxy_send socket using sendpage_locked(). Next we test the drop case to ensure if we kfree_skb as a result of the verdict program everything behaves as expected. Next we test the same configuration above, but with ktls and a redirect into socket ingress queue. Shown here tls tls sender proxy_recv proxy_send recv | | | | verdict ------------------+ | | redirect_ingress +----------------+ Also to set up ping/pong test Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/160239302638.8495.17125996694402793471.stgit@john-Precision-5820-Tower --- tools/testing/selftests/bpf/test_sockmap.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 419c0b010d14..0fa1e421c3d7 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -1472,12 +1472,29 @@ static void test_txmsg_skb(int cgrp, struct sockmap_options *opt) txmsg_ktls_skb_drop = 0; txmsg_ktls_skb_redir = 1; test_exec(cgrp, opt); + txmsg_ktls_skb_redir = 0; + + /* Tests that omit skb_parser */ + txmsg_omit_skb_parser = 1; + ktls = 0; + txmsg_ktls_skb = 0; + test_exec(cgrp, opt); + + txmsg_ktls_skb_drop = 1; + test_exec(cgrp, opt); + txmsg_ktls_skb_drop = 0; + + txmsg_ktls_skb_redir = 1; + test_exec(cgrp, opt); + + ktls = 1; + test_exec(cgrp, opt); + txmsg_omit_skb_parser = 0; opt->data_test = data; ktls = k; } - /* Test cork with hung data. This tests poor usage patterns where * cork can leave data on the ring if user program is buggy and * doesn't flush them somehow. They do take some time however -- cgit v1.3.1 From ab0a40ea88204e1291b56da8128e2845fec8ee88 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 9 Oct 2020 14:25:23 +0200 Subject: perf build: Allow nested externs to enable BUILD_BUG() usage Currently the BUILD_BUG() macro is expanded to the following: do { extern void __compiletime_assert_0(void) __attribute__((error("BUILD_BUG failed"))); if (!(!(1))) __compiletime_assert_0(); } while (0); If used in a function body this would obviously produce build errors with -Wnested-externs and -Werror. To enable BUILD_BUG() usage in tools/arch/x86/lib/insn.c which perf includes in intel-pt-decoder, build perf without -Wnested-externs. Reported-by: Stephen Rothwell Tested-by: Stephen Rothwell # build tested Signed-off-by: Vasily Gorbik Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/patch-1.thread-251403.git-2514037e9477.your-ad-here.call-01602244460-ext-7088@work.hours --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 190be4fa5c21..8137a6046a47 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -16,7 +16,7 @@ $(shell printf "" > $(OUTPUT).config-detected) detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected) detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected) -CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS) +CFLAGS := $(EXTRA_CFLAGS) $(filter-out -Wnested-externs,$(EXTRA_WARNINGS)) include $(srctree)/tools/scripts/Makefile.arch -- cgit v1.3.1 From 0bf02a0d80427f263195c1e5a4c8ada14bd5d261 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 12 Oct 2020 16:02:09 +0900 Subject: perf bench: Add build-id injection benchmark Sometimes I can see that 'perf record' piped with 'perf inject' take a long time processing build-ids. So introduce a inject-build-id benchmark to the internals benchmark suite to measure its overhead regularly. It runs the 'perf inject' command internally and feeds the given number of synthesized events (MMAP2 + SAMPLE basically). Usage: perf bench internals inject-build-id -i, --iterations Number of iterations used to compute average (default: 100) -m, --nr-mmaps Number of mmap events for each iteration (default: 100) -n, --nr-samples Number of sample events per mmap event (default: 100) -v, --verbose be more verbose (show iteration count, DSO name, etc) By default, it measures average processing time of 100 MMAP2 events and 10000 SAMPLE events. Below is a result on my laptop. $ perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 25.789 msec (+- 0.202 msec) Average time per event: 2.528 usec (+- 0.020 usec) Average memory usage: 8411 KB (+- 7 KB) Committer testing: $ perf bench Usage: perf bench [] [] # List of all available benchmark collections: sched: Scheduler and IPC benchmarks syscall: System call benchmarks mem: Memory access benchmarks numa: NUMA scheduling and MM benchmarks futex: Futex stressing benchmarks epoll: Epoll stressing benchmarks internals: Perf-internals benchmarks all: All benchmarks $ perf bench internals # List of available benchmarks for collection 'internals': synthesize: Benchmark perf event synthesis kallsyms-parse: Benchmark kallsyms parsing inject-build-id: Benchmark build-id injection $ perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 14.202 msec (+- 0.059 msec) Average time per event: 1.392 usec (+- 0.006 usec) Average memory usage: 12650 KB (+- 10 KB) Average build-id-all injection took: 12.831 msec (+- 0.071 msec) Average time per event: 1.258 usec (+- 0.007 usec) Average memory usage: 11895 KB (+- 10 KB) $ $ perf stat -r5 perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 14.380 msec (+- 0.056 msec) Average time per event: 1.410 usec (+- 0.006 usec) Average memory usage: 12608 KB (+- 11 KB) Average build-id-all injection took: 11.889 msec (+- 0.064 msec) Average time per event: 1.166 usec (+- 0.006 usec) Average memory usage: 11838 KB (+- 10 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 14.246 msec (+- 0.065 msec) Average time per event: 1.397 usec (+- 0.006 usec) Average memory usage: 12744 KB (+- 10 KB) Average build-id-all injection took: 12.019 msec (+- 0.066 msec) Average time per event: 1.178 usec (+- 0.006 usec) Average memory usage: 11963 KB (+- 10 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 14.321 msec (+- 0.067 msec) Average time per event: 1.404 usec (+- 0.007 usec) Average memory usage: 12690 KB (+- 10 KB) Average build-id-all injection took: 11.909 msec (+- 0.041 msec) Average time per event: 1.168 usec (+- 0.004 usec) Average memory usage: 11938 KB (+- 10 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 14.287 msec (+- 0.059 msec) Average time per event: 1.401 usec (+- 0.006 usec) Average memory usage: 12864 KB (+- 10 KB) Average build-id-all injection took: 11.862 msec (+- 0.058 msec) Average time per event: 1.163 usec (+- 0.006 usec) Average memory usage: 12103 KB (+- 10 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 14.402 msec (+- 0.053 msec) Average time per event: 1.412 usec (+- 0.005 usec) Average memory usage: 12876 KB (+- 10 KB) Average build-id-all injection took: 11.826 msec (+- 0.061 msec) Average time per event: 1.159 usec (+- 0.006 usec) Average memory usage: 12111 KB (+- 10 KB) Performance counter stats for 'perf bench internals inject-build-id' (5 runs): 4,267.48 msec task-clock:u # 1.502 CPUs utilized ( +- 0.14% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 102,092 page-faults:u # 0.024 M/sec ( +- 0.08% ) 3,894,589,578 cycles:u # 0.913 GHz ( +- 0.19% ) (83.49%) 140,078,421 stalled-cycles-frontend:u # 3.60% frontend cycles idle ( +- 0.77% ) (83.34%) 948,581,189 stalled-cycles-backend:u # 24.36% backend cycles idle ( +- 0.46% ) (83.25%) 5,835,587,719 instructions:u # 1.50 insn per cycle # 0.16 stalled cycles per insn ( +- 0.21% ) (83.24%) 1,267,423,636 branches:u # 296.996 M/sec ( +- 0.22% ) (83.12%) 17,484,290 branch-misses:u # 1.38% of all branches ( +- 0.12% ) (83.55%) 2.84176 +- 0.00222 seconds time elapsed ( +- 0.08% ) $ Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20201012070214.2074921-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/Build | 1 + tools/perf/bench/bench.h | 1 + tools/perf/bench/inject-buildid.c | 460 ++++++++++++++++++++++++++++++++++++++ tools/perf/builtin-bench.c | 1 + tools/perf/builtin-inject.c | 9 +- tools/perf/util/build-id.h | 4 + 6 files changed, 471 insertions(+), 5 deletions(-) create mode 100644 tools/perf/bench/inject-buildid.c (limited to 'tools') diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index dd68a40a790c..8b52591338d6 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -12,6 +12,7 @@ perf-y += epoll-ctl.o perf-y += synthesize.o perf-y += kallsyms-parse.o perf-y += find-bit-bench.o +perf-y += inject-buildid.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 2804812d4154..eac36afab2b3 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -47,6 +47,7 @@ int bench_epoll_wait(int argc, const char **argv); int bench_epoll_ctl(int argc, const char **argv); int bench_synthesize(int argc, const char **argv); int bench_kallsyms_parse(int argc, const char **argv); +int bench_inject_build_id(int argc, const char **argv); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c new file mode 100644 index 000000000000..0fccf2a9e95b --- /dev/null +++ b/tools/perf/bench/inject-buildid.c @@ -0,0 +1,460 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bench.h" +#include "util/data.h" +#include "util/stat.h" +#include "util/debug.h" +#include "util/event.h" +#include "util/symbol.h" +#include "util/session.h" +#include "util/build-id.h" +#include "util/synthetic-events.h" + +#define MMAP_DEV_MAJOR 8 +#define DSO_MMAP_RATIO 4 + +static unsigned int iterations = 100; +static unsigned int nr_mmaps = 100; +static unsigned int nr_samples = 100; /* samples per mmap */ + +static u64 bench_sample_type; +static u16 bench_id_hdr_size; + +struct bench_data { + int pid; + int input_pipe[2]; + int output_pipe[2]; + pthread_t th; +}; + +struct bench_dso { + struct list_head list; + char *name; + int ino; +}; + +static int nr_dsos; +static struct bench_dso *dsos; + +extern int cmd_inject(int argc, const char *argv[]); + +static const struct option options[] = { + OPT_UINTEGER('i', "iterations", &iterations, + "Number of iterations used to compute average (default: 100)"), + OPT_UINTEGER('m', "nr-mmaps", &nr_mmaps, + "Number of mmap events for each iteration (default: 100)"), + OPT_UINTEGER('n', "nr-samples", &nr_samples, + "Number of sample events per mmap event (default: 100)"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show iteration count, DSO name, etc)"), + OPT_END() +}; + +static const char *const bench_usage[] = { + "perf bench internals inject-build-id ", + NULL +}; + +/* + * Helper for collect_dso that adds the given file as a dso to dso_list + * if it contains a build-id. Stops after collecting 4 times more than + * we need (for MMAP2 events). + */ +static int add_dso(const char *fpath, const struct stat *sb __maybe_unused, + int typeflag, struct FTW *ftwbuf __maybe_unused) +{ + struct bench_dso *dso = &dsos[nr_dsos]; + unsigned char build_id[BUILD_ID_SIZE]; + + if (typeflag == FTW_D || typeflag == FTW_SL) + return 0; + + if (filename__read_build_id(fpath, build_id, BUILD_ID_SIZE) < 0) + return 0; + + dso->name = realpath(fpath, NULL); + if (dso->name == NULL) + return -1; + + dso->ino = nr_dsos++; + pr_debug2(" Adding DSO: %s\n", fpath); + + /* stop if we collected enough DSOs */ + if ((unsigned int)nr_dsos == DSO_MMAP_RATIO * nr_mmaps) + return 1; + + return 0; +} + +static void collect_dso(void) +{ + dsos = calloc(nr_mmaps * DSO_MMAP_RATIO, sizeof(*dsos)); + if (dsos == NULL) { + printf(" Memory allocation failed\n"); + exit(1); + } + + if (nftw("/usr/lib/", add_dso, 10, FTW_PHYS) < 0) + return; + + pr_debug(" Collected %d DSOs\n", nr_dsos); +} + +static void release_dso(void) +{ + int i; + + for (i = 0; i < nr_dsos; i++) { + struct bench_dso *dso = &dsos[i]; + + free(dso->name); + } + free(dsos); +} + +/* Fake address used by mmap and sample events */ +static u64 dso_map_addr(struct bench_dso *dso) +{ + return 0x400000ULL + dso->ino * 8192ULL; +} + +static u32 synthesize_attr(struct bench_data *data) +{ + union perf_event event; + + memset(&event, 0, sizeof(event.attr) + sizeof(u64)); + + event.header.type = PERF_RECORD_HEADER_ATTR; + event.header.size = sizeof(event.attr) + sizeof(u64); + + event.attr.attr.type = PERF_TYPE_SOFTWARE; + event.attr.attr.config = PERF_COUNT_SW_TASK_CLOCK; + event.attr.attr.exclude_kernel = 1; + event.attr.attr.sample_id_all = 1; + event.attr.attr.sample_type = bench_sample_type; + + return writen(data->input_pipe[1], &event, event.header.size); +} + +static u32 synthesize_fork(struct bench_data *data) +{ + union perf_event event; + + memset(&event, 0, sizeof(event.fork) + bench_id_hdr_size); + + event.header.type = PERF_RECORD_FORK; + event.header.misc = PERF_RECORD_MISC_FORK_EXEC; + event.header.size = sizeof(event.fork) + bench_id_hdr_size; + + event.fork.ppid = 1; + event.fork.ptid = 1; + event.fork.pid = data->pid; + event.fork.tid = data->pid; + + return writen(data->input_pipe[1], &event, event.header.size); +} + +static u32 synthesize_mmap(struct bench_data *data, struct bench_dso *dso, + u64 timestamp) +{ + union perf_event event; + size_t len = offsetof(struct perf_record_mmap2, filename); + u64 *id_hdr_ptr = (void *)&event; + int ts_idx; + + len += roundup(strlen(dso->name) + 1, 8) + bench_id_hdr_size; + + memset(&event, 0, min(len, sizeof(event.mmap2))); + + event.header.type = PERF_RECORD_MMAP2; + event.header.misc = PERF_RECORD_MISC_USER; + event.header.size = len; + + event.mmap2.pid = data->pid; + event.mmap2.tid = data->pid; + event.mmap2.maj = MMAP_DEV_MAJOR; + event.mmap2.ino = dso->ino; + + strcpy(event.mmap2.filename, dso->name); + + event.mmap2.start = dso_map_addr(dso); + event.mmap2.len = 4096; + event.mmap2.prot = PROT_EXEC; + + if (len > sizeof(event.mmap2)) { + /* write mmap2 event first */ + writen(data->input_pipe[1], &event, len - bench_id_hdr_size); + /* zero-fill sample id header */ + memset(id_hdr_ptr, 0, bench_id_hdr_size); + /* put timestamp in the right position */ + ts_idx = (bench_id_hdr_size / sizeof(u64)) - 2; + id_hdr_ptr[ts_idx] = timestamp; + writen(data->input_pipe[1], id_hdr_ptr, bench_id_hdr_size); + } else { + ts_idx = (len / sizeof(u64)) - 2; + id_hdr_ptr[ts_idx] = timestamp; + writen(data->input_pipe[1], &event, len); + } + return len; +} + +static u32 synthesize_sample(struct bench_data *data, struct bench_dso *dso, + u64 timestamp) +{ + union perf_event event; + struct perf_sample sample = { + .tid = data->pid, + .pid = data->pid, + .ip = dso_map_addr(dso), + .time = timestamp, + }; + + event.header.type = PERF_RECORD_SAMPLE; + event.header.misc = PERF_RECORD_MISC_USER; + event.header.size = perf_event__sample_event_size(&sample, bench_sample_type, 0); + + perf_event__synthesize_sample(&event, bench_sample_type, 0, &sample); + + return writen(data->input_pipe[1], &event, event.header.size); +} + +static u32 synthesize_flush(struct bench_data *data) +{ + struct perf_event_header header = { + .size = sizeof(header), + .type = PERF_RECORD_FINISHED_ROUND, + }; + + return writen(data->input_pipe[1], &header, header.size); +} + +static void *data_reader(void *arg) +{ + struct bench_data *data = arg; + char buf[8192]; + int flag; + int n; + + flag = fcntl(data->output_pipe[0], F_GETFL); + fcntl(data->output_pipe[0], F_SETFL, flag | O_NONBLOCK); + + /* read out data from child */ + while (true) { + n = read(data->output_pipe[0], buf, sizeof(buf)); + if (n > 0) + continue; + if (n == 0) + break; + + if (errno != EINTR && errno != EAGAIN) + break; + + usleep(100); + } + + close(data->output_pipe[0]); + return NULL; +} + +static int setup_injection(struct bench_data *data) +{ + int ready_pipe[2]; + int dev_null_fd; + char buf; + + if (pipe(ready_pipe) < 0) + return -1; + + if (pipe(data->input_pipe) < 0) + return -1; + + if (pipe(data->output_pipe) < 0) + return -1; + + data->pid = fork(); + if (data->pid < 0) + return -1; + + if (data->pid == 0) { + const char **inject_argv; + + close(data->input_pipe[1]); + close(data->output_pipe[0]); + close(ready_pipe[0]); + + dup2(data->input_pipe[0], STDIN_FILENO); + close(data->input_pipe[0]); + dup2(data->output_pipe[1], STDOUT_FILENO); + close(data->output_pipe[1]); + + dev_null_fd = open("/dev/null", O_WRONLY); + if (dev_null_fd < 0) + exit(1); + + dup2(dev_null_fd, STDERR_FILENO); + + inject_argv = calloc(3, sizeof(*inject_argv)); + if (inject_argv == NULL) + exit(1); + + inject_argv[0] = strdup("inject"); + inject_argv[1] = strdup("-b"); + + /* signal that we're ready to go */ + close(ready_pipe[1]); + + cmd_inject(2, inject_argv); + + exit(0); + } + + pthread_create(&data->th, NULL, data_reader, data); + + close(ready_pipe[1]); + close(data->input_pipe[0]); + close(data->output_pipe[1]); + + /* wait for child ready */ + if (read(ready_pipe[0], &buf, 1) < 0) + return -1; + close(ready_pipe[0]); + + return 0; +} + +static int inject_build_id(struct bench_data *data, u64 *max_rss) +{ + int status; + unsigned int i, k; + struct rusage rusage; + u64 len = 0; + + /* this makes the child to run */ + if (perf_header__write_pipe(data->input_pipe[1]) < 0) + return -1; + + len += synthesize_attr(data); + len += synthesize_fork(data); + + for (i = 0; i < nr_mmaps; i++) { + int idx = rand() % (nr_dsos - 1); + struct bench_dso *dso = &dsos[idx]; + u64 timestamp = rand() % 1000000; + + pr_debug2(" [%d] injecting: %s\n", i+1, dso->name); + len += synthesize_mmap(data, dso, timestamp); + + for (k = 0; k < nr_samples; k++) + len += synthesize_sample(data, dso, timestamp + k * 1000); + + if ((i + 1) % 10 == 0) + len += synthesize_flush(data); + } + + /* tihs makes the child to finish */ + close(data->input_pipe[1]); + + wait4(data->pid, &status, 0, &rusage); + *max_rss = rusage.ru_maxrss; + + pr_debug(" Child %d exited with %d\n", data->pid, status); + + return 0; +} + +static int do_inject_loop(struct bench_data *data) +{ + unsigned int i; + struct stats time_stats, mem_stats; + double time_average, time_stddev; + double mem_average, mem_stddev; + + srand(time(NULL)); + init_stats(&time_stats); + init_stats(&mem_stats); + symbol__init(NULL); + + bench_sample_type = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP; + bench_sample_type |= PERF_SAMPLE_TID | PERF_SAMPLE_TIME; + bench_id_hdr_size = 32; + + collect_dso(); + if (nr_dsos == 0) { + printf(" Cannot collect DSOs for injection\n"); + return -1; + } + + for (i = 0; i < iterations; i++) { + struct timeval start, end, diff; + u64 runtime_us, max_rss; + + pr_debug(" Iteration #%d\n", i+1); + + if (setup_injection(data) < 0) { + printf(" Build-id injection setup failed\n"); + break; + } + + gettimeofday(&start, NULL); + if (inject_build_id(data, &max_rss) < 0) { + printf(" Build-id injection failed\n"); + break; + } + + gettimeofday(&end, NULL); + timersub(&end, &start, &diff); + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + update_stats(&time_stats, runtime_us); + update_stats(&mem_stats, max_rss); + + pthread_join(data->th, NULL); + } + + time_average = avg_stats(&time_stats) / USEC_PER_MSEC; + time_stddev = stddev_stats(&time_stats) / USEC_PER_MSEC; + printf(" Average build-id injection took: %.3f msec (+- %.3f msec)\n", + time_average, time_stddev); + + /* each iteration, it processes MMAP2 + BUILD_ID + nr_samples * SAMPLE */ + time_average = avg_stats(&time_stats) / (nr_mmaps * (nr_samples + 2)); + time_stddev = stddev_stats(&time_stats) / (nr_mmaps * (nr_samples + 2)); + printf(" Average time per event: %.3f usec (+- %.3f usec)\n", + time_average, time_stddev); + + mem_average = avg_stats(&mem_stats); + mem_stddev = stddev_stats(&mem_stats); + printf(" Average memory usage: %.0f KB (+- %.0f KB)\n", + mem_average, mem_stddev); + + release_dso(); + return 0; +} + +int bench_inject_build_id(int argc, const char **argv) +{ + struct bench_data data; + + argc = parse_options(argc, argv, options, bench_usage, 0); + if (argc) { + usage_with_options(bench_usage, options); + exit(EXIT_FAILURE); + } + + return do_inject_loop(&data); +} + diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 4f176039fc8f..62a7b7420a44 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -87,6 +87,7 @@ static struct bench epoll_benchmarks[] = { static struct bench internals_benchmarks[] = { { "synthesize", "Benchmark perf event synthesis", bench_synthesize }, { "kallsyms-parse", "Benchmark kallsyms parsing", bench_kallsyms_parse }, + { "inject-build-id", "Benchmark build-id injection", bench_inject_build_id }, { NULL, NULL, NULL } }; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 6d2f410d773a..e4d78f11494e 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -441,11 +441,10 @@ static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool, return 0; } -static int perf_event__inject_buildid(struct perf_tool *tool, - union perf_event *event, - struct perf_sample *sample, - struct evsel *evsel __maybe_unused, - struct machine *machine) +int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event, + struct perf_sample *sample, + struct evsel *evsel __maybe_unused, + struct machine *machine) { struct addr_location al; struct thread *thread; diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index aad419bb165c..949f7e54c9cb 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -29,6 +29,10 @@ int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, int dsos__hit_all(struct perf_session *session); +int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event, + struct perf_sample *sample, struct evsel *evsel, + struct machine *machine); + bool perf_session__read_build_ids(struct perf_session *session, bool with_hits); int perf_session__write_buildid_table(struct perf_session *session, struct feat_fd *fd); -- cgit v1.3.1 From 2946ecedd026ca3bb6b6415de487e2836037d06f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 12 Oct 2020 16:02:10 +0900 Subject: perf inject: Add missing callbacks in perf_tool I found some events (like PERF_RECORD_CGROUP) are not copied by perf inject due to the missing callbacks. Let's add them. While at it, I've changed the order of the callbacks to match with struct perf_tool so that we can compare them easily. Acked-by: Jiri Olsa Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20201012070214.2074921-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index e4d78f11494e..2c5e23d73a8a 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -97,6 +97,13 @@ static int perf_event__repipe_op2_synth(struct perf_session *session, return perf_event__repipe_synth(session->tool, event); } +static int perf_event__repipe_op4_synth(struct perf_session *session, + union perf_event *event, + u64 data __maybe_unused) +{ + return perf_event__repipe_synth(session->tool, event); +} + static int perf_event__repipe_attr(struct perf_tool *tool, union perf_event *event, struct evlist **pevlist) @@ -115,6 +122,13 @@ static int perf_event__repipe_attr(struct perf_tool *tool, return perf_event__repipe_synth(tool, event); } +static int perf_event__repipe_event_update(struct perf_tool *tool, + union perf_event *event, + struct evlist **pevlist __maybe_unused) +{ + return perf_event__repipe_synth(tool, event); +} + #ifdef HAVE_AUXTRACE_SUPPORT static int copy_bytes(struct perf_inject *inject, int fd, off_t size) @@ -707,9 +721,12 @@ int cmd_inject(int argc, const char **argv) struct perf_inject inject = { .tool = { .sample = perf_event__repipe_sample, + .read = perf_event__repipe_sample, .mmap = perf_event__repipe, .mmap2 = perf_event__repipe, .comm = perf_event__repipe, + .namespaces = perf_event__repipe, + .cgroup = perf_event__repipe, .fork = perf_event__repipe, .exit = perf_event__repipe, .lost = perf_event__repipe, @@ -717,19 +734,28 @@ int cmd_inject(int argc, const char **argv) .aux = perf_event__repipe, .itrace_start = perf_event__repipe, .context_switch = perf_event__repipe, - .read = perf_event__repipe_sample, .throttle = perf_event__repipe, .unthrottle = perf_event__repipe, + .ksymbol = perf_event__repipe, + .bpf = perf_event__repipe, + .text_poke = perf_event__repipe, .attr = perf_event__repipe_attr, + .event_update = perf_event__repipe_event_update, .tracing_data = perf_event__repipe_op2_synth, - .auxtrace_info = perf_event__repipe_op2_synth, - .auxtrace = perf_event__repipe_auxtrace, - .auxtrace_error = perf_event__repipe_op2_synth, - .time_conv = perf_event__repipe_op2_synth, .finished_round = perf_event__repipe_oe_synth, .build_id = perf_event__repipe_op2_synth, .id_index = perf_event__repipe_op2_synth, + .auxtrace_info = perf_event__repipe_op2_synth, + .auxtrace_error = perf_event__repipe_op2_synth, + .time_conv = perf_event__repipe_op2_synth, + .thread_map = perf_event__repipe_op2_synth, + .cpu_map = perf_event__repipe_op2_synth, + .stat_config = perf_event__repipe_op2_synth, + .stat = perf_event__repipe_op2_synth, + .stat_round = perf_event__repipe_op2_synth, .feature = perf_event__repipe_op2_synth, + .compressed = perf_event__repipe_op4_synth, + .auxtrace = perf_event__repipe_auxtrace, }, .input_name = "-", .samples = LIST_HEAD_INIT(inject.samples), -- cgit v1.3.1 From 336c95b297e8127705e590bbd25f5c627bfcb782 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 12 Oct 2020 16:02:11 +0900 Subject: perf inject: Enter namespace when reading build-id It should be in a proper mnt namespace when accessing the file. I think this had no problem since the build-id was actually read from map__load() -> dso__load() already. But I'd like to change it in the following commit. Acked-by: Jiri Olsa Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20201012070214.2074921-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 2c5e23d73a8a..670157db2502 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -23,6 +23,7 @@ #include "util/symbol.h" #include "util/synthetic-events.h" #include "util/thread.h" +#include "util/namespaces.h" #include #include @@ -419,16 +420,19 @@ static int perf_event__repipe_tracing_data(struct perf_session *session, static int dso__read_build_id(struct dso *dso) { + struct nscookie nsc; + if (dso->has_build_id) return 0; + nsinfo__mountns_enter(dso->nsinfo, &nsc); if (filename__read_build_id(dso->long_name, dso->build_id, sizeof(dso->build_id)) > 0) { dso->has_build_id = true; - return 0; } + nsinfo__mountns_exit(&nsc); - return -1; + return dso->has_build_id ? 0 : -1; } static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool, -- cgit v1.3.1 From e7b60c5a0c4b9c02fa3b471f8d5edb4989ebdf60 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 12 Oct 2020 16:02:12 +0900 Subject: perf inject: Do not load map/dso when injecting build-id No need to load symbols in a DSO when injecting build-id. I guess the reason was to check the DSO is a special file like anon files. Use some helper functions in map.c to check them before reading build-id. Also pass sample event's cpumode to a new build-id event. It brought a speedup in the benchmark of 25 -> 21 msec on my laptop. Also the memory usage (Max RSS) went down by ~200 KB. # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 21.389 msec (+- 0.138 msec) Average time per event: 2.097 usec (+- 0.014 usec) Average memory usage: 8225 KB (+- 0 KB) Committer notes: Before: $ perf stat -r5 perf bench internals inject-build-id > /dev/null Performance counter stats for 'perf bench internals inject-build-id' (5 runs): 4,020.56 msec task-clock:u # 1.271 CPUs utilized ( +- 0.74% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 123,354 page-faults:u # 0.031 M/sec ( +- 0.81% ) 7,119,951,568 cycles:u # 1.771 GHz ( +- 1.74% ) (83.27%) 230,086,969 stalled-cycles-frontend:u # 3.23% frontend cycles idle ( +- 1.97% ) (83.41%) 1,168,298,765 stalled-cycles-backend:u # 16.41% backend cycles idle ( +- 1.13% ) (83.44%) 11,173,083,669 instructions:u # 1.57 insn per cycle # 0.10 stalled cycles per insn ( +- 1.58% ) (83.31%) 2,413,908,936 branches:u # 600.392 M/sec ( +- 1.69% ) (83.26%) 46,576,289 branch-misses:u # 1.93% of all branches ( +- 2.20% ) (83.31%) 3.1638 +- 0.0309 seconds time elapsed ( +- 0.98% ) $ After: $ perf stat -r5 perf bench internals inject-build-id > /dev/null Performance counter stats for 'perf bench internals inject-build-id' (5 runs): 2,379.94 msec task-clock:u # 1.473 CPUs utilized ( +- 0.18% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 62,584 page-faults:u # 0.026 M/sec ( +- 0.07% ) 2,372,389,668 cycles:u # 0.997 GHz ( +- 0.29% ) (83.14%) 106,937,862 stalled-cycles-frontend:u # 4.51% frontend cycles idle ( +- 4.89% ) (83.20%) 581,697,915 stalled-cycles-backend:u # 24.52% backend cycles idle ( +- 0.71% ) (83.47%) 3,659,692,199 instructions:u # 1.54 insn per cycle # 0.16 stalled cycles per insn ( +- 0.10% ) (83.63%) 791,372,961 branches:u # 332.518 M/sec ( +- 0.27% ) (83.39%) 10,648,083 branch-misses:u # 1.35% of all branches ( +- 0.22% ) (83.16%) 1.61570 +- 0.00172 seconds time elapsed ( +- 0.11% ) $ Signed-off-by: Namhyung Kim Original-patch-by: Stephane Eranian Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201012070214.2074921-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 33 ++++++++++++--------------------- tools/perf/util/map.c | 17 +---------------- tools/perf/util/map.h | 14 ++++++++++++++ 3 files changed, 27 insertions(+), 37 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 670157db2502..248cd9f3e5bb 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -24,9 +24,10 @@ #include "util/synthetic-events.h" #include "util/thread.h" #include "util/namespaces.h" -#include +#include #include +#include /* To get things like MAP_HUGETLB even on older libc headers */ #include #include @@ -436,21 +437,22 @@ static int dso__read_build_id(struct dso *dso) } static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool, - struct machine *machine) + struct machine *machine, u8 cpumode, u32 flags) { - u16 misc = PERF_RECORD_MISC_USER; int err; + if (is_anon_memory(dso->long_name) || flags & MAP_HUGETLB) + return 0; + if (is_no_dso_memory(dso->long_name)) + return 0; + if (dso__read_build_id(dso) < 0) { pr_debug("no build_id found for %s\n", dso->long_name); return -1; } - if (dso->kernel) - misc = PERF_RECORD_MISC_KERNEL; - - err = perf_event__synthesize_build_id(tool, dso, misc, perf_event__repipe, - machine); + err = perf_event__synthesize_build_id(tool, dso, cpumode, + perf_event__repipe, machine); if (err) { pr_err("Can't synthesize build_id event for %s\n", dso->long_name); return -1; @@ -477,19 +479,8 @@ int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event, if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) { if (!al.map->dso->hit) { al.map->dso->hit = 1; - if (map__load(al.map) >= 0) { - dso__inject_build_id(al.map->dso, tool, machine); - /* - * If this fails, too bad, let the other side - * account this as unresolved. - */ - } else { -#ifdef HAVE_LIBELF_SUPPORT - pr_warning("no symbols found in %s, maybe " - "install a debug package?\n", - al.map->dso->long_name); -#endif - } + dso__inject_build_id(al.map->dso, tool, machine, + sample->cpumode, al.map->flags); } } diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index cc0faf8f1321..8b305e624124 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -27,21 +27,6 @@ static void __maps__insert(struct maps *maps, struct map *map); -static inline int is_anon_memory(const char *filename, u32 flags) -{ - return flags & MAP_HUGETLB || - !strcmp(filename, "//anon") || - !strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) || - !strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1); -} - -static inline int is_no_dso_memory(const char *filename) -{ - return !strncmp(filename, "[stack", 6) || - !strncmp(filename, "/SYSV",5) || - !strcmp(filename, "[heap]"); -} - static inline int is_android_lib(const char *filename) { return strstarts(filename, "/data/app-lib/") || @@ -158,7 +143,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, int anon, no_dso, vdso, android; android = is_android_lib(filename); - anon = is_anon_memory(filename, flags); + anon = is_anon_memory(filename) || flags & MAP_HUGETLB; vdso = is_vdso_map(filename); no_dso = is_no_dso_memory(filename); map->prot = prot; diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index c2f5d28fe73a..b1c0686db1b7 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -171,4 +171,18 @@ static inline bool is_bpf_image(const char *name) return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) == 0 || strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1) == 0; } + +static inline int is_anon_memory(const char *filename) +{ + return !strcmp(filename, "//anon") || + !strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) || + !strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1); +} + +static inline int is_no_dso_memory(const char *filename) +{ + return !strncmp(filename, "[stack", 6) || + !strncmp(filename, "/SYSV", 5) || + !strcmp(filename, "[heap]"); +} #endif /* __PERF_MAP_H */ -- cgit v1.3.1 From 27c9c3424fc217dab4077122069fe4b4413dbf18 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 12 Oct 2020 16:02:13 +0900 Subject: perf inject: Add --buildid-all option Like 'perf record', we can even more speedup build-id processing by just using all DSOs. Then we don't need to look at all the sample events anymore. The following patch will update 'perf bench' to show the result of the --buildid-all option too. Signed-off-by: Namhyung Kim Original-patch-by: Stephane Eranian Acked-by: Ian Rogers Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201012070214.2074921-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-inject.txt | 6 +- tools/perf/builtin-inject.c | 113 +++++++++++++++++++++++++++++-- 2 files changed, 113 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index 70969ea73e01..a8eccff21281 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -24,8 +24,12 @@ information could make use of this facility. OPTIONS ------- -b:: ---build-ids=:: +--build-ids:: Inject build-ids into the output stream + +--buildid-all: + Inject build-ids of all DSOs into the output stream + -v:: --verbose:: Be more verbose. diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 248cd9f3e5bb..f3f965157d69 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -10,6 +10,7 @@ #include "util/color.h" #include "util/dso.h" +#include "util/vdso.h" #include "util/evlist.h" #include "util/evsel.h" #include "util/map.h" @@ -37,6 +38,7 @@ struct perf_inject { struct perf_tool tool; struct perf_session *session; bool build_ids; + bool build_id_all; bool sched_stat; bool have_auxtrace; bool strip; @@ -56,6 +58,9 @@ struct event_entry { union perf_event event[]; }; +static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool, + struct machine *machine, u8 cpumode, u32 flags); + static int output_bytes(struct perf_inject *inject, void *buf, size_t sz) { ssize_t size; @@ -319,6 +324,68 @@ static int perf_event__jit_repipe_mmap(struct perf_tool *tool, } #endif +static struct dso *findnew_dso(int pid, int tid, const char *filename, + struct dso_id *id, struct machine *machine) +{ + struct thread *thread; + struct nsinfo *nsi = NULL; + struct nsinfo *nnsi; + struct dso *dso; + bool vdso; + + thread = machine__findnew_thread(machine, pid, tid); + if (thread == NULL) { + pr_err("cannot find or create a task %d/%d.\n", tid, pid); + return NULL; + } + + vdso = is_vdso_map(filename); + nsi = nsinfo__get(thread->nsinfo); + + if (vdso) { + /* The vdso maps are always on the host and not the + * container. Ensure that we don't use setns to look + * them up. + */ + nnsi = nsinfo__copy(nsi); + if (nnsi) { + nsinfo__put(nsi); + nnsi->need_setns = false; + nsi = nnsi; + } + dso = machine__findnew_vdso(machine, thread); + } else { + dso = machine__findnew_dso_id(machine, filename, id); + } + + if (dso) + dso->nsinfo = nsi; + else + nsinfo__put(nsi); + + thread__put(thread); + return dso; +} + +static int perf_event__repipe_buildid_mmap(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct dso *dso; + + dso = findnew_dso(event->mmap.pid, event->mmap.tid, + event->mmap.filename, NULL, machine); + + if (dso && !dso->hit) { + dso->hit = 1; + dso__inject_build_id(dso, tool, machine, sample->cpumode, 0); + dso__put(dso); + } + + return perf_event__repipe(tool, event, sample, machine); +} + static int perf_event__repipe_mmap2(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -357,6 +424,34 @@ static int perf_event__jit_repipe_mmap2(struct perf_tool *tool, } #endif +static int perf_event__repipe_buildid_mmap2(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct dso_id dso_id = { + .maj = event->mmap2.maj, + .min = event->mmap2.min, + .ino = event->mmap2.ino, + .ino_generation = event->mmap2.ino_generation, + }; + struct dso *dso; + + dso = findnew_dso(event->mmap2.pid, event->mmap2.tid, + event->mmap2.filename, &dso_id, machine); + + if (dso && !dso->hit) { + dso->hit = 1; + dso__inject_build_id(dso, tool, machine, sample->cpumode, + event->mmap2.flags); + dso__put(dso); + } + + perf_event__repipe(tool, event, sample, machine); + + return 0; +} + static int perf_event__repipe_fork(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -614,7 +709,7 @@ static int __cmd_inject(struct perf_inject *inject) signal(SIGINT, sig_handler); if (inject->build_ids || inject->sched_stat || - inject->itrace_synth_opts.set) { + inject->itrace_synth_opts.set || inject->build_id_all) { inject->tool.mmap = perf_event__repipe_mmap; inject->tool.mmap2 = perf_event__repipe_mmap2; inject->tool.fork = perf_event__repipe_fork; @@ -623,7 +718,10 @@ static int __cmd_inject(struct perf_inject *inject) output_data_offset = session->header.data_offset; - if (inject->build_ids) { + if (inject->build_id_all) { + inject->tool.mmap = perf_event__repipe_buildid_mmap; + inject->tool.mmap2 = perf_event__repipe_buildid_mmap2; + } else if (inject->build_ids) { inject->tool.sample = perf_event__inject_buildid; } else if (inject->sched_stat) { struct evsel *evsel; @@ -767,6 +865,8 @@ int cmd_inject(int argc, const char **argv) struct option options[] = { OPT_BOOLEAN('b', "build-ids", &inject.build_ids, "Inject build-ids into the output stream"), + OPT_BOOLEAN(0, "buildid-all", &inject.build_id_all, + "Inject build-ids of all DSOs into the output stream"), OPT_STRING('i', "input", &inject.input_name, "file", "input file name"), OPT_STRING('o', "output", &inject.output.path, "file", @@ -815,8 +915,6 @@ int cmd_inject(int argc, const char **argv) return -1; } - inject.tool.ordered_events = inject.sched_stat; - data.path = inject.input_name; inject.session = perf_session__new(&data, true, &inject.tool); if (IS_ERR(inject.session)) @@ -825,7 +923,7 @@ int cmd_inject(int argc, const char **argv) if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); - if (inject.build_ids) { + if (inject.build_ids && !inject.build_id_all) { /* * to make sure the mmap records are ordered correctly * and so that the correct especially due to jitted code @@ -835,6 +933,11 @@ int cmd_inject(int argc, const char **argv) inject.tool.ordered_events = true; inject.tool.ordering_requires_timestamps = true; } + + if (inject.sched_stat) { + inject.tool.ordered_events = true; + } + #ifdef HAVE_JITDUMP if (inject.jit_mode) { inject.tool.mmap2 = perf_event__jit_repipe_mmap2; -- cgit v1.3.1 From bf7ef5ddb0b327099404d3e2af4b3b142c702813 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 12 Oct 2020 16:02:14 +0900 Subject: perf bench: Run inject-build-id with --buildid-all option too For comparison, it now runs the benchmark twice - one if regular -b and another for --buildid-all. $ perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 21.002 msec (+- 0.172 msec) Average time per event: 2.059 usec (+- 0.017 usec) Average memory usage: 8169 KB (+- 0 KB) Average build-id-all injection took: 19.543 msec (+- 0.124 msec) Average time per event: 1.916 usec (+- 0.012 usec) Average memory usage: 7348 KB (+- 0 KB) Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Ian Rogers Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201012070214.2074921-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/inject-buildid.c | 54 +++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c index 0fccf2a9e95b..e9a11f4a1109 100644 --- a/tools/perf/bench/inject-buildid.c +++ b/tools/perf/bench/inject-buildid.c @@ -271,7 +271,7 @@ static void *data_reader(void *arg) return NULL; } -static int setup_injection(struct bench_data *data) +static int setup_injection(struct bench_data *data, bool build_id_all) { int ready_pipe[2]; int dev_null_fd; @@ -292,6 +292,7 @@ static int setup_injection(struct bench_data *data) if (data->pid == 0) { const char **inject_argv; + int inject_argc = 2; close(data->input_pipe[1]); close(data->output_pipe[0]); @@ -308,17 +309,22 @@ static int setup_injection(struct bench_data *data) dup2(dev_null_fd, STDERR_FILENO); - inject_argv = calloc(3, sizeof(*inject_argv)); + if (build_id_all) + inject_argc++; + + inject_argv = calloc(inject_argc + 1, sizeof(*inject_argv)); if (inject_argv == NULL) exit(1); inject_argv[0] = strdup("inject"); inject_argv[1] = strdup("-b"); + if (build_id_all) + inject_argv[2] = strdup("--buildid-all"); /* signal that we're ready to go */ close(ready_pipe[1]); - cmd_inject(2, inject_argv); + cmd_inject(inject_argc, inject_argv); exit(0); } @@ -377,27 +383,17 @@ static int inject_build_id(struct bench_data *data, u64 *max_rss) return 0; } -static int do_inject_loop(struct bench_data *data) +static void do_inject_loop(struct bench_data *data, bool build_id_all) { unsigned int i; struct stats time_stats, mem_stats; double time_average, time_stddev; double mem_average, mem_stddev; - srand(time(NULL)); init_stats(&time_stats); init_stats(&mem_stats); - symbol__init(NULL); - bench_sample_type = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP; - bench_sample_type |= PERF_SAMPLE_TID | PERF_SAMPLE_TIME; - bench_id_hdr_size = 32; - - collect_dso(); - if (nr_dsos == 0) { - printf(" Cannot collect DSOs for injection\n"); - return -1; - } + pr_debug(" Build-id%s injection benchmark\n", build_id_all ? "-all" : ""); for (i = 0; i < iterations; i++) { struct timeval start, end, diff; @@ -405,7 +401,7 @@ static int do_inject_loop(struct bench_data *data) pr_debug(" Iteration #%d\n", i+1); - if (setup_injection(data) < 0) { + if (setup_injection(data, build_id_all) < 0) { printf(" Build-id injection setup failed\n"); break; } @@ -427,8 +423,8 @@ static int do_inject_loop(struct bench_data *data) time_average = avg_stats(&time_stats) / USEC_PER_MSEC; time_stddev = stddev_stats(&time_stats) / USEC_PER_MSEC; - printf(" Average build-id injection took: %.3f msec (+- %.3f msec)\n", - time_average, time_stddev); + printf(" Average build-id%s injection took: %.3f msec (+- %.3f msec)\n", + build_id_all ? "-all" : "", time_average, time_stddev); /* each iteration, it processes MMAP2 + BUILD_ID + nr_samples * SAMPLE */ time_average = avg_stats(&time_stats) / (nr_mmaps * (nr_samples + 2)); @@ -440,6 +436,26 @@ static int do_inject_loop(struct bench_data *data) mem_stddev = stddev_stats(&mem_stats); printf(" Average memory usage: %.0f KB (+- %.0f KB)\n", mem_average, mem_stddev); +} + +static int do_inject_loops(struct bench_data *data) +{ + + srand(time(NULL)); + symbol__init(NULL); + + bench_sample_type = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP; + bench_sample_type |= PERF_SAMPLE_TID | PERF_SAMPLE_TIME; + bench_id_hdr_size = 32; + + collect_dso(); + if (nr_dsos == 0) { + printf(" Cannot collect DSOs for injection\n"); + return -1; + } + + do_inject_loop(data, false); + do_inject_loop(data, true); release_dso(); return 0; @@ -455,6 +471,6 @@ int bench_inject_build_id(int argc, const char **argv) exit(EXIT_FAILURE); } - return do_inject_loop(&data); + return do_inject_loops(&data); } -- cgit v1.3.1 From 70830f974e8db9b43e61d9b223682e13e557022f Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 12 Oct 2020 10:32:05 +0530 Subject: perf vendor events: Fix typos in power8 PMU events This replaces the incorrectly spelled word "localtion" with "location" in some power8 PMU event descriptions. Fixes: 2a81fa3bb5ed ("perf vendor events: Add power8 PMU events") Signed-off-by: Sandipan Das Reviewed-by: Kajol Jain Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Ravi Bangoria Cc: Sukadev Bhattiprolu Link: http://lore.kernel.org/lkml/20201012050205.328523-1-sandipan@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/powerpc/power8/cache.json | 10 +++++----- tools/perf/pmu-events/arch/powerpc/power8/frontend.json | 12 ++++++------ tools/perf/pmu-events/arch/powerpc/power8/marked.json | 10 +++++----- tools/perf/pmu-events/arch/powerpc/power8/other.json | 16 ++++++++-------- .../perf/pmu-events/arch/powerpc/power8/translation.json | 2 +- 5 files changed, 25 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/powerpc/power8/cache.json b/tools/perf/pmu-events/arch/powerpc/power8/cache.json index 6b792b2c87e2..05a17084d939 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/cache.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/cache.json @@ -32,8 +32,8 @@ { "EventCode": "0x1c04e", "EventName": "PM_DATA_FROM_L2MISS_MOD", - "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L2 due to a demand load", - "PublicDescription": "The processor's data cache was reloaded from a localtion other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1" + "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to a demand load", + "PublicDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1" }, { "EventCode": "0x3c040", @@ -74,8 +74,8 @@ { "EventCode": "0x4c04e", "EventName": "PM_DATA_FROM_L3MISS_MOD", - "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to a demand load", - "PublicDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1" + "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to a demand load", + "PublicDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1" }, { "EventCode": "0x3c042", @@ -134,7 +134,7 @@ { "EventCode": "0x4e04e", "EventName": "PM_DPTEG_FROM_L3MISS", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L3 due to a data side request", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a data side request", "PublicDescription": "" }, { diff --git a/tools/perf/pmu-events/arch/powerpc/power8/frontend.json b/tools/perf/pmu-events/arch/powerpc/power8/frontend.json index 1ddc30655d43..1c902a8263b6 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/frontend.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/frontend.json @@ -116,8 +116,8 @@ { "EventCode": "0x1404e", "EventName": "PM_INST_FROM_L2MISS", - "BriefDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L2 due to an instruction fetch (not prefetch)", - "PublicDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1" + "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to an instruction fetch (not prefetch)", + "PublicDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1" }, { "EventCode": "0x34040", @@ -158,8 +158,8 @@ { "EventCode": "0x4404e", "EventName": "PM_INST_FROM_L3MISS_MOD", - "BriefDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L3 due to a instruction fetch", - "PublicDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1" + "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to a instruction fetch", + "PublicDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1" }, { "EventCode": "0x34042", @@ -320,7 +320,7 @@ { "EventCode": "0x1504e", "EventName": "PM_IPTEG_FROM_L2MISS", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L2 due to a instruction side request", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a instruction side request", "PublicDescription": "" }, { @@ -344,7 +344,7 @@ { "EventCode": "0x4504e", "EventName": "PM_IPTEG_FROM_L3MISS", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L3 due to a instruction side request", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a instruction side request", "PublicDescription": "" }, { diff --git a/tools/perf/pmu-events/arch/powerpc/power8/marked.json b/tools/perf/pmu-events/arch/powerpc/power8/marked.json index 94dc58b83b7e..6de61a797bbd 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/marked.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/marked.json @@ -92,7 +92,7 @@ { "EventCode": "0x4c12e", "EventName": "PM_MRK_DATA_FROM_L2MISS_CYC", - "BriefDescription": "Duration in cycles to reload from a localtion other than the local core's L2 due to a marked load", + "BriefDescription": "Duration in cycles to reload from a location other than the local core's L2 due to a marked load", "PublicDescription": "" }, { @@ -158,13 +158,13 @@ { "EventCode": "0x201e4", "EventName": "PM_MRK_DATA_FROM_L3MISS", - "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to a marked load", + "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to a marked load", "PublicDescription": "" }, { "EventCode": "0x2d12e", "EventName": "PM_MRK_DATA_FROM_L3MISS_CYC", - "BriefDescription": "Duration in cycles to reload from a localtion other than the local core's L3 due to a marked load", + "BriefDescription": "Duration in cycles to reload from a location other than the local core's L3 due to a marked load", "PublicDescription": "" }, { @@ -392,7 +392,7 @@ { "EventCode": "0x1f14e", "EventName": "PM_MRK_DPTEG_FROM_L2MISS", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L2 due to a marked data side request", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a marked data side request", "PublicDescription": "" }, { @@ -416,7 +416,7 @@ { "EventCode": "0x4f14e", "EventName": "PM_MRK_DPTEG_FROM_L3MISS", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L3 due to a marked data side request", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a marked data side request", "PublicDescription": "" }, { diff --git a/tools/perf/pmu-events/arch/powerpc/power8/other.json b/tools/perf/pmu-events/arch/powerpc/power8/other.json index f4e760cab111..84a0cedf1fd9 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/other.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/other.json @@ -410,8 +410,8 @@ { "EventCode": "0x61c04e", "EventName": "PM_DATA_ALL_FROM_L2MISS_MOD", - "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L2 due to either demand loads or data prefetch", - "PublicDescription": "The processor's data cache was reloaded from a localtion other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1" + "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to either demand loads or data prefetch", + "PublicDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1" }, { "EventCode": "0x63c040", @@ -470,8 +470,8 @@ { "EventCode": "0x64c04e", "EventName": "PM_DATA_ALL_FROM_L3MISS_MOD", - "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to either demand loads or data prefetch", - "PublicDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1" + "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to either demand loads or data prefetch", + "PublicDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1" }, { "EventCode": "0x63c042", @@ -1280,8 +1280,8 @@ { "EventCode": "0x51404e", "EventName": "PM_INST_ALL_FROM_L2MISS", - "BriefDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L2 due to instruction fetches and prefetches", - "PublicDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1" + "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to instruction fetches and prefetches", + "PublicDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1" }, { "EventCode": "0x534040", @@ -1340,8 +1340,8 @@ { "EventCode": "0x54404e", "EventName": "PM_INST_ALL_FROM_L3MISS_MOD", - "BriefDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L3 due to a instruction fetch", - "PublicDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1" + "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to a instruction fetch", + "PublicDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1" }, { "EventCode": "0x534042", diff --git a/tools/perf/pmu-events/arch/powerpc/power8/translation.json b/tools/perf/pmu-events/arch/powerpc/power8/translation.json index 623e7475b010..a1657f5fdc6b 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/translation.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/translation.json @@ -44,7 +44,7 @@ { "EventCode": "0x1e04e", "EventName": "PM_DPTEG_FROM_L2MISS", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L2 due to a data side request", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a data side request", "PublicDescription": "" }, { -- cgit v1.3.1 From dc000c4593a9ce119ba7db2c4d9a00f5a9daf731 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Fri, 25 Sep 2020 19:56:34 -0400 Subject: perf sched: Show start of latency as well The 'perf sched latency' tool is really useful at showing worst-case latencies that task encountered since wakeup. However it shows only the end of the latency. Often times the start of a latency is interesting as it can show what else was going on at the time to cause the latency. I certainly myself spending a lot of time backtracking to the start of the latency in "perf sched script" which wastes a lot of time. This patch therefore adds a new column "Max delay start". Considering this, also rename "Maximum delay at" to "Max delay end" as its easier to understand. Example of the new output: ---------------------------------------------------------------------------------------------------------------------------------- Task | Runtime ms | Switches | Avg delay ms | Max delay ms | Max delay start | Max delay end | ---------------------------------------------------------------------------------------------------------------------------------- MediaScannerSer:11936 | 651.296 ms | 67978 | avg: 0.113 ms | max: 77.250 ms | max start: 477.691360 s | max end: 477.768610 s audio@2.0-servi:(3) | 0.000 ms | 3440 | avg: 0.034 ms | max: 72.267 ms | max start: 477.697051 s | max end: 477.769318 s AudioOut_1D:8112 | 0.000 ms | 2588 | avg: 0.083 ms | max: 64.020 ms | max start: 477.710740 s | max end: 477.774760 s Time-limited te:14973 | 7966.090 ms | 24807 | avg: 0.073 ms | max: 15.563 ms | max start: 477.162746 s | max end: 477.178309 s surfaceflinger:8049 | 9.680 ms | 603 | avg: 0.063 ms | max: 13.275 ms | max start: 476.931791 s | max end: 476.945067 s HeapTaskDaemon:(3) | 1588.830 ms | 7040 | avg: 0.065 ms | max: 6.880 ms | max start: 473.666043 s | max end: 473.672922 s mount-passthrou:(3) | 1370.809 ms | 68904 | avg: 0.011 ms | max: 6.524 ms | max start: 478.090630 s | max end: 478.097154 s ReferenceQueueD:(3) | 11.794 ms | 1725 | avg: 0.014 ms | max: 6.521 ms | max start: 476.119782 s | max end: 476.126303 s writer:14077 | 18.410 ms | 1427 | avg: 0.036 ms | max: 6.131 ms | max start: 474.169675 s | max end: 474.175805 s Signed-off-by: Joel Fernandes (Google) Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20200925235634.4089867-1-joel@joelfernandes.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index e6fc297cee91..0e16f9d5a947 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -130,7 +130,8 @@ struct work_atoms { struct thread *thread; struct rb_node node; u64 max_lat; - u64 max_lat_at; + u64 max_lat_start; + u64 max_lat_end; u64 total_lat; u64 nb_atoms; u64 total_runtime; @@ -1096,7 +1097,8 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp) atoms->total_lat += delta; if (delta > atoms->max_lat) { atoms->max_lat = delta; - atoms->max_lat_at = timestamp; + atoms->max_lat_start = atom->wake_up_time; + atoms->max_lat_end = timestamp; } atoms->nb_atoms++; } @@ -1322,7 +1324,7 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_ int i; int ret; u64 avg; - char max_lat_at[32]; + char max_lat_start[32], max_lat_end[32]; if (!work_list->nb_atoms) return; @@ -1344,13 +1346,14 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_ printf(" "); avg = work_list->total_lat / work_list->nb_atoms; - timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at)); + timestamp__scnprintf_usec(work_list->max_lat_start, max_lat_start, sizeof(max_lat_start)); + timestamp__scnprintf_usec(work_list->max_lat_end, max_lat_end, sizeof(max_lat_end)); - printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n", + printf("|%11.3f ms |%9" PRIu64 " | avg:%8.3f ms | max:%8.3f ms | max start: %12s s | max end: %12s s\n", (double)work_list->total_runtime / NSEC_PER_MSEC, work_list->nb_atoms, (double)avg / NSEC_PER_MSEC, (double)work_list->max_lat / NSEC_PER_MSEC, - max_lat_at); + max_lat_start, max_lat_end); } static int pid_cmp(struct work_atoms *l, struct work_atoms *r) @@ -3137,7 +3140,8 @@ static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *d list_splice(&data->work_list, &this->work_list); if (this->max_lat < data->max_lat) { this->max_lat = data->max_lat; - this->max_lat_at = data->max_lat_at; + this->max_lat_start = data->max_lat_start; + this->max_lat_end = data->max_lat_end; } zfree(&data); return; @@ -3176,9 +3180,9 @@ static int perf_sched__lat(struct perf_sched *sched) perf_sched__merge_lat(sched); perf_sched__sort_lat(sched); - printf("\n -----------------------------------------------------------------------------------------------------------------\n"); - printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n"); - printf(" -----------------------------------------------------------------------------------------------------------------\n"); + printf("\n -------------------------------------------------------------------------------------------------------------------------------------------\n"); + printf(" Task | Runtime ms | Switches | Avg delay ms | Max delay ms | Max delay start | Max delay end |\n"); + printf(" -------------------------------------------------------------------------------------------------------------------------------------------\n"); next = rb_first_cached(&sched->sorted_atom_root); -- cgit v1.3.1 From a41c32105cc12a7d2ae11a591929a6add98987cd Mon Sep 17 00:00:00 2001 From: "Tzvetomir Stoyanov (VMware)" Date: Wed, 30 Sep 2020 14:07:33 +0300 Subject: tools lib traceevent: Hide non API functions There are internal library functions, which are not declared as a static. They are used inside the library from different files. Hide them from the library users, as they are not part of the API. These functions are made hidden and are renamed without the prefix "tep_": tep_free_plugin_paths tep_peek_char tep_buffer_init tep_get_input_buf_ptr tep_get_input_buf tep_read_token tep_free_token tep_free_event tep_free_format_field __tep_parse_format Link: https://lore.kernel.org/linux-trace-devel/e4afdd82deb5e023d53231bb13e08dca78085fb0.camel@decadent.org.uk/ Reported-by: Ben Hutchings Signed-off-by: Tzvetomir Stoyanov (VMware) Reviewed-by: Steven Rostedt (VMware) Cc: linux-trace-devel@vger.kernel.org Link: http://lore.kernel.org/lkml/20200930110733.280534-1-tz.stoyanov@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse-api.c | 8 +- tools/lib/traceevent/event-parse-local.h | 24 ++++-- tools/lib/traceevent/event-parse.c | 125 +++++++++++++------------------ tools/lib/traceevent/event-parse.h | 8 -- tools/lib/traceevent/event-plugin.c | 2 +- tools/lib/traceevent/parse-filter.c | 23 +++--- 6 files changed, 83 insertions(+), 107 deletions(-) (limited to 'tools') diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c index 4faf52a65791..f8361e45d446 100644 --- a/tools/lib/traceevent/event-parse-api.c +++ b/tools/lib/traceevent/event-parse-api.c @@ -92,7 +92,7 @@ bool tep_test_flag(struct tep_handle *tep, enum tep_flag flag) return false; } -unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data) +__hidden unsigned short data2host2(struct tep_handle *tep, unsigned short data) { unsigned short swap; @@ -105,7 +105,7 @@ unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data) return swap; } -unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data) +__hidden unsigned int data2host4(struct tep_handle *tep, unsigned int data) { unsigned int swap; @@ -120,8 +120,8 @@ unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data) return swap; } -unsigned long long -tep_data2host8(struct tep_handle *tep, unsigned long long data) +__hidden unsigned long long +data2host8(struct tep_handle *tep, unsigned long long data) { unsigned long long swap; diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h index d805a920af6f..fd4bbcfbb849 100644 --- a/tools/lib/traceevent/event-parse-local.h +++ b/tools/lib/traceevent/event-parse-local.h @@ -15,6 +15,8 @@ struct event_handler; struct func_resolver; struct tep_plugins_dir; +#define __hidden __attribute__((visibility ("hidden"))) + struct tep_handle { int ref_count; @@ -102,12 +104,20 @@ struct tep_print_parse { struct tep_print_arg *len_as_arg; }; -void tep_free_event(struct tep_event *event); -void tep_free_format_field(struct tep_format_field *field); -void tep_free_plugin_paths(struct tep_handle *tep); - -unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data); -unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data); -unsigned long long tep_data2host8(struct tep_handle *tep, unsigned long long data); +void free_tep_event(struct tep_event *event); +void free_tep_format_field(struct tep_format_field *field); +void free_tep_plugin_paths(struct tep_handle *tep); + +unsigned short data2host2(struct tep_handle *tep, unsigned short data); +unsigned int data2host4(struct tep_handle *tep, unsigned int data); +unsigned long long data2host8(struct tep_handle *tep, unsigned long long data); + +/* access to the internal parser */ +int peek_char(void); +void init_input_buf(const char *buf, unsigned long long size); +unsigned long long get_input_buf_ptr(void); +const char *get_input_buf(void); +enum tep_event_type read_token(char **tok); +void free_token(char *tok); #endif /* _PARSE_EVENTS_INT_H */ diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 5acc18b32606..fe58843d047c 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -54,19 +54,26 @@ static int show_warning = 1; warning(fmt, ##__VA_ARGS__); \ } while (0) -static void init_input_buf(const char *buf, unsigned long long size) +/** + * init_input_buf - init buffer for parsing + * @buf: buffer to parse + * @size: the size of the buffer + * + * Initializes the internal buffer that tep_read_token() will parse. + */ +__hidden void init_input_buf(const char *buf, unsigned long long size) { input_buf = buf; input_buf_siz = size; input_buf_ptr = 0; } -const char *tep_get_input_buf(void) +__hidden const char *get_input_buf(void) { return input_buf; } -unsigned long long tep_get_input_buf_ptr(void) +__hidden unsigned long long get_input_buf_ptr(void) { return input_buf_ptr; } @@ -100,26 +107,13 @@ process_defined_func(struct trace_seq *s, void *data, int size, static void free_func_handle(struct tep_function_handler *func); -/** - * tep_buffer_init - init buffer for parsing - * @buf: buffer to parse - * @size: the size of the buffer - * - * For use with tep_read_token(), this initializes the internal - * buffer that tep_read_token() will parse. - */ -void tep_buffer_init(const char *buf, unsigned long long size) -{ - init_input_buf(buf, size); -} - void breakpoint(void) { static int x; x++; } -struct tep_print_arg *alloc_arg(void) +static struct tep_print_arg *alloc_arg(void) { return calloc(1, sizeof(struct tep_print_arg)); } @@ -962,22 +956,17 @@ static int __read_char(void) return input_buf[input_buf_ptr++]; } -static int __peek_char(void) -{ - if (input_buf_ptr >= input_buf_siz) - return -1; - - return input_buf[input_buf_ptr]; -} - /** - * tep_peek_char - peek at the next character that will be read + * peek_char - peek at the next character that will be read * * Returns the next character read, or -1 if end of buffer. */ -int tep_peek_char(void) +__hidden int peek_char(void) { - return __peek_char(); + if (input_buf_ptr >= input_buf_siz) + return -1; + + return input_buf[input_buf_ptr]; } static int extend_token(char **tok, char *buf, int size) @@ -1033,7 +1022,7 @@ static enum tep_event_type __read_token(char **tok) case TEP_EVENT_OP: switch (ch) { case '-': - next_ch = __peek_char(); + next_ch = peek_char(); if (next_ch == '>') { buf[i++] = __read_char(); break; @@ -1045,7 +1034,7 @@ static enum tep_event_type __read_token(char **tok) case '>': case '<': last_ch = ch; - ch = __peek_char(); + ch = peek_char(); if (ch != last_ch) goto test_equal; buf[i++] = __read_char(); @@ -1068,7 +1057,7 @@ static enum tep_event_type __read_token(char **tok) return type; test_equal: - ch = __peek_char(); + ch = peek_char(); if (ch == '=') buf[i++] = __read_char(); goto out; @@ -1122,7 +1111,7 @@ static enum tep_event_type __read_token(char **tok) break; } - while (get_type(__peek_char()) == type) { + while (get_type(peek_char()) == type) { if (i == (BUFSIZ - 1)) { buf[i] = 0; tok_size += BUFSIZ; @@ -1191,13 +1180,26 @@ static enum tep_event_type force_token(const char *str, char **tok) return type; } -static void free_token(char *tok) +/** + * free_token - free a token returned by tep_read_token + * @token: the token to free + */ +__hidden void free_token(char *tok) { if (tok) free(tok); } -static enum tep_event_type read_token(char **tok) +/** + * read_token - access to utilities to use the tep parser + * @tok: The token to return + * + * This will parse tokens from the string given by + * tep_init_data(). + * + * Returns the token type. + */ +__hidden enum tep_event_type read_token(char **tok) { enum tep_event_type type; @@ -1214,29 +1216,6 @@ static enum tep_event_type read_token(char **tok) return TEP_EVENT_NONE; } -/** - * tep_read_token - access to utilities to use the tep parser - * @tok: The token to return - * - * This will parse tokens from the string given by - * tep_init_data(). - * - * Returns the token type. - */ -enum tep_event_type tep_read_token(char **tok) -{ - return read_token(tok); -} - -/** - * tep_free_token - free a token returned by tep_read_token - * @token: the token to free - */ -void tep_free_token(char *token) -{ - free_token(token); -} - /* no newline */ static enum tep_event_type read_token_item(char **tok) { @@ -3459,12 +3438,12 @@ unsigned long long tep_read_number(struct tep_handle *tep, case 1: return *(unsigned char *)ptr; case 2: - return tep_data2host2(tep, *(unsigned short *)ptr); + return data2host2(tep, *(unsigned short *)ptr); case 4: - return tep_data2host4(tep, *(unsigned int *)ptr); + return data2host4(tep, *(unsigned int *)ptr); case 8: memcpy(&val, (ptr), sizeof(unsigned long long)); - return tep_data2host8(tep, val); + return data2host8(tep, val); default: /* BUG! */ return 0; @@ -4190,7 +4169,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, f = tep_find_any_field(event, arg->string.string); arg->string.offset = f->offset; } - str_offset = tep_data2host4(tep, *(unsigned int *)(data + arg->string.offset)); + str_offset = data2host4(tep, *(unsigned int *)(data + arg->string.offset)); str_offset &= 0xffff; print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset); break; @@ -4208,7 +4187,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, f = tep_find_any_field(event, arg->bitmask.bitmask); arg->bitmask.offset = f->offset; } - bitmask_offset = tep_data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset)); + bitmask_offset = data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset)); bitmask_size = bitmask_offset >> 16; bitmask_offset &= 0xffff; print_bitmask_to_seq(tep, s, format, len_arg, @@ -6750,7 +6729,7 @@ static int find_event_handle(struct tep_handle *tep, struct tep_event *event) } /** - * __tep_parse_format - parse the event format + * parse_format - parse the event format * @buf: the buffer storing the event format string * @size: the size of @buf * @sys: the system the event belongs to @@ -6762,9 +6741,9 @@ static int find_event_handle(struct tep_handle *tep, struct tep_event *event) * * /sys/kernel/debug/tracing/events/.../.../format */ -enum tep_errno __tep_parse_format(struct tep_event **eventp, - struct tep_handle *tep, const char *buf, - unsigned long size, const char *sys) +static enum tep_errno parse_format(struct tep_event **eventp, + struct tep_handle *tep, const char *buf, + unsigned long size, const char *sys) { struct tep_event *event; int ret; @@ -6879,7 +6858,7 @@ __parse_event(struct tep_handle *tep, const char *buf, unsigned long size, const char *sys) { - int ret = __tep_parse_format(eventp, tep, buf, size, sys); + int ret = parse_format(eventp, tep, buf, size, sys); struct tep_event *event = *eventp; if (event == NULL) @@ -6897,7 +6876,7 @@ __parse_event(struct tep_handle *tep, return 0; event_add_failed: - tep_free_event(event); + free_tep_event(event); return ret; } @@ -7490,7 +7469,7 @@ int tep_get_ref(struct tep_handle *tep) return 0; } -void tep_free_format_field(struct tep_format_field *field) +__hidden void free_tep_format_field(struct tep_format_field *field) { free(field->type); if (field->alias != field->name) @@ -7505,7 +7484,7 @@ static void free_format_fields(struct tep_format_field *field) while (field) { next = field->next; - tep_free_format_field(field); + free_tep_format_field(field); field = next; } } @@ -7516,7 +7495,7 @@ static void free_formats(struct tep_format *format) free_format_fields(format->fields); } -void tep_free_event(struct tep_event *event) +__hidden void free_tep_event(struct tep_event *event) { free(event->name); free(event->system); @@ -7602,7 +7581,7 @@ void tep_free(struct tep_handle *tep) } for (i = 0; i < tep->nr_events; i++) - tep_free_event(tep->events[i]); + free_tep_event(tep->events[i]); while (tep->handlers) { handle = tep->handlers; @@ -7613,7 +7592,7 @@ void tep_free(struct tep_handle *tep) free(tep->events); free(tep->sort_events); free(tep->func_resolver); - tep_free_plugin_paths(tep); + free_tep_plugin_paths(tep); free(tep); } diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index c29b693e31ee..a67ad9a5b835 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -578,14 +578,6 @@ void tep_ref(struct tep_handle *tep); void tep_unref(struct tep_handle *tep); int tep_get_ref(struct tep_handle *tep); -/* access to the internal parser */ -void tep_buffer_init(const char *buf, unsigned long long size); -enum tep_event_type tep_read_token(char **tok); -void tep_free_token(char *token); -int tep_peek_char(void); -const char *tep_get_input_buf(void); -unsigned long long tep_get_input_buf_ptr(void); - /* for debugging */ void tep_print_funcs(struct tep_handle *tep); void tep_print_printk(struct tep_handle *tep); diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c index e7c2acb8680f..e7f93d5fe4fd 100644 --- a/tools/lib/traceevent/event-plugin.c +++ b/tools/lib/traceevent/event-plugin.c @@ -676,7 +676,7 @@ int tep_add_plugin_path(struct tep_handle *tep, char *path, return 0; } -void tep_free_plugin_paths(struct tep_handle *tep) +__hidden void free_tep_plugin_paths(struct tep_handle *tep) { struct tep_plugins_dir *dir; diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index c271aeeb227d..368826bb5a57 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -38,8 +38,8 @@ static void show_error(char *error_buf, const char *fmt, ...) int len; int i; - input = tep_get_input_buf(); - index = tep_get_input_buf_ptr(); + input = get_input_buf(); + index = get_input_buf_ptr(); len = input ? strlen(input) : 0; if (len) { @@ -57,25 +57,20 @@ static void show_error(char *error_buf, const char *fmt, ...) va_end(ap); } -static void free_token(char *token) -{ - tep_free_token(token); -} - -static enum tep_event_type read_token(char **tok) +static enum tep_event_type filter_read_token(char **tok) { enum tep_event_type type; char *token = NULL; do { free_token(token); - type = tep_read_token(&token); + type = read_token(&token); } while (type == TEP_EVENT_NEWLINE || type == TEP_EVENT_SPACE); /* If token is = or ! check to see if the next char is ~ */ if (token && (strcmp(token, "=") == 0 || strcmp(token, "!") == 0) && - tep_peek_char() == '~') { + peek_char() == '~') { /* append it */ *tok = malloc(3); if (*tok == NULL) { @@ -85,7 +80,7 @@ static enum tep_event_type read_token(char **tok) sprintf(*tok, "%c%c", *token, '~'); free_token(token); /* Now remove the '~' from the buffer */ - tep_read_token(&token); + read_token(&token); free_token(token); } else *tok = token; @@ -959,7 +954,7 @@ process_filter(struct tep_event *event, struct tep_filter_arg **parg, do { free(token); - type = read_token(&token); + type = filter_read_token(&token); switch (type) { case TEP_EVENT_SQUOTE: case TEP_EVENT_DQUOTE: @@ -1185,7 +1180,7 @@ process_event(struct tep_event *event, const char *filter_str, { int ret; - tep_buffer_init(filter_str, strlen(filter_str)); + init_input_buf(filter_str, strlen(filter_str)); ret = process_filter(event, parg, error_str, 0); if (ret < 0) @@ -1243,7 +1238,7 @@ filter_event(struct tep_event_filter *filter, struct tep_event *event, static void filter_init_error_buf(struct tep_event_filter *filter) { /* clear buffer to reset show error */ - tep_buffer_init("", 0); + init_input_buf("", 0); filter->error_buffer[0] = '\0'; } -- cgit v1.3.1 From edac75a2f8c835c0d7768002a02ecd56c05f9981 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Sun, 11 Oct 2020 20:10:22 +0800 Subject: perf c2c: Update usage for showing memory events Since commit b027cc6fdf1b ("perf c2c: Fix 'perf c2c record -e list' to show the default events used"), "perf c2c" tool can show the memory events properly, it's no reason to still suggest user to use the command "perf mem record -e list" for showing events. This patch updates the usage for showing memory events with command "perf c2c record -e list". Signed-off-by: Leo Yan Acked-by: Jiri Olsa Acked-by: Ian Rogers Signed-off-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20201011121022.22409-1-leo.yan@linaro.org --- tools/perf/builtin-c2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 5938b100eaf4..57bb6cce43e3 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2916,7 +2916,7 @@ static int perf_c2c__record(int argc, const char **argv) bool event_set = false; struct option options[] = { OPT_CALLBACK('e', "event", &event_set, "event", - "event selector. Use 'perf mem record -e list' to list available events", + "event selector. Use 'perf c2c record -e list' to list available events", parse_record_events), OPT_BOOLEAN('u', "all-user", &all_user, "collect only user level data"), OPT_BOOLEAN('k', "all-kernel", &all_kernel, "collect only kernel level data"), -- cgit v1.3.1 From f3013f7ed465479e60c1ab1921a5718fc541cc3b Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 1 Oct 2020 11:34:19 +0200 Subject: perf trace: Fix off by ones in memset() after realloc() in arches using libaudit 'perf trace ls' started crashing after commit d21cb73a9025 on !HAVE_SYSCALL_TABLE_SUPPORT configs (armv7l here) like this: 0 strlen () at ../sysdeps/arm/armv6t2/strlen.S:126 1 0xb6800780 in __vfprintf_internal (s=0xbeff9908, s@entry=0xbeff9900, format=0xa27160 "]: %s()", ap=..., mode_flags=) at vfprintf-internal.c:1688 ... 5 0x0056ecdc in fprintf (__fmt=0xa27160 "]: %s()", __stream=) at /usr/include/bits/stdio2.h:100 6 trace__sys_exit (trace=trace@entry=0xbeffc710, evsel=evsel@entry=0xd968d0, event=, sample=sample@entry=0xbeffc3e8) at builtin-trace.c:2475 7 0x00566d40 in trace__handle_event (sample=0xbeffc3e8, event=, trace=0xbeffc710) at builtin-trace.c:3122 ... 15 main (argc=2, argv=0xbefff6e8) at perf.c:538 It is because memset in trace__read_syscall_info zeroes wrong memory: 1) when initializing for the first time, it does not reset the last id. 2) in other cases, it resets the last id of previous buffer. ad 1) it causes the crash above as sc->name used in the fprintf above contains garbage. ad 2) it sets nonexistent from true back to false for id 11 here. Not sure, what the consequences are. So fix it by introducing a special case for the initial initialization and do the right +1 in both cases. Fixes: d21cb73a9025 ("perf trace: Grow the syscall table as needed when using libaudit") Signed-off-by: Jiri Slaby Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20201001093419.15761-1-jslaby@suse.cz Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index bea461b6f937..44a75f234db1 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1762,7 +1762,11 @@ static int trace__read_syscall_info(struct trace *trace, int id) if (table == NULL) return -ENOMEM; - memset(table + trace->sctbl->syscalls.max_id, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc)); + // Need to memset from offset 0 and +1 members if brand new + if (trace->syscalls.table == NULL) + memset(table, 0, (id + 1) * sizeof(*sc)); + else + memset(table + trace->sctbl->syscalls.max_id + 1, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc)); trace->syscalls.table = table; trace->sctbl->syscalls.max_id = id; -- cgit v1.3.1 From 6cf4ecf5c51d735433b3348ad055187536b1b4b6 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 9 Oct 2020 14:25:23 +0200 Subject: perf build: Allow nested externs to enable BUILD_BUG() usage Currently BUILD_BUG() macro is expanded to smth like the following: do { extern void __compiletime_assert_0(void) __attribute__((error("BUILD_BUG failed"))); if (!(!(1))) __compiletime_assert_0(); } while (0); If used in a function body this obviously would produce build errors with -Wnested-externs and -Werror. To enable BUILD_BUG() usage in tools/arch/x86/lib/insn.c which perf includes in intel-pt-decoder, build perf without -Wnested-externs. Reported-by: Stephen Rothwell Signed-off-by: Vasily Gorbik Acked-by: Jiri Olsa Tested-by: Stephen Rothwell # build tested Cc: Alexander Shishkin Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lore.kernel.org/lkml/patch-1.thread-251403.git-2514037e9477.your-ad-here.call-01602244460-ext-7088@work.hours Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 854da830b5ca..834061e94e7c 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -16,7 +16,7 @@ $(shell printf "" > $(OUTPUT).config-detected) detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected) detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected) -CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS) +CFLAGS := $(EXTRA_CFLAGS) $(filter-out -Wnested-externs,$(EXTRA_WARNINGS)) include $(srctree)/tools/scripts/Makefile.arch -- cgit v1.3.1 From 0fd0f00fdbc9fdae7651b048d546fdfab2e47fce Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Oct 2020 16:22:03 -0300 Subject: perf tests: Show python test script in verbose mode To help figure out where it is getting the binding. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/python-use.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c index 40ab72149ce1..98c6d474aa6f 100644 --- a/tools/perf/tests/python-use.c +++ b/tools/perf/tests/python-use.c @@ -18,6 +18,7 @@ int test__python_use(struct test *test __maybe_unused, int subtest __maybe_unuse PYTHONPATH, PYTHON, verbose > 0 ? "" : "2> /dev/null") < 0) return -1; + pr_debug("python usage test: \"%s\"\n", cmd); ret = system(cmd) ? -1 : 0; free(cmd); return ret; -- cgit v1.3.1 From 79373082fa9de8bea909836ee33ca0bc199f71f6 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 5 Oct 2020 09:06:45 +0100 Subject: perf python: Autodetect python3 binary Some distros don't come with python2 and only have python3 available. This causes the "'import perf' in python" self test to fail. This change adds python3 to the list of possible python versions that are autodetected but maintains the priorities for 'python2' and 'python' detection. Python3 has the lowest priority. Committer notes: On a fedora system without python2 packages the 'perf test python' continues to work: # python2 bash: python2: command not found... Similar command is: 'python' # rpm -qa | grep python2 # That "Similar command" gives the clue: # rpm -qf /usr/bin/python python-unversioned-command-3.8.5-5.fc32.noarch # rpm -ql python-unversioned-command /usr/bin/python /usr/share/man/man1/python.1.gz # With it in place the 'python' binary is found and perf builds the python binding using python3: # perf test -v python 19: 'import perf' in python : --- start --- test child forked, pid 379988 python usage test: "echo "import sys ; sys.path.append('/tmp/build/perf/python'); import perf" | '/usr/bin/python' " test child finished with 0 ---- end ---- 'import perf' in python: Ok # Looking at that path: # ls -la /tmp/build/perf/python total 1864 drwxrwxr-x. 2 acme acme 60 Oct 13 16:20 . drwxrwxr-x. 18 acme acme 4420 Oct 13 16:28 .. -rwxrwxr-x. 1 acme acme 1907216 Oct 13 16:28 perf.cpython-38-x86_64-linux-gnu.so # And: # ldd ~/bin/perf | grep python libpython3.8.so.1.0 => /lib64/libpython3.8.so.1.0 (0x00007f5471187000) # As soon as we remove it: # rpm -e python-unversioned-command-3.8.5-5.fc32.noarch # hash -r # python bash: python: command not found... Install package 'python-unversioned-command' to provide command 'python'? [N/y] n # And rebuilding perf now doesn't find python in the system: make: Entering directory '/home/acme/git/perf/tools/perf' BUILD: Doing 'make -j24' parallel build Makefile.config:786: No python interpreter was found: disables Python support - please install python-devel/python-dev After this patch: $ rpm -qi python-unversioned-command package python-unversioned-command is not installed $ $ python bash: python: command not found... Install package 'python-unversioned-command' to provide command 'python'? [N/y] ^C $ $ m make: Entering directory '/home/acme/git/perf/tools/perf' BUILD: Doing 'make -j24' parallel build CC /tmp/build/perf/tests/attr.o CC /tmp/build/perf/tests/python-use.o DESCEND plugins GEN /tmp/build/perf/python/perf.so INSTALL trace_plugins LD /tmp/build/perf/tests/perf-in.o LD /tmp/build/perf/perf-in.o LINK /tmp/build/perf/perf make: Leaving directory '/home/acme/git/perf/tools/perf' 19: 'import perf' in python : Ok $ ldd ~/bin/perf | grep python libpython3.8.so.1.0 => /lib64/libpython3.8.so.1.0 (0x00007f2c8c708000) $ ls -la /tmp/build/perf/python total 1864 drwxrwxr-x. 2 acme acme 60 Oct 13 16:20 . drwxrwxr-x. 18 acme acme 4420 Oct 13 16:31 .. -rwxrwxr-x. 1 acme acme 1907216 Oct 13 16:31 perf.cpython-38-x86_64-linux-gnu.so $ Signed-off-by: James Clark Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra LPU-Reference: 20201005080645.6588-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 834061e94e7c..82c045da6ada 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -223,14 +223,17 @@ endif # Try different combinations to accommodate systems that only have # python[2][-config] in weird combinations but always preferring -# python2 and python2-config as per pep-0394. If we catch a -# python[-config] in version 3, the version check will kill it. -PYTHON2 := $(if $(call get-executable,python2),python2,python) -override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2)) -PYTHON2_CONFIG := \ +# python2 and python2-config as per pep-0394. If python2 or python +# aren't found, then python3 is used. +PYTHON_AUTO := python +PYTHON_AUTO := $(if $(call get-executable,python3),python3,$(PYTHON_AUTO)) +PYTHON_AUTO := $(if $(call get-executable,python),python,$(PYTHON_AUTO)) +PYTHON_AUTO := $(if $(call get-executable,python2),python2,$(PYTHON_AUTO)) +override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON_AUTO)) +PYTHON_AUTO_CONFIG := \ $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config) override PYTHON_CONFIG := \ - $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG)) + $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON_AUTO_CONFIG)) grep-libs = $(filter -l%,$(1)) strip-libs = $(filter-out -l%,$(1)) -- cgit v1.3.1 From 79bbbabd227897745786e81ed814ad86c5e1295d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Oct 2020 17:03:19 -0300 Subject: perf config: Export the perf_config_from_file() function We'll use it to ask for extra config files to be loaded, profile like stuff that will be used first to make 'perf trace' mimic 'strace' output via a 'perf strace' command that just sets up 'perf trace' output. At some point it'll be used for regression tests, where we'll run some simple commands like: perf strace ls > perf-strace.output strace ls > strace.output And then do some mutable syscall arg aware diff like tool to deal with arguments for things like mmap, that change at each execution, to be first ignored and then properly tracked when used accoss multiple syscalls. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/config.c | 2 +- tools/perf/util/config.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 20be0504fb95..6969f82843ee 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -489,7 +489,7 @@ int perf_default_config(const char *var, const char *value, return 0; } -static int perf_config_from_file(config_fn_t fn, const char *filename, void *data) +int perf_config_from_file(config_fn_t fn, const char *filename, void *data) { int ret; FILE *f = fopen(filename, "r"); diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index c10b66dde2f3..8c881e3a3ec3 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -26,6 +26,8 @@ struct perf_config_set { extern const char *config_exclusive_filename; typedef int (*config_fn_t)(const char *, const char *, void *); + +int perf_config_from_file(config_fn_t fn, const char *filename, void *data); int perf_default_config(const char *, const char *, void *); int perf_config(config_fn_t fn, void *); int perf_config_int(int *dest, const char *, const char *); -- cgit v1.3.1 From f5516ec5efb9fe0f426a46eeef25d389d3c2f988 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 13 Oct 2020 16:49:43 -0700 Subject: device-dax: make pgmap optional for instance creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The passed in dev_pagemap is only required in the pmem case as the libnvdimm core may have reserved a vmem_altmap for dev_memremap_pages() to place the memmap in pmem directly. In the hmem case there is no agent reserving an altmap so it can all be handled by a core internal default. Pass the resource range via a new @range property of 'struct dev_dax_data'. Signed-off-by: Dan Williams Signed-off-by: Andrew Morton Cc: David Hildenbrand Cc: Vishal Verma Cc: Dave Hansen Cc: Pavel Tatashin Cc: Brice Goglin Cc: Dave Jiang Cc: Ira Weiny Cc: Jia He Cc: Joao Martins Cc: Jonathan Cameron Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Ard Biesheuvel Cc: Benjamin Herrenschmidt Cc: Ben Skeggs Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Boris Ostrovsky Cc: Catalin Marinas Cc: Daniel Vetter Cc: David Airlie Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Hulk Robot Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Jason Yan Cc: Jeff Moyer Cc: "Jérôme Glisse" Cc: Juergen Gross Cc: kernel test robot Cc: Michael Ellerman Cc: Mike Rapoport Cc: Paul Mackerras Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Randy Dunlap Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Tom Lendacky Cc: Vivek Goyal Cc: Wei Yang Cc: Will Deacon Link: https://lkml.kernel.org/r/159643099958.4062302.10379230791041872886.stgit@dwillia2-desk3.amr.corp.intel.com Link: https://lkml.kernel.org/r/160106110513.30709.4303239334850606031.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Linus Torvalds --- drivers/dax/bus.c | 29 +++++++++++++++-------------- drivers/dax/bus.h | 2 ++ drivers/dax/dax-private.h | 9 ++++++++- drivers/dax/device.c | 28 +++++++++++++++++++--------- drivers/dax/hmem/hmem.c | 8 ++++---- drivers/dax/kmem.c | 12 ++++++------ drivers/dax/pmem/core.c | 4 ++++ tools/testing/nvdimm/dax-dev.c | 8 ++++---- 8 files changed, 62 insertions(+), 38 deletions(-) (limited to 'tools') diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index dffa4655e128..96bd64ba95a5 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -271,7 +271,7 @@ static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) { struct dev_dax *dev_dax = to_dev_dax(dev); - unsigned long long size = resource_size(&dev_dax->region->res); + unsigned long long size = range_len(&dev_dax->range); return sprintf(buf, "%llu\n", size); } @@ -293,19 +293,12 @@ static ssize_t target_node_show(struct device *dev, } static DEVICE_ATTR_RO(target_node); -static unsigned long long dev_dax_resource(struct dev_dax *dev_dax) -{ - struct dax_region *dax_region = dev_dax->region; - - return dax_region->res.start; -} - static ssize_t resource_show(struct device *dev, struct device_attribute *attr, char *buf) { struct dev_dax *dev_dax = to_dev_dax(dev); - return sprintf(buf, "%#llx\n", dev_dax_resource(dev_dax)); + return sprintf(buf, "%#llx\n", dev_dax->range.start); } static DEVICE_ATTR(resource, 0400, resource_show, NULL); @@ -376,6 +369,7 @@ static void dev_dax_release(struct device *dev) dax_region_put(dax_region); put_dax(dax_dev); + kfree(dev_dax->pgmap); kfree(dev_dax); } @@ -412,7 +406,12 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) if (!dev_dax) return ERR_PTR(-ENOMEM); - memcpy(&dev_dax->pgmap, data->pgmap, sizeof(struct dev_pagemap)); + if (data->pgmap) { + dev_dax->pgmap = kmemdup(data->pgmap, + sizeof(struct dev_pagemap), GFP_KERNEL); + if (!dev_dax->pgmap) + goto err_pgmap; + } /* * No 'host' or dax_operations since there is no access to this @@ -421,18 +420,19 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC); if (IS_ERR(dax_dev)) { rc = PTR_ERR(dax_dev); - goto err; + goto err_alloc_dax; } /* a device_dax instance is dead while the driver is not attached */ kill_dax(dax_dev); - /* from here on we're committed to teardown via dax_dev_release() */ + /* from here on we're committed to teardown via dev_dax_release() */ dev = &dev_dax->dev; device_initialize(dev); dev_dax->dax_dev = dax_dev; dev_dax->region = dax_region; + dev_dax->range = data->range; dev_dax->target_node = dax_region->target_node; kref_get(&dax_region->kref); @@ -458,8 +458,9 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) return ERR_PTR(rc); return dev_dax; - - err: +err_alloc_dax: + kfree(dev_dax->pgmap); +err_pgmap: kfree(dev_dax); return ERR_PTR(rc); diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h index 299c2e7fac09..4aeb36da83a4 100644 --- a/drivers/dax/bus.h +++ b/drivers/dax/bus.h @@ -3,6 +3,7 @@ #ifndef __DAX_BUS_H__ #define __DAX_BUS_H__ #include +#include struct dev_dax; struct resource; @@ -21,6 +22,7 @@ struct dev_dax_data { struct dax_region *dax_region; struct dev_pagemap *pgmap; enum dev_dax_subsys subsys; + struct range range; int id; }; diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h index 8a4c40ccd2ef..6779f683671d 100644 --- a/drivers/dax/dax-private.h +++ b/drivers/dax/dax-private.h @@ -41,6 +41,7 @@ struct dax_region { * @target_node: effective numa node if dev_dax memory range is onlined * @dev - device core * @pgmap - pgmap for memmap setup / lifetime (driver owned) + * @range: resource range for the instance * @dax_mem_res: physical address range of hotadded DAX memory * @dax_mem_name: name for hotadded DAX memory via add_memory_driver_managed() */ @@ -49,10 +50,16 @@ struct dev_dax { struct dax_device *dax_dev; int target_node; struct device dev; - struct dev_pagemap pgmap; + struct dev_pagemap *pgmap; + struct range range; struct resource *dax_kmem_res; }; +static inline u64 range_len(struct range *range) +{ + return range->end - range->start + 1; +} + static inline struct dev_dax *to_dev_dax(struct device *dev) { return container_of(dev, struct dev_dax, dev); diff --git a/drivers/dax/device.c b/drivers/dax/device.c index c528b725789b..287cf0a3db23 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -55,12 +55,12 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, unsigned long size) { - struct resource *res = &dev_dax->region->res; + struct range *range = &dev_dax->range; phys_addr_t phys; - phys = pgoff * PAGE_SIZE + res->start; - if (phys >= res->start && phys <= res->end) { - if (phys + size - 1 <= res->end) + phys = pgoff * PAGE_SIZE + range->start; + if (phys >= range->start && phys <= range->end) { + if (phys + size - 1 <= range->end) return phys; } @@ -396,21 +396,31 @@ int dev_dax_probe(struct device *dev) { struct dev_dax *dev_dax = to_dev_dax(dev); struct dax_device *dax_dev = dev_dax->dax_dev; - struct resource *res = &dev_dax->region->res; + struct range *range = &dev_dax->range; + struct dev_pagemap *pgmap; struct inode *inode; struct cdev *cdev; void *addr; int rc; /* 1:1 map region resource range to device-dax instance range */ - if (!devm_request_mem_region(dev, res->start, resource_size(res), + if (!devm_request_mem_region(dev, range->start, range_len(range), dev_name(dev))) { - dev_warn(dev, "could not reserve region %pR\n", res); + dev_warn(dev, "could not reserve range: %#llx - %#llx\n", + range->start, range->end); return -EBUSY; } - dev_dax->pgmap.type = MEMORY_DEVICE_GENERIC; - addr = devm_memremap_pages(dev, &dev_dax->pgmap); + pgmap = dev_dax->pgmap; + if (!pgmap) { + pgmap = devm_kzalloc(dev, sizeof(*pgmap), GFP_KERNEL); + if (!pgmap) + return -ENOMEM; + pgmap->res.start = range->start; + pgmap->res.end = range->end; + } + pgmap->type = MEMORY_DEVICE_GENERIC; + addr = devm_memremap_pages(dev, pgmap); if (IS_ERR(addr)) return PTR_ERR(addr); diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index b84fe17178d8..af82d6ba820a 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -8,7 +8,6 @@ static int dax_hmem_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct dev_pagemap pgmap = { }; struct dax_region *dax_region; struct memregion_info *mri; struct dev_dax_data data; @@ -20,8 +19,6 @@ static int dax_hmem_probe(struct platform_device *pdev) return -ENOMEM; mri = dev->platform_data; - memcpy(&pgmap.res, res, sizeof(*res)); - dax_region = alloc_dax_region(dev, pdev->id, res, mri->target_node, PMD_SIZE); if (!dax_region) @@ -30,7 +27,10 @@ static int dax_hmem_probe(struct platform_device *pdev) data = (struct dev_dax_data) { .dax_region = dax_region, .id = 0, - .pgmap = &pgmap, + .range = { + .start = res->start, + .end = res->end, + }, }; dev_dax = devm_create_dev_dax(&data); if (IS_ERR(dev_dax)) diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c index 275aa5f87399..5bb133df147d 100644 --- a/drivers/dax/kmem.c +++ b/drivers/dax/kmem.c @@ -22,7 +22,7 @@ static bool any_hotremove_failed; int dev_dax_kmem_probe(struct device *dev) { struct dev_dax *dev_dax = to_dev_dax(dev); - struct resource *res = &dev_dax->region->res; + struct range *range = &dev_dax->range; resource_size_t kmem_start; resource_size_t kmem_size; resource_size_t kmem_end; @@ -39,17 +39,17 @@ int dev_dax_kmem_probe(struct device *dev) */ numa_node = dev_dax->target_node; if (numa_node < 0) { - dev_warn(dev, "rejecting DAX region %pR with invalid node: %d\n", - res, numa_node); + dev_warn(dev, "rejecting DAX region with invalid node: %d\n", + numa_node); return -EINVAL; } /* Hotplug starting at the beginning of the next block: */ - kmem_start = ALIGN(res->start, memory_block_size_bytes()); + kmem_start = ALIGN(range->start, memory_block_size_bytes()); - kmem_size = resource_size(res); + kmem_size = range_len(range); /* Adjust the size down to compensate for moving up kmem_start: */ - kmem_size -= kmem_start - res->start; + kmem_size -= kmem_start - range->start; /* Align the size down to cover only complete blocks: */ kmem_size &= ~(memory_block_size_bytes() - 1); kmem_end = kmem_start + kmem_size; diff --git a/drivers/dax/pmem/core.c b/drivers/dax/pmem/core.c index 08ee5947a49c..4fa81d3d2f65 100644 --- a/drivers/dax/pmem/core.c +++ b/drivers/dax/pmem/core.c @@ -63,6 +63,10 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys) .id = id, .pgmap = &pgmap, .subsys = subsys, + .range = { + .start = res.start, + .end = res.end, + }, }; dev_dax = devm_create_dev_dax(&data); diff --git a/tools/testing/nvdimm/dax-dev.c b/tools/testing/nvdimm/dax-dev.c index 7e5d979e73cb..38d8e55c4a0d 100644 --- a/tools/testing/nvdimm/dax-dev.c +++ b/tools/testing/nvdimm/dax-dev.c @@ -9,12 +9,12 @@ phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, unsigned long size) { - struct resource *res = &dev_dax->region->res; + struct range *range = &dev_dax->range; phys_addr_t addr; - addr = pgoff * PAGE_SIZE + res->start; - if (addr >= res->start && addr <= res->end) { - if (addr + size - 1 <= res->end) { + addr = pgoff * PAGE_SIZE + range->start; + if (addr >= range->start && addr <= range->end) { + if (addr + size - 1 <= range->end) { if (get_nfit_res(addr)) { struct page *page; -- cgit v1.3.1 From a4574f63edc6f76fb46dcd65d3eb4d5a8e23ba38 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 13 Oct 2020 16:50:29 -0700 Subject: mm/memremap_pages: convert to 'struct range' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'struct resource' in 'struct dev_pagemap' is only used for holding resource span information. The other fields, 'name', 'flags', 'desc', 'parent', 'sibling', and 'child' are all unused wasted space. This is in preparation for introducing a multi-range extension of devm_memremap_pages(). The bulk of this change is unwinding all the places internal to libnvdimm that used 'struct resource' unnecessarily, and replacing instances of 'struct dev_pagemap'.res with 'struct dev_pagemap'.range. P2PDMA had a minor usage of the resource flags field, but only to report failures with "%pR". That is replaced with an open coded print of the range. [dan.carpenter@oracle.com: mm/hmm/test: use after free in dmirror_allocate_chunk()] Link: https://lkml.kernel.org/r/20200926121402.GA7467@kadam Signed-off-by: Dan Williams Signed-off-by: Dan Carpenter Signed-off-by: Andrew Morton Reviewed-by: Boris Ostrovsky [xen] Cc: Paul Mackerras Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Vishal Verma Cc: Vivek Goyal Cc: Dave Jiang Cc: Ben Skeggs Cc: David Airlie Cc: Daniel Vetter Cc: Ira Weiny Cc: Bjorn Helgaas Cc: Juergen Gross Cc: Stefano Stabellini Cc: "Jérôme Glisse" Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brice Goglin Cc: Catalin Marinas Cc: Dave Hansen Cc: David Hildenbrand Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Hulk Robot Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Jason Yan Cc: Jeff Moyer Cc: Jia He Cc: Joao Martins Cc: Jonathan Cameron Cc: kernel test robot Cc: Mike Rapoport Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Randy Dunlap Cc: Thomas Gleixner Cc: Tom Lendacky Cc: Wei Yang Cc: Will Deacon Link: https://lkml.kernel.org/r/159643103173.4062302.768998885691711532.stgit@dwillia2-desk3.amr.corp.intel.com Link: https://lkml.kernel.org/r/160106115761.30709.13539840236873663620.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Linus Torvalds --- arch/powerpc/kvm/book3s_hv_uvmem.c | 13 +++--- drivers/dax/bus.c | 10 ++--- drivers/dax/bus.h | 2 +- drivers/dax/dax-private.h | 5 --- drivers/dax/device.c | 3 +- drivers/dax/hmem/hmem.c | 5 ++- drivers/dax/pmem/core.c | 12 +++--- drivers/gpu/drm/nouveau/nouveau_dmem.c | 14 +++---- drivers/nvdimm/badrange.c | 26 ++++++------ drivers/nvdimm/claim.c | 13 +++--- drivers/nvdimm/nd.h | 3 +- drivers/nvdimm/pfn_devs.c | 12 +++--- drivers/nvdimm/pmem.c | 26 ++++++------ drivers/nvdimm/region.c | 21 ++++++---- drivers/pci/p2pdma.c | 11 +++-- drivers/xen/unpopulated-alloc.c | 44 ++++++++++++------- include/linux/memremap.h | 5 ++- include/linux/range.h | 6 +++ lib/test_hmm.c | 50 +++++++++++----------- mm/memremap.c | 77 +++++++++++++++++----------------- tools/testing/nvdimm/test/iomap.c | 2 +- 21 files changed, 195 insertions(+), 165 deletions(-) (limited to 'tools') diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 7705d5557239..29ec555055c2 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -687,9 +687,9 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm) struct kvmppc_uvmem_page_pvt *pvt; unsigned long pfn_last, pfn_first; - pfn_first = kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT; + pfn_first = kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT; pfn_last = pfn_first + - (resource_size(&kvmppc_uvmem_pgmap.res) >> PAGE_SHIFT); + (range_len(&kvmppc_uvmem_pgmap.range) >> PAGE_SHIFT); spin_lock(&kvmppc_uvmem_bitmap_lock); bit = find_first_zero_bit(kvmppc_uvmem_bitmap, @@ -1007,7 +1007,7 @@ static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf) static void kvmppc_uvmem_page_free(struct page *page) { unsigned long pfn = page_to_pfn(page) - - (kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT); + (kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT); struct kvmppc_uvmem_page_pvt *pvt; spin_lock(&kvmppc_uvmem_bitmap_lock); @@ -1170,7 +1170,8 @@ int kvmppc_uvmem_init(void) } kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE; - kvmppc_uvmem_pgmap.res = *res; + kvmppc_uvmem_pgmap.range.start = res->start; + kvmppc_uvmem_pgmap.range.end = res->end; kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops; /* just one global instance: */ kvmppc_uvmem_pgmap.owner = &kvmppc_uvmem_pgmap; @@ -1205,7 +1206,7 @@ void kvmppc_uvmem_free(void) return; memunmap_pages(&kvmppc_uvmem_pgmap); - release_mem_region(kvmppc_uvmem_pgmap.res.start, - resource_size(&kvmppc_uvmem_pgmap.res)); + release_mem_region(kvmppc_uvmem_pgmap.range.start, + range_len(&kvmppc_uvmem_pgmap.range)); kfree(kvmppc_uvmem_bitmap); } diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 53d07f2f1285..00fa73a8dfb4 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -515,7 +515,7 @@ static void dax_region_unregister(void *region) } struct dax_region *alloc_dax_region(struct device *parent, int region_id, - struct resource *res, int target_node, unsigned int align, + struct range *range, int target_node, unsigned int align, unsigned long flags) { struct dax_region *dax_region; @@ -530,8 +530,8 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id, return NULL; } - if (!IS_ALIGNED(res->start, align) - || !IS_ALIGNED(resource_size(res), align)) + if (!IS_ALIGNED(range->start, align) + || !IS_ALIGNED(range_len(range), align)) return NULL; dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL); @@ -546,8 +546,8 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id, dax_region->target_node = target_node; ida_init(&dax_region->ida); dax_region->res = (struct resource) { - .start = res->start, - .end = res->end, + .start = range->start, + .end = range->end, .flags = IORESOURCE_MEM | flags, }; diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h index da27ea70a19a..72b92f95509f 100644 --- a/drivers/dax/bus.h +++ b/drivers/dax/bus.h @@ -13,7 +13,7 @@ void dax_region_put(struct dax_region *dax_region); #define IORESOURCE_DAX_STATIC (1UL << 0) struct dax_region *alloc_dax_region(struct device *parent, int region_id, - struct resource *res, int target_node, unsigned int align, + struct range *range, int target_node, unsigned int align, unsigned long flags); enum dev_dax_subsys { diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h index b81a1494d82b..0cbb2ec81ca7 100644 --- a/drivers/dax/dax-private.h +++ b/drivers/dax/dax-private.h @@ -61,11 +61,6 @@ struct dev_dax { struct range range; }; -static inline u64 range_len(struct range *range) -{ - return range->end - range->start + 1; -} - static inline struct dev_dax *to_dev_dax(struct device *dev) { return container_of(dev, struct dev_dax, dev); diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 9833fa83b537..a14448bca83d 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -416,8 +416,7 @@ int dev_dax_probe(struct dev_dax *dev_dax) pgmap = devm_kzalloc(dev, sizeof(*pgmap), GFP_KERNEL); if (!pgmap) return -ENOMEM; - pgmap->res.start = range->start; - pgmap->res.end = range->end; + pgmap->range = *range; } pgmap->type = MEMORY_DEVICE_GENERIC; addr = devm_memremap_pages(dev, pgmap); diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index aa260009dfc7..1a3347bb6143 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -13,13 +13,16 @@ static int dax_hmem_probe(struct platform_device *pdev) struct dev_dax_data data; struct dev_dax *dev_dax; struct resource *res; + struct range range; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) return -ENOMEM; mri = dev->platform_data; - dax_region = alloc_dax_region(dev, pdev->id, res, mri->target_node, + range.start = res->start; + range.end = res->end; + dax_region = alloc_dax_region(dev, pdev->id, &range, mri->target_node, PMD_SIZE, 0); if (!dax_region) return -ENOMEM; diff --git a/drivers/dax/pmem/core.c b/drivers/dax/pmem/core.c index 4fe700884338..62b26bfceab1 100644 --- a/drivers/dax/pmem/core.c +++ b/drivers/dax/pmem/core.c @@ -9,7 +9,7 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys) { - struct resource res; + struct range range; int rc, id, region_id; resource_size_t offset; struct nd_pfn_sb *pfn_sb; @@ -50,10 +50,10 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys) if (rc != 2) return ERR_PTR(-EINVAL); - /* adjust the dax_region resource to the start of data */ - memcpy(&res, &pgmap.res, sizeof(res)); - res.start += offset; - dax_region = alloc_dax_region(dev, region_id, &res, + /* adjust the dax_region range to the start of data */ + range = pgmap.range; + range.start += offset, + dax_region = alloc_dax_region(dev, region_id, &range, nd_region->target_node, le32_to_cpu(pfn_sb->align), IORESOURCE_DAX_STATIC); if (!dax_region) @@ -64,7 +64,7 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys) .id = id, .pgmap = &pgmap, .subsys = subsys, - .size = resource_size(&res), + .size = range_len(&range), }; dev_dax = devm_create_dev_dax(&data); diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 4e8112fde3e6..25811ed7e274 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -101,7 +101,7 @@ unsigned long nouveau_dmem_page_addr(struct page *page) { struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page); unsigned long off = (page_to_pfn(page) << PAGE_SHIFT) - - chunk->pagemap.res.start; + chunk->pagemap.range.start; return chunk->bo->offset + off; } @@ -249,7 +249,8 @@ nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage) chunk->drm = drm; chunk->pagemap.type = MEMORY_DEVICE_PRIVATE; - chunk->pagemap.res = *res; + chunk->pagemap.range.start = res->start; + chunk->pagemap.range.end = res->end; chunk->pagemap.ops = &nouveau_dmem_pagemap_ops; chunk->pagemap.owner = drm->dev; @@ -273,7 +274,7 @@ nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage) list_add(&chunk->list, &drm->dmem->chunks); mutex_unlock(&drm->dmem->mutex); - pfn_first = chunk->pagemap.res.start >> PAGE_SHIFT; + pfn_first = chunk->pagemap.range.start >> PAGE_SHIFT; page = pfn_to_page(pfn_first); spin_lock(&drm->dmem->lock); for (i = 0; i < DMEM_CHUNK_NPAGES - 1; ++i, ++page) { @@ -294,8 +295,7 @@ out_bo_unpin: out_bo_free: nouveau_bo_ref(NULL, &chunk->bo); out_release: - release_mem_region(chunk->pagemap.res.start, - resource_size(&chunk->pagemap.res)); + release_mem_region(chunk->pagemap.range.start, range_len(&chunk->pagemap.range)); out_free: kfree(chunk); out: @@ -382,8 +382,8 @@ nouveau_dmem_fini(struct nouveau_drm *drm) nouveau_bo_ref(NULL, &chunk->bo); list_del(&chunk->list); memunmap_pages(&chunk->pagemap); - release_mem_region(chunk->pagemap.res.start, - resource_size(&chunk->pagemap.res)); + release_mem_region(chunk->pagemap.range.start, + range_len(&chunk->pagemap.range)); kfree(chunk); } diff --git a/drivers/nvdimm/badrange.c b/drivers/nvdimm/badrange.c index b9eeefa27e3a..aaf6e215a8c6 100644 --- a/drivers/nvdimm/badrange.c +++ b/drivers/nvdimm/badrange.c @@ -211,7 +211,7 @@ static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len) } static void badblocks_populate(struct badrange *badrange, - struct badblocks *bb, const struct resource *res) + struct badblocks *bb, const struct range *range) { struct badrange_entry *bre; @@ -222,34 +222,34 @@ static void badblocks_populate(struct badrange *badrange, u64 bre_end = bre->start + bre->length - 1; /* Discard intervals with no intersection */ - if (bre_end < res->start) + if (bre_end < range->start) continue; - if (bre->start > res->end) + if (bre->start > range->end) continue; /* Deal with any overlap after start of the namespace */ - if (bre->start >= res->start) { + if (bre->start >= range->start) { u64 start = bre->start; u64 len; - if (bre_end <= res->end) + if (bre_end <= range->end) len = bre->length; else - len = res->start + resource_size(res) + len = range->start + range_len(range) - bre->start; - __add_badblock_range(bb, start - res->start, len); + __add_badblock_range(bb, start - range->start, len); continue; } /* * Deal with overlap for badrange starting before * the namespace. */ - if (bre->start < res->start) { + if (bre->start < range->start) { u64 len; - if (bre_end < res->end) - len = bre->start + bre->length - res->start; + if (bre_end < range->end) + len = bre->start + bre->length - range->start; else - len = resource_size(res); + len = range_len(range); __add_badblock_range(bb, 0, len); } } @@ -267,7 +267,7 @@ static void badblocks_populate(struct badrange *badrange, * and add badblocks entries for all matching sub-ranges */ void nvdimm_badblocks_populate(struct nd_region *nd_region, - struct badblocks *bb, const struct resource *res) + struct badblocks *bb, const struct range *range) { struct nvdimm_bus *nvdimm_bus; @@ -279,7 +279,7 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region, nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); nvdimm_bus_lock(&nvdimm_bus->dev); - badblocks_populate(&nvdimm_bus->badrange, bb, res); + badblocks_populate(&nvdimm_bus->badrange, bb, range); nvdimm_bus_unlock(&nvdimm_bus->dev); } EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate); diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 22d865ba6353..5a7c80053c62 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -303,13 +303,16 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio, resource_size_t size) { - struct resource *res = &nsio->res; struct nd_namespace_common *ndns = &nsio->common; + struct range range = { + .start = nsio->res.start, + .end = nsio->res.end, + }; nsio->size = size; - if (!devm_request_mem_region(dev, res->start, size, + if (!devm_request_mem_region(dev, range.start, size, dev_name(&ndns->dev))) { - dev_warn(dev, "could not reserve region %pR\n", res); + dev_warn(dev, "could not reserve region %pR\n", &nsio->res); return -EBUSY; } @@ -317,9 +320,9 @@ int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio, if (devm_init_badblocks(dev, &nsio->bb)) return -ENOMEM; nvdimm_badblocks_populate(to_nd_region(ndns->dev.parent), &nsio->bb, - &nsio->res); + &range); - nsio->addr = devm_memremap(dev, res->start, size, ARCH_MEMREMAP_PMEM); + nsio->addr = devm_memremap(dev, range.start, size, ARCH_MEMREMAP_PMEM); return PTR_ERR_OR_ZERO(nsio->addr); } diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 72740108ba42..696b55556d4d 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -377,8 +377,9 @@ int nvdimm_namespace_detach_btt(struct nd_btt *nd_btt); const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, char *name); unsigned int pmem_sector_size(struct nd_namespace_common *ndns); +struct range; void nvdimm_badblocks_populate(struct nd_region *nd_region, - struct badblocks *bb, const struct resource *res); + struct badblocks *bb, const struct range *range); int devm_namespace_enable(struct device *dev, struct nd_namespace_common *ndns, resource_size_t size); void devm_namespace_disable(struct device *dev, diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 3e11ef8d3f5b..3c4787b92a6a 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -672,7 +672,7 @@ static unsigned long init_altmap_reserve(resource_size_t base) static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) { - struct resource *res = &pgmap->res; + struct range *range = &pgmap->range; struct vmem_altmap *altmap = &pgmap->altmap; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; u64 offset = le64_to_cpu(pfn_sb->dataoff); @@ -689,16 +689,16 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) .end_pfn = PHYS_PFN(end), }; - memcpy(res, &nsio->res, sizeof(*res)); - res->start += start_pad; - res->end -= end_trunc; - + *range = (struct range) { + .start = nsio->res.start + start_pad, + .end = nsio->res.end - end_trunc, + }; if (nd_pfn->mode == PFN_MODE_RAM) { if (offset < reserve) return -EINVAL; nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); } else if (nd_pfn->mode == PFN_MODE_PMEM) { - nd_pfn->npfns = PHYS_PFN((resource_size(res) - offset)); + nd_pfn->npfns = PHYS_PFN((range_len(range) - offset)); if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) dev_info(&nd_pfn->dev, "number of pfns truncated from %lld to %ld\n", diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index c86a0ceaece6..1f394f44838f 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -375,7 +375,7 @@ static int pmem_attach_disk(struct device *dev, struct nd_region *nd_region = to_nd_region(dev->parent); int nid = dev_to_node(dev), fua; struct resource *res = &nsio->res; - struct resource bb_res; + struct range bb_range; struct nd_pfn *nd_pfn = NULL; struct dax_device *dax_dev; struct nd_pfn_sb *pfn_sb; @@ -434,24 +434,26 @@ static int pmem_attach_disk(struct device *dev, pfn_sb = nd_pfn->pfn_sb; pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); pmem->pfn_pad = resource_size(res) - - resource_size(&pmem->pgmap.res); + range_len(&pmem->pgmap.range); pmem->pfn_flags |= PFN_MAP; - memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); - bb_res.start += pmem->data_offset; + bb_range = pmem->pgmap.range; + bb_range.start += pmem->data_offset; } else if (pmem_should_map_pages(dev)) { - memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res)); + pmem->pgmap.range.start = res->start; + pmem->pgmap.range.end = res->end; pmem->pgmap.type = MEMORY_DEVICE_FS_DAX; pmem->pgmap.ops = &fsdax_pagemap_ops; addr = devm_memremap_pages(dev, &pmem->pgmap); pmem->pfn_flags |= PFN_MAP; - memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); + bb_range = pmem->pgmap.range; } else { if (devm_add_action_or_reset(dev, pmem_release_queue, &pmem->pgmap)) return -ENOMEM; addr = devm_memremap(dev, pmem->phys_addr, pmem->size, ARCH_MEMREMAP_PMEM); - memcpy(&bb_res, &nsio->res, sizeof(bb_res)); + bb_range.start = res->start; + bb_range.end = res->end; } if (IS_ERR(addr)) @@ -480,7 +482,7 @@ static int pmem_attach_disk(struct device *dev, / 512); if (devm_init_badblocks(dev, &pmem->bb)) return -ENOMEM; - nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res); + nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_range); disk->bb = &pmem->bb; if (is_nvdimm_sync(nd_region)) @@ -591,8 +593,8 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) resource_size_t offset = 0, end_trunc = 0; struct nd_namespace_common *ndns; struct nd_namespace_io *nsio; - struct resource res; struct badblocks *bb; + struct range range; struct kernfs_node *bb_state; if (event != NVDIMM_REVALIDATE_POISON) @@ -628,9 +630,9 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) nsio = to_nd_namespace_io(&ndns->dev); } - res.start = nsio->res.start + offset; - res.end = nsio->res.end - end_trunc; - nvdimm_badblocks_populate(nd_region, bb, &res); + range.start = nsio->res.start + offset; + range.end = nsio->res.end - end_trunc; + nvdimm_badblocks_populate(nd_region, bb, &range); if (bb_state) sysfs_notify_dirent(bb_state); } diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 0f6978e72e7c..bfce87ed72ab 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -35,7 +35,10 @@ static int nd_region_probe(struct device *dev) return rc; if (is_memory(&nd_region->dev)) { - struct resource ndr_res; + struct range range = { + .start = nd_region->ndr_start, + .end = nd_region->ndr_start + nd_region->ndr_size - 1, + }; if (devm_init_badblocks(dev, &nd_region->bb)) return -ENODEV; @@ -44,9 +47,7 @@ static int nd_region_probe(struct device *dev) if (!nd_region->bb_state) dev_warn(&nd_region->dev, "'badblocks' notification disabled\n"); - ndr_res.start = nd_region->ndr_start; - ndr_res.end = nd_region->ndr_start + nd_region->ndr_size - 1; - nvdimm_badblocks_populate(nd_region, &nd_region->bb, &ndr_res); + nvdimm_badblocks_populate(nd_region, &nd_region->bb, &range); } rc = nd_region_register_namespaces(nd_region, &err); @@ -121,14 +122,16 @@ static void nd_region_notify(struct device *dev, enum nvdimm_event event) { if (event == NVDIMM_REVALIDATE_POISON) { struct nd_region *nd_region = to_nd_region(dev); - struct resource res; if (is_memory(&nd_region->dev)) { - res.start = nd_region->ndr_start; - res.end = nd_region->ndr_start + - nd_region->ndr_size - 1; + struct range range = { + .start = nd_region->ndr_start, + .end = nd_region->ndr_start + + nd_region->ndr_size - 1, + }; + nvdimm_badblocks_populate(nd_region, - &nd_region->bb, &res); + &nd_region->bb, &range); if (nd_region->bb_state) sysfs_notify_dirent(nd_region->bb_state); } diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index f357f9a32b3a..256850513813 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -185,9 +185,8 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, return -ENOMEM; pgmap = &p2p_pgmap->pgmap; - pgmap->res.start = pci_resource_start(pdev, bar) + offset; - pgmap->res.end = pgmap->res.start + size - 1; - pgmap->res.flags = pci_resource_flags(pdev, bar); + pgmap->range.start = pci_resource_start(pdev, bar) + offset; + pgmap->range.end = pgmap->range.start + size - 1; pgmap->type = MEMORY_DEVICE_PCI_P2PDMA; p2p_pgmap->provider = pdev; @@ -202,13 +201,13 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, error = gen_pool_add_owner(pdev->p2pdma->pool, (unsigned long)addr, pci_bus_address(pdev, bar) + offset, - resource_size(&pgmap->res), dev_to_node(&pdev->dev), + range_len(&pgmap->range), dev_to_node(&pdev->dev), pgmap->ref); if (error) goto pages_free; - pci_info(pdev, "added peer-to-peer DMA memory %pR\n", - &pgmap->res); + pci_info(pdev, "added peer-to-peer DMA memory %#llx-%#llx\n", + pgmap->range.start, pgmap->range.end); return 0; diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c index 3b98dc921426..091b8669eca3 100644 --- a/drivers/xen/unpopulated-alloc.c +++ b/drivers/xen/unpopulated-alloc.c @@ -18,27 +18,37 @@ static unsigned int list_count; static int fill_list(unsigned int nr_pages) { struct dev_pagemap *pgmap; + struct resource *res; void *vaddr; unsigned int i, alloc_pages = round_up(nr_pages, PAGES_PER_SECTION); - int ret; + int ret = -ENOMEM; + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL); if (!pgmap) - return -ENOMEM; + goto err_pgmap; pgmap->type = MEMORY_DEVICE_GENERIC; - pgmap->res.name = "Xen scratch"; - pgmap->res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->name = "Xen scratch"; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - ret = allocate_resource(&iomem_resource, &pgmap->res, + ret = allocate_resource(&iomem_resource, res, alloc_pages * PAGE_SIZE, 0, -1, PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); if (ret < 0) { pr_err("Cannot allocate new IOMEM resource\n"); - kfree(pgmap); - return ret; + goto err_resource; } + pgmap->range = (struct range) { + .start = res->start, + .end = res->end, + }; + pgmap->owner = res; + #ifdef CONFIG_XEN_HAVE_PVMMU /* * memremap will build page tables for the new memory so @@ -50,14 +60,13 @@ static int fill_list(unsigned int nr_pages) * conflict with any devices. */ if (!xen_feature(XENFEAT_auto_translated_physmap)) { - xen_pfn_t pfn = PFN_DOWN(pgmap->res.start); + xen_pfn_t pfn = PFN_DOWN(res->start); for (i = 0; i < alloc_pages; i++) { if (!set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY)) { pr_warn("set_phys_to_machine() failed, no memory added\n"); - release_resource(&pgmap->res); - kfree(pgmap); - return -ENOMEM; + ret = -ENOMEM; + goto err_memremap; } } } @@ -66,9 +75,8 @@ static int fill_list(unsigned int nr_pages) vaddr = memremap_pages(pgmap, NUMA_NO_NODE); if (IS_ERR(vaddr)) { pr_err("Cannot remap memory range\n"); - release_resource(&pgmap->res); - kfree(pgmap); - return PTR_ERR(vaddr); + ret = PTR_ERR(vaddr); + goto err_memremap; } for (i = 0; i < alloc_pages; i++) { @@ -80,6 +88,14 @@ static int fill_list(unsigned int nr_pages) } return 0; + +err_memremap: + release_resource(res); +err_resource: + kfree(pgmap); +err_pgmap: + kfree(res); + return ret; } /** diff --git a/include/linux/memremap.h b/include/linux/memremap.h index e5862746751b..d0dd261d87c0 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MEMREMAP_H_ #define _LINUX_MEMREMAP_H_ +#include #include #include @@ -93,7 +94,7 @@ struct dev_pagemap_ops { /** * struct dev_pagemap - metadata for ZONE_DEVICE mappings * @altmap: pre-allocated/reserved memory for vmemmap allocations - * @res: physical address range covered by @ref + * @range: physical address range covered by @ref * @ref: reference count that pins the devm_memremap_pages() mapping * @internal_ref: internal reference if @ref is not provided by the caller * @done: completion for @internal_ref @@ -106,7 +107,7 @@ struct dev_pagemap_ops { */ struct dev_pagemap { struct vmem_altmap altmap; - struct resource res; + struct range range; struct percpu_ref *ref; struct percpu_ref internal_ref; struct completion done; diff --git a/include/linux/range.h b/include/linux/range.h index d1fbeb664012..274681cc3154 100644 --- a/include/linux/range.h +++ b/include/linux/range.h @@ -1,12 +1,18 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_RANGE_H #define _LINUX_RANGE_H +#include struct range { u64 start; u64 end; }; +static inline u64 range_len(const struct range *range) +{ + return range->end - range->start + 1; +} + int add_range(struct range *range, int az, int nr_range, u64 start, u64 end); diff --git a/lib/test_hmm.c b/lib/test_hmm.c index e7dc3de355b7..e97ca8ec0bce 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -460,6 +460,21 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice, unsigned long pfn_last; void *ptr; + devmem = kzalloc(sizeof(*devmem), GFP_KERNEL); + if (!devmem) + return -ENOMEM; + + res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE, + "hmm_dmirror"); + if (IS_ERR(res)) + goto err_devmem; + + devmem->pagemap.type = MEMORY_DEVICE_PRIVATE; + devmem->pagemap.range.start = res->start; + devmem->pagemap.range.end = res->end; + devmem->pagemap.ops = &dmirror_devmem_ops; + devmem->pagemap.owner = mdevice; + mutex_lock(&mdevice->devmem_lock); if (mdevice->devmem_count == mdevice->devmem_capacity) { @@ -472,33 +487,18 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice, sizeof(new_chunks[0]) * new_capacity, GFP_KERNEL); if (!new_chunks) - goto err; + goto err_release; mdevice->devmem_capacity = new_capacity; mdevice->devmem_chunks = new_chunks; } - res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE, - "hmm_dmirror"); - if (IS_ERR(res)) - goto err; - - devmem = kzalloc(sizeof(*devmem), GFP_KERNEL); - if (!devmem) - goto err_release; - - devmem->pagemap.type = MEMORY_DEVICE_PRIVATE; - devmem->pagemap.res = *res; - devmem->pagemap.ops = &dmirror_devmem_ops; - devmem->pagemap.owner = mdevice; - ptr = memremap_pages(&devmem->pagemap, numa_node_id()); if (IS_ERR(ptr)) - goto err_free; + goto err_release; devmem->mdevice = mdevice; - pfn_first = devmem->pagemap.res.start >> PAGE_SHIFT; - pfn_last = pfn_first + - (resource_size(&devmem->pagemap.res) >> PAGE_SHIFT); + pfn_first = devmem->pagemap.range.start >> PAGE_SHIFT; + pfn_last = pfn_first + (range_len(&devmem->pagemap.range) >> PAGE_SHIFT); mdevice->devmem_chunks[mdevice->devmem_count++] = devmem; mutex_unlock(&mdevice->devmem_lock); @@ -525,12 +525,12 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice, return true; -err_free: - kfree(devmem); err_release: - release_mem_region(res->start, resource_size(res)); -err: mutex_unlock(&mdevice->devmem_lock); + release_mem_region(devmem->pagemap.range.start, range_len(&devmem->pagemap.range)); +err_devmem: + kfree(devmem); + return false; } @@ -1100,8 +1100,8 @@ static void dmirror_device_remove(struct dmirror_device *mdevice) mdevice->devmem_chunks[i]; memunmap_pages(&devmem->pagemap); - release_mem_region(devmem->pagemap.res.start, - resource_size(&devmem->pagemap.res)); + release_mem_region(devmem->pagemap.range.start, + range_len(&devmem->pagemap.range)); kfree(devmem); } kfree(mdevice->devmem_chunks); diff --git a/mm/memremap.c b/mm/memremap.c index 006dace60b1a..d958d348b3ca 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -70,24 +70,24 @@ static void devmap_managed_enable_put(void) } #endif /* CONFIG_DEV_PAGEMAP_OPS */ -static void pgmap_array_delete(struct resource *res) +static void pgmap_array_delete(struct range *range) { - xa_store_range(&pgmap_array, PHYS_PFN(res->start), PHYS_PFN(res->end), + xa_store_range(&pgmap_array, PHYS_PFN(range->start), PHYS_PFN(range->end), NULL, GFP_KERNEL); synchronize_rcu(); } static unsigned long pfn_first(struct dev_pagemap *pgmap) { - return PHYS_PFN(pgmap->res.start) + + return PHYS_PFN(pgmap->range.start) + vmem_altmap_offset(pgmap_altmap(pgmap)); } static unsigned long pfn_end(struct dev_pagemap *pgmap) { - const struct resource *res = &pgmap->res; + const struct range *range = &pgmap->range; - return (res->start + resource_size(res)) >> PAGE_SHIFT; + return (range->start + range_len(range)) >> PAGE_SHIFT; } static unsigned long pfn_next(unsigned long pfn) @@ -126,7 +126,7 @@ static void dev_pagemap_cleanup(struct dev_pagemap *pgmap) void memunmap_pages(struct dev_pagemap *pgmap) { - struct resource *res = &pgmap->res; + struct range *range = &pgmap->range; struct page *first_page; unsigned long pfn; int nid; @@ -143,20 +143,20 @@ void memunmap_pages(struct dev_pagemap *pgmap) nid = page_to_nid(first_page); mem_hotplug_begin(); - remove_pfn_range_from_zone(page_zone(first_page), PHYS_PFN(res->start), - PHYS_PFN(resource_size(res))); + remove_pfn_range_from_zone(page_zone(first_page), PHYS_PFN(range->start), + PHYS_PFN(range_len(range))); if (pgmap->type == MEMORY_DEVICE_PRIVATE) { - __remove_pages(PHYS_PFN(res->start), - PHYS_PFN(resource_size(res)), NULL); + __remove_pages(PHYS_PFN(range->start), + PHYS_PFN(range_len(range)), NULL); } else { - arch_remove_memory(nid, res->start, resource_size(res), + arch_remove_memory(nid, range->start, range_len(range), pgmap_altmap(pgmap)); - kasan_remove_zero_shadow(__va(res->start), resource_size(res)); + kasan_remove_zero_shadow(__va(range->start), range_len(range)); } mem_hotplug_done(); - untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res)); - pgmap_array_delete(res); + untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range)); + pgmap_array_delete(range); WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n"); devmap_managed_enable_put(); } @@ -182,7 +182,7 @@ static void dev_pagemap_percpu_release(struct percpu_ref *ref) */ void *memremap_pages(struct dev_pagemap *pgmap, int nid) { - struct resource *res = &pgmap->res; + struct range *range = &pgmap->range; struct dev_pagemap *conflict_pgmap; struct mhp_params params = { /* @@ -251,7 +251,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) return ERR_PTR(error); } - conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->start), NULL); + conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->start), NULL); if (conflict_pgmap) { WARN(1, "Conflicting mapping in same section\n"); put_dev_pagemap(conflict_pgmap); @@ -259,7 +259,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) goto err_array; } - conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->end), NULL); + conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->end), NULL); if (conflict_pgmap) { WARN(1, "Conflicting mapping in same section\n"); put_dev_pagemap(conflict_pgmap); @@ -267,26 +267,27 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) goto err_array; } - is_ram = region_intersects(res->start, resource_size(res), + is_ram = region_intersects(range->start, range_len(range), IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE); if (is_ram != REGION_DISJOINT) { - WARN_ONCE(1, "%s attempted on %s region %pr\n", __func__, - is_ram == REGION_MIXED ? "mixed" : "ram", res); + WARN_ONCE(1, "attempted on %s region %#llx-%#llx\n", + is_ram == REGION_MIXED ? "mixed" : "ram", + range->start, range->end); error = -ENXIO; goto err_array; } - error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(res->start), - PHYS_PFN(res->end), pgmap, GFP_KERNEL)); + error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(range->start), + PHYS_PFN(range->end), pgmap, GFP_KERNEL)); if (error) goto err_array; if (nid < 0) nid = numa_mem_id(); - error = track_pfn_remap(NULL, ¶ms.pgprot, PHYS_PFN(res->start), - 0, resource_size(res)); + error = track_pfn_remap(NULL, ¶ms.pgprot, PHYS_PFN(range->start), 0, + range_len(range)); if (error) goto err_pfn_remap; @@ -304,16 +305,16 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) * arch_add_memory(). */ if (pgmap->type == MEMORY_DEVICE_PRIVATE) { - error = add_pages(nid, PHYS_PFN(res->start), - PHYS_PFN(resource_size(res)), ¶ms); + error = add_pages(nid, PHYS_PFN(range->start), + PHYS_PFN(range_len(range)), ¶ms); } else { - error = kasan_add_zero_shadow(__va(res->start), resource_size(res)); + error = kasan_add_zero_shadow(__va(range->start), range_len(range)); if (error) { mem_hotplug_done(); goto err_kasan; } - error = arch_add_memory(nid, res->start, resource_size(res), + error = arch_add_memory(nid, range->start, range_len(range), ¶ms); } @@ -321,8 +322,8 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) struct zone *zone; zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE]; - move_pfn_range_to_zone(zone, PHYS_PFN(res->start), - PHYS_PFN(resource_size(res)), params.altmap); + move_pfn_range_to_zone(zone, PHYS_PFN(range->start), + PHYS_PFN(range_len(range)), params.altmap); } mem_hotplug_done(); @@ -334,17 +335,17 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) * to allow us to do the work while not holding the hotplug lock. */ memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], - PHYS_PFN(res->start), - PHYS_PFN(resource_size(res)), pgmap); + PHYS_PFN(range->start), + PHYS_PFN(range_len(range)), pgmap); percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap)); - return __va(res->start); + return __va(range->start); err_add_memory: - kasan_remove_zero_shadow(__va(res->start), resource_size(res)); + kasan_remove_zero_shadow(__va(range->start), range_len(range)); err_kasan: - untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res)); + untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range)); err_pfn_remap: - pgmap_array_delete(res); + pgmap_array_delete(range); err_array: dev_pagemap_kill(pgmap); dev_pagemap_cleanup(pgmap); @@ -369,7 +370,7 @@ EXPORT_SYMBOL_GPL(memremap_pages); * 'live' on entry and will be killed and reaped at * devm_memremap_pages_release() time, or if this routine fails. * - * 4/ res is expected to be a host memory range that could feasibly be + * 4/ range is expected to be a host memory range that could feasibly be * treated as a "System RAM" range, i.e. not a device mmio range, but * this is not enforced. */ @@ -426,7 +427,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn, * In the cached case we're already holding a live reference. */ if (pgmap) { - if (phys >= pgmap->res.start && phys <= pgmap->res.end) + if (phys >= pgmap->range.start && phys <= pgmap->range.end) return pgmap; put_dev_pagemap(pgmap); } diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index 03e40b3b0106..c62d372d426f 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -126,7 +126,7 @@ static void dev_pagemap_percpu_release(struct percpu_ref *ref) void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { int error; - resource_size_t offset = pgmap->res.start; + resource_size_t offset = pgmap->range.start; struct nfit_test_resource *nfit_res = get_nfit_res(offset); if (!nfit_res) -- cgit v1.3.1 From 60e93dc097f7f13a16a7e4b75b8803eb2adbb721 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 13 Oct 2020 16:50:39 -0700 Subject: device-dax: add dis-contiguous resource support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Break the requirement that device-dax instances are physically contiguous. With this constraint removed it allows fragmented available capacity to be fully allocated. This capability is useful to mitigate the "noisy neighbor" problem with memory-side-cache management for virtual machines, or any other scenario where a platform address boundary also designates a performance boundary. For example a direct mapped memory side cache might rotate cache colors at 1GB boundaries. With dis-contiguous allocations a device-dax instance could be configured to contain only 1 cache color. It also satisfies Joao's use case (see link) for partitioning memory for exclusive guest access. It allows for a future potential mode where the host kernel need not allocate 'struct page' capacity up-front. Reported-by: Joao Martins Signed-off-by: Dan Williams Signed-off-by: Andrew Morton Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Ard Biesheuvel Cc: Benjamin Herrenschmidt Cc: Ben Skeggs Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Boris Ostrovsky Cc: Brice Goglin Cc: Catalin Marinas Cc: Daniel Vetter Cc: Dave Hansen Cc: Dave Jiang Cc: David Airlie Cc: David Hildenbrand Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Hulk Robot Cc: Ingo Molnar Cc: Ira Weiny Cc: Jason Gunthorpe Cc: Jason Yan Cc: Jeff Moyer Cc: "Jérôme Glisse" Cc: Jia He Cc: Jonathan Cameron Cc: Juergen Gross Cc: kernel test robot Cc: Michael Ellerman Cc: Mike Rapoport Cc: Paul Mackerras Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Randy Dunlap Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Tom Lendacky Cc: Vishal Verma Cc: Vivek Goyal Cc: Wei Yang Cc: Will Deacon Link: https://lore.kernel.org/lkml/20200110190313.17144-1-joao.m.martins@oracle.com/ Link: https://lkml.kernel.org/r/159643104304.4062302.16561669534797528660.stgit@dwillia2-desk3.amr.corp.intel.com Link: https://lkml.kernel.org/r/160106116875.30709.11456649969327399771.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Linus Torvalds --- drivers/dax/bus.c | 233 +++++++++++++++++++++++++++++++---------- drivers/dax/dax-private.h | 9 +- drivers/dax/device.c | 55 ++++++---- drivers/dax/kmem.c | 130 +++++++++++++++-------- tools/testing/nvdimm/dax-dev.c | 20 ++-- 5 files changed, 323 insertions(+), 124 deletions(-) (limited to 'tools') diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 00fa73a8dfb4..06a789aba58a 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -136,15 +136,27 @@ static bool is_static(struct dax_region *dax_region) return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0; } +static u64 dev_dax_size(struct dev_dax *dev_dax) +{ + u64 size = 0; + int i; + + device_lock_assert(&dev_dax->dev); + + for (i = 0; i < dev_dax->nr_range; i++) + size += range_len(&dev_dax->ranges[i].range); + + return size; +} + static int dax_bus_probe(struct device *dev) { struct dax_device_driver *dax_drv = to_dax_drv(dev->driver); struct dev_dax *dev_dax = to_dev_dax(dev); struct dax_region *dax_region = dev_dax->region; - struct range *range = &dev_dax->range; int rc; - if (range_len(range) == 0 || dev_dax->id < 0) + if (dev_dax_size(dev_dax) == 0 || dev_dax->id < 0) return -ENXIO; rc = dax_drv->probe(dev_dax); @@ -354,15 +366,19 @@ void kill_dev_dax(struct dev_dax *dev_dax) } EXPORT_SYMBOL_GPL(kill_dev_dax); -static void free_dev_dax_range(struct dev_dax *dev_dax) +static void free_dev_dax_ranges(struct dev_dax *dev_dax) { struct dax_region *dax_region = dev_dax->region; - struct range *range = &dev_dax->range; + int i; device_lock_assert(dax_region->dev); - if (range_len(range)) + for (i = 0; i < dev_dax->nr_range; i++) { + struct range *range = &dev_dax->ranges[i].range; + __release_region(&dax_region->res, range->start, range_len(range)); + } + dev_dax->nr_range = 0; } static void unregister_dev_dax(void *dev) @@ -372,7 +388,7 @@ static void unregister_dev_dax(void *dev) dev_dbg(dev, "%s\n", __func__); kill_dev_dax(dev_dax); - free_dev_dax_range(dev_dax); + free_dev_dax_ranges(dev_dax); device_del(dev); put_device(dev); } @@ -423,7 +439,7 @@ static ssize_t delete_store(struct device *dev, struct device_attribute *attr, device_lock(dev); device_lock(victim); dev_dax = to_dev_dax(victim); - if (victim->driver || range_len(&dev_dax->range)) + if (victim->driver || dev_dax_size(dev_dax)) rc = -EBUSY; else { /* @@ -569,51 +585,86 @@ static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start, struct dax_region *dax_region = dev_dax->region; struct resource *res = &dax_region->res; struct device *dev = &dev_dax->dev; + struct dev_dax_range *ranges; + unsigned long pgoff = 0; struct resource *alloc; + int i; device_lock_assert(dax_region->dev); /* handle the seed alloc special case */ if (!size) { - dev_dax->range = (struct range) { - .start = res->start, - .end = res->start - 1, - }; + if (dev_WARN_ONCE(dev, dev_dax->nr_range, + "0-size allocation must be first\n")) + return -EBUSY; + /* nr_range == 0 is elsewhere special cased as 0-size device */ return 0; } + ranges = krealloc(dev_dax->ranges, sizeof(*ranges) + * (dev_dax->nr_range + 1), GFP_KERNEL); + if (!ranges) + return -ENOMEM; + alloc = __request_region(res, start, size, dev_name(dev), 0); - if (!alloc) + if (!alloc) { + /* + * If this was an empty set of ranges nothing else + * will release @ranges, so do it now. + */ + if (!dev_dax->nr_range) { + kfree(ranges); + ranges = NULL; + } + dev_dax->ranges = ranges; return -ENOMEM; + } - dev_dax->range = (struct range) { - .start = alloc->start, - .end = alloc->end, + for (i = 0; i < dev_dax->nr_range; i++) + pgoff += PHYS_PFN(range_len(&ranges[i].range)); + dev_dax->ranges = ranges; + ranges[dev_dax->nr_range++] = (struct dev_dax_range) { + .pgoff = pgoff, + .range = { + .start = alloc->start, + .end = alloc->end, + }, }; + dev_dbg(dev, "alloc range[%d]: %pa:%pa\n", dev_dax->nr_range - 1, + &alloc->start, &alloc->end); + return 0; } static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size) { + int last_range = dev_dax->nr_range - 1; + struct dev_dax_range *dax_range = &dev_dax->ranges[last_range]; struct dax_region *dax_region = dev_dax->region; - struct range *range = &dev_dax->range; - int rc = 0; + bool is_shrink = resource_size(res) > size; + struct range *range = &dax_range->range; + struct device *dev = &dev_dax->dev; + int rc; device_lock_assert(dax_region->dev); - if (size) - rc = adjust_resource(res, range->start, size); - else - __release_region(&dax_region->res, range->start, range_len(range)); + if (dev_WARN_ONCE(dev, !size, "deletion is handled by dev_dax_shrink\n")) + return -EINVAL; + + rc = adjust_resource(res, range->start, size); if (rc) return rc; - dev_dax->range = (struct range) { + *range = (struct range) { .start = range->start, .end = range->start + size - 1, }; + dev_dbg(dev, "%s range[%d]: %#llx:%#llx\n", is_shrink ? "shrink" : "extend", + last_range, (unsigned long long) range->start, + (unsigned long long) range->end); + return 0; } @@ -621,7 +672,11 @@ static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) { struct dev_dax *dev_dax = to_dev_dax(dev); - unsigned long long size = range_len(&dev_dax->range); + unsigned long long size; + + device_lock(dev); + size = dev_dax_size(dev_dax); + device_unlock(dev); return sprintf(buf, "%llu\n", size); } @@ -639,32 +694,82 @@ static bool alloc_is_aligned(struct dax_region *dax_region, static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size) { + resource_size_t to_shrink = dev_dax_size(dev_dax) - size; struct dax_region *dax_region = dev_dax->region; - struct range *range = &dev_dax->range; - struct resource *res, *adjust = NULL; struct device *dev = &dev_dax->dev; - - for_each_dax_region_resource(dax_region, res) - if (strcmp(res->name, dev_name(dev)) == 0 - && res->start == range->start) { - adjust = res; - break; + int i; + + for (i = dev_dax->nr_range - 1; i >= 0; i--) { + struct range *range = &dev_dax->ranges[i].range; + struct resource *adjust = NULL, *res; + resource_size_t shrink; + + shrink = min_t(u64, to_shrink, range_len(range)); + if (shrink >= range_len(range)) { + __release_region(&dax_region->res, range->start, + range_len(range)); + dev_dax->nr_range--; + dev_dbg(dev, "delete range[%d]: %#llx:%#llx\n", i, + (unsigned long long) range->start, + (unsigned long long) range->end); + to_shrink -= shrink; + if (!to_shrink) + break; + continue; } - if (dev_WARN_ONCE(dev, !adjust, "failed to find matching resource\n")) - return -ENXIO; - return adjust_dev_dax_range(dev_dax, adjust, size); + for_each_dax_region_resource(dax_region, res) + if (strcmp(res->name, dev_name(dev)) == 0 + && res->start == range->start) { + adjust = res; + break; + } + + if (dev_WARN_ONCE(dev, !adjust || i != dev_dax->nr_range - 1, + "failed to find matching resource\n")) + return -ENXIO; + return adjust_dev_dax_range(dev_dax, adjust, range_len(range) + - shrink); + } + return 0; +} + +/* + * Only allow adjustments that preserve the relative pgoff of existing + * allocations. I.e. the dev_dax->ranges array is ordered by increasing pgoff. + */ +static bool adjust_ok(struct dev_dax *dev_dax, struct resource *res) +{ + struct dev_dax_range *last; + int i; + + if (dev_dax->nr_range == 0) + return false; + if (strcmp(res->name, dev_name(&dev_dax->dev)) != 0) + return false; + last = &dev_dax->ranges[dev_dax->nr_range - 1]; + if (last->range.start != res->start || last->range.end != res->end) + return false; + for (i = 0; i < dev_dax->nr_range - 1; i++) { + struct dev_dax_range *dax_range = &dev_dax->ranges[i]; + + if (dax_range->pgoff > last->pgoff) + return false; + } + + return true; } static ssize_t dev_dax_resize(struct dax_region *dax_region, struct dev_dax *dev_dax, resource_size_t size) { resource_size_t avail = dax_region_avail_size(dax_region), to_alloc; - resource_size_t dev_size = range_len(&dev_dax->range); + resource_size_t dev_size = dev_dax_size(dev_dax); struct resource *region_res = &dax_region->res; struct device *dev = &dev_dax->dev; - const char *name = dev_name(dev); struct resource *res, *first; + resource_size_t alloc = 0; + int rc; if (dev->driver) return -EBUSY; @@ -685,35 +790,47 @@ static ssize_t dev_dax_resize(struct dax_region *dax_region, * may involve adjusting the end of an existing resource, or * allocating a new resource. */ +retry: first = region_res->child; if (!first) return alloc_dev_dax_range(dev_dax, dax_region->res.start, to_alloc); - for (res = first; to_alloc && res; res = res->sibling) { + + rc = -ENOSPC; + for (res = first; res; res = res->sibling) { struct resource *next = res->sibling; - resource_size_t free; /* space at the beginning of the region */ - free = 0; - if (res == first && res->start > dax_region->res.start) - free = res->start - dax_region->res.start; - if (free >= to_alloc && dev_size == 0) - return alloc_dev_dax_range(dev_dax, dax_region->res.start, to_alloc); + if (res == first && res->start > dax_region->res.start) { + alloc = min(res->start - dax_region->res.start, to_alloc); + rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, alloc); + break; + } - free = 0; + alloc = 0; /* space between allocations */ if (next && next->start > res->end + 1) - free = next->start - res->end + 1; + alloc = min(next->start - (res->end + 1), to_alloc); /* space at the end of the region */ - if (free < to_alloc && !next && res->end < region_res->end) - free = region_res->end - res->end; + if (!alloc && !next && res->end < region_res->end) + alloc = min(region_res->end - res->end, to_alloc); - if (free >= to_alloc && strcmp(name, res->name) == 0) - return adjust_dev_dax_range(dev_dax, res, resource_size(res) + to_alloc); - else if (free >= to_alloc && dev_size == 0) - return alloc_dev_dax_range(dev_dax, res->end + 1, to_alloc); + if (!alloc) + continue; + + if (adjust_ok(dev_dax, res)) { + rc = adjust_dev_dax_range(dev_dax, res, resource_size(res) + alloc); + break; + } + rc = alloc_dev_dax_range(dev_dax, res->end + 1, alloc); + break; } - return -ENOSPC; + if (rc) + return rc; + to_alloc -= alloc; + if (to_alloc) + goto retry; + return 0; } static ssize_t size_store(struct device *dev, struct device_attribute *attr, @@ -767,8 +884,15 @@ static ssize_t resource_show(struct device *dev, struct device_attribute *attr, char *buf) { struct dev_dax *dev_dax = to_dev_dax(dev); + struct dax_region *dax_region = dev_dax->region; + unsigned long long start; + + if (dev_dax->nr_range < 1) + start = dax_region->res.start; + else + start = dev_dax->ranges[0].range.start; - return sprintf(buf, "%#llx\n", dev_dax->range.start); + return sprintf(buf, "%#llx\n", start); } static DEVICE_ATTR(resource, 0400, resource_show, NULL); @@ -833,6 +957,7 @@ static void dev_dax_release(struct device *dev) put_dax(dax_dev); free_dev_dax_id(dev_dax); dax_region_put(dax_region); + kfree(dev_dax->ranges); kfree(dev_dax->pgmap); kfree(dev_dax); } @@ -941,7 +1066,7 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) err_alloc_dax: kfree(dev_dax->pgmap); err_pgmap: - free_dev_dax_range(dev_dax); + free_dev_dax_ranges(dev_dax); err_range: free_dev_dax_id(dev_dax); err_id: diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h index 0cbb2ec81ca7..f863287107fd 100644 --- a/drivers/dax/dax-private.h +++ b/drivers/dax/dax-private.h @@ -49,7 +49,8 @@ struct dax_region { * @id: ida allocated id * @dev - device core * @pgmap - pgmap for memmap setup / lifetime (driver owned) - * @range: resource range for the instance + * @nr_range: size of @ranges + * @ranges: resource-span + pgoff tuples for the instance */ struct dev_dax { struct dax_region *region; @@ -58,7 +59,11 @@ struct dev_dax { int id; struct device dev; struct dev_pagemap *pgmap; - struct range range; + int nr_range; + struct dev_dax_range { + unsigned long pgoff; + struct range range; + } *ranges; }; static inline struct dev_dax *to_dev_dax(struct device *dev) diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 5f808617672a..bf389712a20b 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -55,15 +55,22 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, unsigned long size) { - struct range *range = &dev_dax->range; - phys_addr_t phys; - - phys = pgoff * PAGE_SIZE + range->start; - if (phys >= range->start && phys <= range->end) { + int i; + + for (i = 0; i < dev_dax->nr_range; i++) { + struct dev_dax_range *dax_range = &dev_dax->ranges[i]; + struct range *range = &dax_range->range; + unsigned long long pgoff_end; + phys_addr_t phys; + + pgoff_end = dax_range->pgoff + PHYS_PFN(range_len(range)) - 1; + if (pgoff < dax_range->pgoff || pgoff > pgoff_end) + continue; + phys = PFN_PHYS(pgoff - dax_range->pgoff) + range->start; if (phys + size - 1 <= range->end) return phys; + break; } - return -1; } @@ -395,30 +402,40 @@ static void dev_dax_kill(void *dev_dax) int dev_dax_probe(struct dev_dax *dev_dax) { struct dax_device *dax_dev = dev_dax->dax_dev; - struct range *range = &dev_dax->range; struct device *dev = &dev_dax->dev; struct dev_pagemap *pgmap; struct inode *inode; struct cdev *cdev; void *addr; - int rc; - - /* 1:1 map region resource range to device-dax instance range */ - if (!devm_request_mem_region(dev, range->start, range_len(range), - dev_name(dev))) { - dev_warn(dev, "could not reserve range: %#llx - %#llx\n", - range->start, range->end); - return -EBUSY; - } + int rc, i; pgmap = dev_dax->pgmap; + if (dev_WARN_ONCE(dev, pgmap && dev_dax->nr_range > 1, + "static pgmap / multi-range device conflict\n")) + return -EINVAL; + if (!pgmap) { - pgmap = devm_kzalloc(dev, sizeof(*pgmap), GFP_KERNEL); + pgmap = devm_kzalloc(dev, sizeof(*pgmap) + sizeof(struct range) + * (dev_dax->nr_range - 1), GFP_KERNEL); if (!pgmap) return -ENOMEM; - pgmap->range = *range; - pgmap->nr_range = 1; + pgmap->nr_range = dev_dax->nr_range; + } + + for (i = 0; i < dev_dax->nr_range; i++) { + struct range *range = &dev_dax->ranges[i].range; + + if (!devm_request_mem_region(dev, range->start, + range_len(range), dev_name(dev))) { + dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve range\n", + i, range->start, range->end); + return -EBUSY; + } + /* don't update the range for static pgmap */ + if (!dev_dax->pgmap) + pgmap->ranges[i] = *range; } + pgmap->type = MEMORY_DEVICE_GENERIC; addr = devm_memremap_pages(dev, pgmap); if (IS_ERR(addr)) diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c index c2ac465cc342..6c933f2b604e 100644 --- a/drivers/dax/kmem.c +++ b/drivers/dax/kmem.c @@ -19,24 +19,28 @@ static const char *kmem_name; /* Set if any memory will remain added when the driver will be unloaded. */ static bool any_hotremove_failed; -static struct range dax_kmem_range(struct dev_dax *dev_dax) +static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r) { - struct range range; + struct dev_dax_range *dax_range = &dev_dax->ranges[i]; + struct range *range = &dax_range->range; /* memory-block align the hotplug range */ - range.start = ALIGN(dev_dax->range.start, memory_block_size_bytes()); - range.end = ALIGN_DOWN(dev_dax->range.end + 1, memory_block_size_bytes()) - 1; - return range; + r->start = ALIGN(range->start, memory_block_size_bytes()); + r->end = ALIGN_DOWN(range->end + 1, memory_block_size_bytes()) - 1; + if (r->start >= r->end) { + r->start = range->start; + r->end = range->end; + return -ENOSPC; + } + return 0; } static int dev_dax_kmem_probe(struct dev_dax *dev_dax) { - struct range range = dax_kmem_range(dev_dax); struct device *dev = &dev_dax->dev; - struct resource *res; + int i, mapped = 0; char *res_name; int numa_node; - int rc; /* * Ensure good NUMA information for the persistent memory. @@ -55,31 +59,58 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax) if (!res_name) return -ENOMEM; - /* Region is permanently reserved if hotremove fails. */ - res = request_mem_region(range.start, range_len(&range), res_name); - if (!res) { - dev_warn(dev, "could not reserve region [%#llx-%#llx]\n", range.start, range.end); - kfree(res_name); - return -EBUSY; - } - - /* - * Set flags appropriate for System RAM. Leave ..._BUSY clear - * so that add_memory() can add a child resource. Do not - * inherit flags from the parent since it may set new flags - * unknown to us that will break add_memory() below. - */ - res->flags = IORESOURCE_SYSTEM_RAM; - - /* - * Ensure that future kexec'd kernels will not treat this as RAM - * automatically. - */ - rc = add_memory_driver_managed(numa_node, range.start, range_len(&range), kmem_name); - if (rc) { - release_mem_region(range.start, range_len(&range)); - kfree(res_name); - return rc; + for (i = 0; i < dev_dax->nr_range; i++) { + struct resource *res; + struct range range; + int rc; + + rc = dax_kmem_range(dev_dax, i, &range); + if (rc) { + dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n", + i, range.start, range.end); + continue; + } + + /* Region is permanently reserved if hotremove fails. */ + res = request_mem_region(range.start, range_len(&range), res_name); + if (!res) { + dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve region\n", + i, range.start, range.end); + /* + * Once some memory has been onlined we can't + * assume that it can be un-onlined safely. + */ + if (mapped) + continue; + kfree(res_name); + return -EBUSY; + } + + /* + * Set flags appropriate for System RAM. Leave ..._BUSY clear + * so that add_memory() can add a child resource. Do not + * inherit flags from the parent since it may set new flags + * unknown to us that will break add_memory() below. + */ + res->flags = IORESOURCE_SYSTEM_RAM; + + /* + * Ensure that future kexec'd kernels will not treat + * this as RAM automatically. + */ + rc = add_memory_driver_managed(numa_node, range.start, + range_len(&range), kmem_name); + + if (rc) { + dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n", + i, range.start, range.end); + release_mem_region(range.start, range_len(&range)); + if (mapped) + continue; + kfree(res_name); + return rc; + } + mapped++; } dev_set_drvdata(dev, res_name); @@ -90,9 +121,8 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax) #ifdef CONFIG_MEMORY_HOTREMOVE static int dev_dax_kmem_remove(struct dev_dax *dev_dax) { - int rc; + int i, success = 0; struct device *dev = &dev_dax->dev; - struct range range = dax_kmem_range(dev_dax); const char *res_name = dev_get_drvdata(dev); /* @@ -101,17 +131,31 @@ static int dev_dax_kmem_remove(struct dev_dax *dev_dax) * there is no way to hotremove this memory until reboot because device * unbind will succeed even if we return failure. */ - rc = remove_memory(dev_dax->target_node, range.start, range_len(&range)); - if (rc) { + for (i = 0; i < dev_dax->nr_range; i++) { + struct range range; + int rc; + + rc = dax_kmem_range(dev_dax, i, &range); + if (rc) + continue; + + rc = remove_memory(dev_dax->target_node, range.start, + range_len(&range)); + if (rc == 0) { + release_mem_region(range.start, range_len(&range)); + success++; + continue; + } any_hotremove_failed = true; - dev_err(dev, "%#llx-%#llx cannot be hotremoved until the next reboot\n", - range.start, range.end); - return rc; + dev_err(dev, + "mapping%d: %#llx-%#llx cannot be hotremoved until the next reboot\n", + i, range.start, range.end); } - /* Release and free dax resources */ - release_mem_region(range.start, range_len(&range)); - kfree(res_name); + if (success >= dev_dax->nr_range) { + kfree(res_name); + dev_set_drvdata(dev, NULL); + } return 0; } diff --git a/tools/testing/nvdimm/dax-dev.c b/tools/testing/nvdimm/dax-dev.c index 38d8e55c4a0d..fb342a8c98d3 100644 --- a/tools/testing/nvdimm/dax-dev.c +++ b/tools/testing/nvdimm/dax-dev.c @@ -9,11 +9,18 @@ phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, unsigned long size) { - struct range *range = &dev_dax->range; - phys_addr_t addr; + int i; - addr = pgoff * PAGE_SIZE + range->start; - if (addr >= range->start && addr <= range->end) { + for (i = 0; i < dev_dax->nr_range; i++) { + struct dev_dax_range *dax_range = &dev_dax->ranges[i]; + struct range *range = &dax_range->range; + unsigned long long pgoff_end; + phys_addr_t addr; + + pgoff_end = dax_range->pgoff + PHYS_PFN(range_len(range)) - 1; + if (pgoff < dax_range->pgoff || pgoff > pgoff_end) + continue; + addr = PFN_PHYS(pgoff - dax_range->pgoff) + range->start; if (addr + size - 1 <= range->end) { if (get_nfit_res(addr)) { struct page *page; @@ -23,9 +30,10 @@ phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, page = vmalloc_to_page((void *)addr); return PFN_PHYS(page_to_pfn(page)); - } else - return addr; + } + return addr; } + break; } return -1; } -- cgit v1.3.1 From 657d4f7996c6a4235069d8f9a47b64af2f007dbc Mon Sep 17 00:00:00 2001 From: Barry Song Date: Tue, 13 Oct 2020 16:51:54 -0700 Subject: mm/gup_benchmark: use pin_user_pages for FOLL_LONGTERM flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to Documentation/core-api/pin_user_pages.rst, FOLL_PIN is a prerequisite to FOLL_LONGTERM. Another way of saying that is, FOLL_LONGTERM is a specific case, more restrictive case of FOLL_PIN. Almost all kernel modules are using pin_user_pages() with FOLL_LONGTERM, mm/gup_benchmark.c seems to the only exception in which FOLL_PIN is not a prerequisite to FOLL_LONGTERM. Signed-off-by: Barry Song Signed-off-by: Andrew Morton Reviewed-by: John Hubbard Cc: Jan Kara Cc: Jérôme Glisse Cc: "Matthew Wilcox (Oracle)" Cc: Al Viro Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Chinner Cc: Jason Gunthorpe Cc: Jonathan Corbet Cc: Michal Hocko Cc: Mike Kravetz Cc: Shuah Khan Cc: Vlastimil Babka Link: http://lkml.kernel.org/r/20200815122056.29508-1-song.bao.hua@hisilicon.com Signed-off-by: Linus Torvalds --- mm/gup_benchmark.c | 23 ++++++++++++----------- tools/testing/selftests/vm/gup_benchmark.c | 14 +++++++------- 2 files changed, 19 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c index be690fa66a46..464cae1fa3ea 100644 --- a/mm/gup_benchmark.c +++ b/mm/gup_benchmark.c @@ -6,10 +6,10 @@ #include #define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark) -#define GUP_LONGTERM_BENCHMARK _IOWR('g', 2, struct gup_benchmark) -#define GUP_BENCHMARK _IOWR('g', 3, struct gup_benchmark) -#define PIN_FAST_BENCHMARK _IOWR('g', 4, struct gup_benchmark) -#define PIN_BENCHMARK _IOWR('g', 5, struct gup_benchmark) +#define GUP_BENCHMARK _IOWR('g', 2, struct gup_benchmark) +#define PIN_FAST_BENCHMARK _IOWR('g', 3, struct gup_benchmark) +#define PIN_BENCHMARK _IOWR('g', 4, struct gup_benchmark) +#define PIN_LONGTERM_BENCHMARK _IOWR('g', 5, struct gup_benchmark) struct gup_benchmark { __u64 get_delta_usec; @@ -28,7 +28,6 @@ static void put_back_pages(unsigned int cmd, struct page **pages, switch (cmd) { case GUP_FAST_BENCHMARK: - case GUP_LONGTERM_BENCHMARK: case GUP_BENCHMARK: for (i = 0; i < nr_pages; i++) put_page(pages[i]); @@ -36,6 +35,7 @@ static void put_back_pages(unsigned int cmd, struct page **pages, case PIN_FAST_BENCHMARK: case PIN_BENCHMARK: + case PIN_LONGTERM_BENCHMARK: unpin_user_pages(pages, nr_pages); break; } @@ -50,6 +50,7 @@ static void verify_dma_pinned(unsigned int cmd, struct page **pages, switch (cmd) { case PIN_FAST_BENCHMARK: case PIN_BENCHMARK: + case PIN_LONGTERM_BENCHMARK: for (i = 0; i < nr_pages; i++) { page = pages[i]; if (WARN(!page_maybe_dma_pinned(page), @@ -101,11 +102,6 @@ static int __gup_benchmark_ioctl(unsigned int cmd, nr = get_user_pages_fast(addr, nr, gup->flags, pages + i); break; - case GUP_LONGTERM_BENCHMARK: - nr = get_user_pages(addr, nr, - gup->flags | FOLL_LONGTERM, - pages + i, NULL); - break; case GUP_BENCHMARK: nr = get_user_pages(addr, nr, gup->flags, pages + i, NULL); @@ -118,6 +114,11 @@ static int __gup_benchmark_ioctl(unsigned int cmd, nr = pin_user_pages(addr, nr, gup->flags, pages + i, NULL); break; + case PIN_LONGTERM_BENCHMARK: + nr = pin_user_pages(addr, nr, + gup->flags | FOLL_LONGTERM, + pages + i, NULL); + break; default: kvfree(pages); ret = -EINVAL; @@ -162,10 +163,10 @@ static long gup_benchmark_ioctl(struct file *filep, unsigned int cmd, switch (cmd) { case GUP_FAST_BENCHMARK: - case GUP_LONGTERM_BENCHMARK: case GUP_BENCHMARK: case PIN_FAST_BENCHMARK: case PIN_BENCHMARK: + case PIN_LONGTERM_BENCHMARK: break; default: return -EINVAL; diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c index 43b4dfe161a2..31f8bb086907 100644 --- a/tools/testing/selftests/vm/gup_benchmark.c +++ b/tools/testing/selftests/vm/gup_benchmark.c @@ -15,12 +15,12 @@ #define PAGE_SIZE sysconf(_SC_PAGESIZE) #define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark) -#define GUP_LONGTERM_BENCHMARK _IOWR('g', 2, struct gup_benchmark) -#define GUP_BENCHMARK _IOWR('g', 3, struct gup_benchmark) +#define GUP_BENCHMARK _IOWR('g', 2, struct gup_benchmark) /* Similar to above, but use FOLL_PIN instead of FOLL_GET. */ -#define PIN_FAST_BENCHMARK _IOWR('g', 4, struct gup_benchmark) -#define PIN_BENCHMARK _IOWR('g', 5, struct gup_benchmark) +#define PIN_FAST_BENCHMARK _IOWR('g', 3, struct gup_benchmark) +#define PIN_BENCHMARK _IOWR('g', 4, struct gup_benchmark) +#define PIN_LONGTERM_BENCHMARK _IOWR('g', 5, struct gup_benchmark) /* Just the flags we need, copied from mm.h: */ #define FOLL_WRITE 0x01 /* check pte is writable */ @@ -52,6 +52,9 @@ int main(int argc, char **argv) case 'b': cmd = PIN_BENCHMARK; break; + case 'L': + cmd = PIN_LONGTERM_BENCHMARK; + break; case 'm': size = atoi(optarg) * MB; break; @@ -67,9 +70,6 @@ int main(int argc, char **argv) case 'T': thp = 0; break; - case 'L': - cmd = GUP_LONGTERM_BENCHMARK; - break; case 'U': cmd = GUP_BENCHMARK; break; -- cgit v1.3.1 From efc9511cecf617c828734024f4e1cde5f974f510 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Tue, 13 Oct 2020 16:53:16 -0700 Subject: selftests/vm: fix false build success on the second and later attempts Patch series "selftests/vm: fix some minor aggravating factors in the Makefile". This fixes a couple of minor aggravating factors that I ran across while trying to do some changes in selftests/vm. These are simple things, but like most things with GNU Make, it's rarely obvious what's wrong until you understand *the entire Makefile and all of its includes*. So while there is, of course, joy in learning those details, I thought I'd fix these little things, so as to allow others to skip out on the Joy if they so choose. :) First of all, if you have an item (let's choose userfaultfd for an example) that fails to build, you might do this: $ make -j32 # ...you observe a failed item in the threaded output # OK, let's get a closer look $ make # ...but now the build quietly "succeeds". That's what Patch 0001 fixes. Second, if you instead attempt this approach for your closer look (a casual mistake, as it's not supported): $ make userfaultfd # ...userfaultfd fails to link, due to incomplete LDLIBS That's what Patch 0002 fixes. This patch (of 2): If one or more of these selftest fail to build, then after the first failure, subsequent invocations of "make" will make it appear that there are no build failures, after all. That's because the failed build products remain, with up-to-date timestamps, thus tricking Make (and you!) into believing that there's nothing else to build. Fix this by telling Make to delete targets that didn't completely succeed. Signed-off-by: John Hubbard Signed-off-by: Andrew Morton Cc: Shuah Khan Cc: Jason Gunthorpe Link: https://lkml.kernel.org/r/20200915012901.1655280-1-jhubbard@nvidia.com Link: https://lkml.kernel.org/r/20200915012901.1655280-2-jhubbard@nvidia.com Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/Makefile | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index a9026706d597..9f2625bebf07 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -3,6 +3,11 @@ uname_M := $(shell uname -m 2>/dev/null || echo not) MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/') +# Without this, failed build products remain, with up-to-date timestamps, +# thus tricking Make (and you!) into believing that All Is Well, in subsequent +# make invocations: +.DELETE_ON_ERROR: + CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) LDLIBS = -lrt TEST_GEN_FILES = compaction_test -- cgit v1.3.1 From 34d109131f485eccd3f7e3050581eb73bffa3520 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Tue, 13 Oct 2020 16:53:19 -0700 Subject: selftests/vm: fix incorrect gcc invocation in some cases Avoid accidental wrong builds, due to built-in rules working just a little bit too well--but not quite as well as required for our situation here. In other words, "make userfaultfd" (for example) is supposed to fail to build at all, because this Makefile only supports either "make" (all), or "make /full/path". However, the built-in rules, if not suppressed, will pick up CFLAGS and the initial LDLIBS (but not the target-specific LDLIBS, because those are only set for the full path target!). This causes it to get pretty far into building things despite using incorrect values such as an *occasionally* incomplete LDLIBS value. Signed-off-by: John Hubbard Signed-off-by: Andrew Morton Cc: Shuah Khan Cc: Jason Gunthorpe Link: https://lkml.kernel.org/r/20200915012901.1655280-3-jhubbard@nvidia.com Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/Makefile | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 9f2625bebf07..30873b19d04b 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -8,6 +8,18 @@ MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/') # make invocations: .DELETE_ON_ERROR: +# Avoid accidental wrong builds, due to built-in rules working just a little +# bit too well--but not quite as well as required for our situation here. +# +# In other words, "make userfaultfd" is supposed to fail to build at all, +# because this Makefile only supports either "make" (all), or "make /full/path". +# However, the built-in rules, if not suppressed, will pick up CFLAGS and the +# initial LDLIBS (but not the target-specific LDLIBS, because those are only +# set for the full path target!). This causes it to get pretty far into building +# things despite using incorrect values such as an *occasionally* incomplete +# LDLIBS. +MAKEFLAGS += --no-builtin-rules + CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) LDLIBS = -lrt TEST_GEN_FILES = compaction_test -- cgit v1.3.1 From bfe18a0900f1188e323d3f2c3cd2d6dfe2d0789c Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 13 Oct 2020 16:54:29 -0700 Subject: tools/testing/selftests/vm/hmm-tests.c: use the new SKIP() macro Some tests might not be able to be run if resources like huge pages are not available. Mark these tests as skipped instead of simply passing. Signed-off-by: Ralph Campbell Signed-off-by: Andrew Morton Reviewed-by: Jason Gunthorpe Cc: Jerome Glisse Cc: John Hubbard Cc: Shuah Khan Link: http://lkml.kernel.org/r/20200827190400.12608-1-rcampbell@nvidia.com Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/hmm-tests.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c index 93fc5cadce61..0a28a6a29581 100644 --- a/tools/testing/selftests/vm/hmm-tests.c +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -680,7 +680,7 @@ TEST_F(hmm, anon_write_hugetlbfs) n = gethugepagesizes(pagesizes, 4); if (n <= 0) - return; + SKIP(return, "Huge page size could not be determined"); for (idx = 0; --n > 0; ) { if (pagesizes[n] < pagesizes[idx]) idx = n; @@ -694,7 +694,7 @@ TEST_F(hmm, anon_write_hugetlbfs) buffer->ptr = get_hugepage_region(size, GHR_STRICT); if (buffer->ptr == NULL) { free(buffer); - return; + SKIP(return, "Huge page could not be allocated"); } buffer->fd = -1; -- cgit v1.3.1 From 1100262037be8008cc85240389fbe5eac4df034d Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Tue, 13 Oct 2020 16:57:04 -0700 Subject: selftests/vm: 8x compaction_test speedup This patch reduces the running time for compaction_test from about 27 sec, to 3.3 sec, which is about an 8x speedup. These numbers are for an Intel x86_64 system with 32 GB of DRAM. The compaction_test.c program was spending most of its time doing mmap(), 1 MB at a time, on about 25 GB of memory. Instead, do the mmaps 100 MB at a time. (Going past 100 MB doesn't make things go much faster, because other parts of the program are using the remaining time.) Signed-off-by: John Hubbard Signed-off-by: Andrew Morton Acked-by: Sri Jayaramappa Cc: Shuah Khan Cc: Mel Gorman Link: https://lkml.kernel.org/r/20201002080621.551044-2-jhubbard@nvidia.com Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/compaction_test.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c index bcec71250873..9b420140ba2b 100644 --- a/tools/testing/selftests/vm/compaction_test.c +++ b/tools/testing/selftests/vm/compaction_test.c @@ -18,7 +18,8 @@ #include "../kselftest.h" -#define MAP_SIZE 1048576 +#define MAP_SIZE_MB 100 +#define MAP_SIZE (MAP_SIZE_MB * 1024 * 1024) struct map_list { void *map; @@ -165,7 +166,7 @@ int main(int argc, char **argv) void *map = NULL; unsigned long mem_free = 0; unsigned long hugepage_size = 0; - unsigned long mem_fragmentable = 0; + long mem_fragmentable_MB = 0; if (prereq() != 0) { printf("Either the sysctl compact_unevictable_allowed is not\n" @@ -190,9 +191,9 @@ int main(int argc, char **argv) return -1; } - mem_fragmentable = mem_free * 0.8 / 1024; + mem_fragmentable_MB = mem_free * 0.8 / 1024; - while (mem_fragmentable > 0) { + while (mem_fragmentable_MB > 0) { map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0); if (map == MAP_FAILED) @@ -213,7 +214,7 @@ int main(int argc, char **argv) for (i = 0; i < MAP_SIZE; i += page_size) *(unsigned long *)(map + i) = (unsigned long)map + i; - mem_fragmentable--; + mem_fragmentable_MB -= MAP_SIZE_MB; } for (entry = list; entry != NULL; entry = entry->next) { -- cgit v1.3.1 From 996f9e0f93f16211945c8d5f18f296a88cb32f91 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Wed, 14 Oct 2020 13:47:11 +1100 Subject: selftests/powerpc: Fix eeh-basic.sh exit codes The kselftests test running infrastructure expects tests to finish with an exit code of 4 if the test decided it should be skipped. Currently eeh-basic.sh exits with the number of devices that failed to recover, so if four devices didn't recover we'll report a skip instead of a fail. Fix this by checking if the return code is non-zero and report success and failure by returning 0 or 1 respectively. For the cases where should actually skip return 4. Fixes: 85d86c8aa52e ("selftests/powerpc: Add basic EEH selftest") Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201014024711.1138386-1-oohall@gmail.com --- tools/testing/selftests/powerpc/eeh/eeh-basic.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh index 8a8d0f456946..0d783e1065c8 100755 --- a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh +++ b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh @@ -1,17 +1,19 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0-only +KSELFTESTS_SKIP=4 + . ./eeh-functions.sh if ! eeh_supported ; then echo "EEH not supported on this system, skipping" - exit 0; + exit $KSELFTESTS_SKIP; fi if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \ [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then echo "debugfs EEH testing files are missing. Is debugfs mounted?" - exit 1; + exit $KSELFTESTS_SKIP; fi pre_lspci=`mktemp` @@ -84,4 +86,5 @@ echo "$failed devices failed to recover ($dev_count tested)" lspci | diff -u $pre_lspci - rm -f $pre_lspci -exit $failed +test "$failed" == 0 +exit $? -- cgit v1.3.1 From 0aba7f036a56675b7fdc536757b4c49fc76a2208 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 13 Oct 2020 21:24:33 +0200 Subject: perf tools: Use build_id object in dso Replace build_id byte array with struct build_id object and all the code that references it. The objective is to carry size together with build id array, so it's better to keep both together. This is preparatory change for following patches, and there's no functional change. Signed-off-by: Jiri Olsa Acked-by: Ian Rogers Link: https://lore.kernel.org/r/20201013192441.1299447-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/inject-buildid.c | 4 ++-- tools/perf/builtin-buildid-cache.c | 2 +- tools/perf/builtin-inject.c | 4 ++-- tools/perf/util/annotate.c | 4 ++-- tools/perf/util/build-id.c | 6 +++--- tools/perf/util/build-id.h | 5 +++++ tools/perf/util/dso.c | 18 +++++++++--------- tools/perf/util/dso.h | 2 +- tools/perf/util/dsos.c | 4 ++-- tools/perf/util/header.c | 2 +- tools/perf/util/map.c | 4 ++-- tools/perf/util/probe-event.c | 2 +- tools/perf/util/scripting-engines/trace-event-python.c | 2 +- tools/perf/util/symbol.c | 2 +- tools/perf/util/synthetic-events.c | 2 +- 15 files changed, 34 insertions(+), 29 deletions(-) (limited to 'tools') diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c index e9a11f4a1109..3d048a8139a7 100644 --- a/tools/perf/bench/inject-buildid.c +++ b/tools/perf/bench/inject-buildid.c @@ -79,12 +79,12 @@ static int add_dso(const char *fpath, const struct stat *sb __maybe_unused, int typeflag, struct FTW *ftwbuf __maybe_unused) { struct bench_dso *dso = &dsos[nr_dsos]; - unsigned char build_id[BUILD_ID_SIZE]; + struct build_id bid; if (typeflag == FTW_D || typeflag == FTW_SL) return 0; - if (filename__read_build_id(fpath, build_id, BUILD_ID_SIZE) < 0) + if (filename__read_build_id(fpath, bid.data, sizeof(bid.data)) < 0) return 0; dso->name = realpath(fpath, NULL); diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 39efa51d7fb3..a523c629f321 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -284,7 +284,7 @@ static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) pr_warning("Problems with %s file, consider removing it from the cache\n", filename); - } else if (memcmp(dso->build_id, build_id, sizeof(dso->build_id))) { + } else if (memcmp(dso->bid.data, build_id, sizeof(dso->bid.data))) { pr_warning("Problems with %s file, consider removing it from the cache\n", filename); } diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index f3f965157d69..a35cdabe5bd4 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -522,8 +522,8 @@ static int dso__read_build_id(struct dso *dso) return 0; nsinfo__mountns_enter(dso->nsinfo, &nsc); - if (filename__read_build_id(dso->long_name, dso->build_id, - sizeof(dso->build_id)) > 0) { + if (filename__read_build_id(dso->long_name, dso->bid.data, + sizeof(dso->bid.data)) > 0) { dso->has_build_id = true; } nsinfo__mountns_exit(&nsc); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index fc17af7ba845..a016e1bd7b8d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1578,8 +1578,8 @@ int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, s char *build_id_msg = NULL; if (dso->has_build_id) { - build_id__sprintf(dso->build_id, - sizeof(dso->build_id), bf + 15); + build_id__sprintf(dso->bid.data, + sizeof(dso->bid.data), bf + 15); build_id_msg = bf; } scnprintf(buf, buflen, diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 31207b6e2066..7da13ddb0d50 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -272,7 +272,7 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size, if (!dso->has_build_id) return NULL; - build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + build_id__sprintf(dso->bid.data, sizeof(dso->bid.data), sbuild_id); linkname = build_id_cache__linkname(sbuild_id, NULL, 0); if (!linkname) return NULL; @@ -355,7 +355,7 @@ static int machine__write_buildid_table(struct machine *machine, in_kernel = pos->kernel || is_kernel_module(name, PERF_RECORD_MISC_CPUMODE_UNKNOWN); - err = write_buildid(name, name_len, pos->build_id, machine->pid, + err = write_buildid(name, name_len, pos->bid.data, machine->pid, in_kernel ? kmisc : umisc, fd); if (err) break; @@ -841,7 +841,7 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine) is_kallsyms = true; name = machine->mmap_name; } - return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name, + return build_id_cache__add_b(dso->bid.data, sizeof(dso->bid.data), name, dso->nsinfo, is_kallsyms, is_vdso); } diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 949f7e54c9cb..5033e96d5c9d 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -8,6 +8,11 @@ #include "tool.h" #include +struct build_id { + u8 data[BUILD_ID_SIZE]; + size_t size; +}; + struct nsinfo; extern struct perf_tool build_id__mark_dso_hit_ops; diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 5a3b4755f0b3..d2c1ed08c879 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -172,8 +172,8 @@ int dso__read_binary_type_filename(const struct dso *dso, break; } - build_id__sprintf(dso->build_id, - sizeof(dso->build_id), + build_id__sprintf(dso->bid.data, + sizeof(dso->bid.data), build_id_hex); len = __symbol__join_symfs(filename, size, "/usr/lib/debug/.build-id/"); snprintf(filename + len, size - len, "%.2s/%s.debug", @@ -1330,13 +1330,13 @@ void dso__put(struct dso *dso) void dso__set_build_id(struct dso *dso, void *build_id) { - memcpy(dso->build_id, build_id, sizeof(dso->build_id)); + memcpy(dso->bid.data, build_id, sizeof(dso->bid.data)); dso->has_build_id = 1; } bool dso__build_id_equal(const struct dso *dso, u8 *build_id) { - return memcmp(dso->build_id, build_id, sizeof(dso->build_id)) == 0; + return memcmp(dso->bid.data, build_id, sizeof(dso->bid.data)) == 0; } void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine) @@ -1346,8 +1346,8 @@ void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine) if (machine__is_default_guest(machine)) return; sprintf(path, "%s/sys/kernel/notes", machine->root_dir); - if (sysfs__read_build_id(path, dso->build_id, - sizeof(dso->build_id)) == 0) + if (sysfs__read_build_id(path, dso->bid.data, + sizeof(dso->bid.data)) == 0) dso->has_build_id = true; } @@ -1365,8 +1365,8 @@ int dso__kernel_module_get_build_id(struct dso *dso, "%s/sys/module/%.*s/notes/.note.gnu.build-id", root_dir, (int)strlen(name) - 1, name); - if (sysfs__read_build_id(filename, dso->build_id, - sizeof(dso->build_id)) == 0) + if (sysfs__read_build_id(filename, dso->bid.data, + sizeof(dso->bid.data)) == 0) dso->has_build_id = true; return 0; @@ -1376,7 +1376,7 @@ size_t dso__fprintf_buildid(struct dso *dso, FILE *fp) { char sbuild_id[SBUILD_ID_SIZE]; - build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + build_id__sprintf(dso->bid.data, sizeof(dso->bid.data), sbuild_id); return fprintf(fp, "%s", sbuild_id); } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 8ad17f395a19..eac004210b47 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -176,7 +176,7 @@ struct dso { bool sorted_by_name; bool loaded; u8 rel; - u8 build_id[BUILD_ID_SIZE]; + struct build_id bid; u64 text_offset; const char *short_name; const char *long_name; diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index 939471731ea6..e3af46e818f1 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -73,8 +73,8 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) continue; } nsinfo__mountns_enter(pos->nsinfo, &nsc); - if (filename__read_build_id(pos->long_name, pos->build_id, - sizeof(pos->build_id)) > 0) { + if (filename__read_build_id(pos->long_name, pos->bid.data, + sizeof(pos->bid.data)) > 0) { have_build_id = true; pos->has_build_id = true; } diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 9cf4efdcbbbd..cde8f6eba479 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2095,7 +2095,7 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev, free(m.name); } - build_id__sprintf(dso->build_id, sizeof(dso->build_id), + build_id__sprintf(dso->bid.data, sizeof(dso->bid.data), sbuild_id); pr_debug("build id event received for %s: %s\n", dso->long_name, sbuild_id); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 8b305e624124..522df3090b1c 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -331,8 +331,8 @@ int map__load(struct map *map) if (map->dso->has_build_id) { char sbuild_id[SBUILD_ID_SIZE]; - build_id__sprintf(map->dso->build_id, - sizeof(map->dso->build_id), + build_id__sprintf(map->dso->bid.data, + sizeof(map->dso->bid.data), sbuild_id); pr_debug("%s with build id %s not found", name, sbuild_id); } else diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 3a1b58a92673..2041ad849851 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -473,7 +473,7 @@ static struct debuginfo *open_from_debuginfod(struct dso *dso, struct nsinfo *ns if (!c) return NULL; - build_id__sprintf(dso->build_id, BUILD_ID_SIZE, sbuild_id); + build_id__sprintf(dso->bid.data, BUILD_ID_SIZE, sbuild_id); fd = debuginfod_find_debuginfo(c, (const unsigned char *)sbuild_id, 0, &path); if (fd >= 0) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 739516fdf6e3..db3b1c275d8f 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1064,7 +1064,7 @@ static int python_export_dso(struct db_export *dbe, struct dso *dso, char sbuild_id[SBUILD_ID_SIZE]; PyObject *t; - build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + build_id__sprintf(dso->bid.data, sizeof(dso->bid.data), sbuild_id); t = tuple_new(5); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 5ddf76fb691c..c772c8c70db5 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2153,7 +2153,7 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) goto proc_kallsyms; } - build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + build_id__sprintf(dso->bid.data, sizeof(dso->bid.data), sbuild_id); /* Find kallsyms in build-id cache with kcore */ scnprintf(path, sizeof(path), "%s/%s/%s", diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 3ca5d9399680..8a23391558cf 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1961,7 +1961,7 @@ int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 len = pos->long_name_len + 1; len = PERF_ALIGN(len, NAME_ALIGN); - memcpy(&ev.build_id.build_id, pos->build_id, sizeof(pos->build_id)); + memcpy(&ev.build_id.build_id, pos->bid.data, sizeof(pos->bid.data)); ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID; ev.build_id.header.misc = misc; ev.build_id.pid = machine->pid; -- cgit v1.3.1 From f766819cd5290d42efa55c505b9bed184fec17bf Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 13 Oct 2020 21:24:34 +0200 Subject: perf tools: Pass build_id object to filename__read_build_id() Pass a build_id object to filename__read_build_id function, so it can populate the size of the build_id object. Changing filename__read_build_id() code for both ELF/non-ELF code. Signed-off-by: Jiri Olsa Acked-by: Ian Rogers Link: https://lore.kernel.org/r/20201013192441.1299447-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/inject-buildid.c | 2 +- tools/perf/builtin-buildid-cache.c | 25 ++++++++++++------------- tools/perf/builtin-inject.c | 4 +--- tools/perf/tests/pe-file-parsing.c | 10 +++++----- tools/perf/tests/sdt.c | 6 +++--- tools/perf/util/build-id.c | 8 +++----- tools/perf/util/dsos.c | 3 +-- tools/perf/util/symbol-elf.c | 16 ++++++++++------ tools/perf/util/symbol-minimal.c | 34 +++++++++++++++++++--------------- tools/perf/util/symbol.c | 6 +++--- tools/perf/util/symbol.h | 3 ++- 11 files changed, 60 insertions(+), 57 deletions(-) (limited to 'tools') diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c index 3d048a8139a7..280227e3ffd7 100644 --- a/tools/perf/bench/inject-buildid.c +++ b/tools/perf/bench/inject-buildid.c @@ -84,7 +84,7 @@ static int add_dso(const char *fpath, const struct stat *sb __maybe_unused, if (typeflag == FTW_D || typeflag == FTW_SL) return 0; - if (filename__read_build_id(fpath, bid.data, sizeof(bid.data)) < 0) + if (filename__read_build_id(fpath, &bid) < 0) return 0; dso->name = realpath(fpath, NULL); diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index a523c629f321..c3cb168d546d 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -174,19 +174,19 @@ static int build_id_cache__add_kcore(const char *filename, bool force) static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi) { char sbuild_id[SBUILD_ID_SIZE]; - u8 build_id[BUILD_ID_SIZE]; + struct build_id bid; int err; struct nscookie nsc; nsinfo__mountns_enter(nsi, &nsc); - err = filename__read_build_id(filename, &build_id, sizeof(build_id)); + err = filename__read_build_id(filename, &bid); nsinfo__mountns_exit(&nsc); if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); return -1; } - build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + build_id__sprintf(bid.data, sizeof(bid.data), sbuild_id); err = build_id_cache__add_s(sbuild_id, filename, nsi, false, false); pr_debug("Adding %s %s: %s\n", sbuild_id, filename, @@ -196,21 +196,21 @@ static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi) static int build_id_cache__remove_file(const char *filename, struct nsinfo *nsi) { - u8 build_id[BUILD_ID_SIZE]; char sbuild_id[SBUILD_ID_SIZE]; + struct build_id bid; struct nscookie nsc; int err; nsinfo__mountns_enter(nsi, &nsc); - err = filename__read_build_id(filename, &build_id, sizeof(build_id)); + err = filename__read_build_id(filename, &bid); nsinfo__mountns_exit(&nsc); if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); return -1; } - build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + build_id__sprintf(bid.data, sizeof(bid.data), sbuild_id); err = build_id_cache__remove_s(sbuild_id); pr_debug("Removing %s %s: %s\n", sbuild_id, filename, err ? "FAIL" : "Ok"); @@ -274,17 +274,16 @@ static int build_id_cache__purge_all(void) static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) { char filename[PATH_MAX]; - u8 build_id[BUILD_ID_SIZE]; + struct build_id bid; if (dso__build_id_filename(dso, filename, sizeof(filename), false) && - filename__read_build_id(filename, build_id, - sizeof(build_id)) != sizeof(build_id)) { + filename__read_build_id(filename, &bid) == -1) { if (errno == ENOENT) return false; pr_warning("Problems with %s file, consider removing it from the cache\n", filename); - } else if (memcmp(dso->bid.data, build_id, sizeof(dso->bid.data))) { + } else if (memcmp(dso->bid.data, bid.data, bid.size)) { pr_warning("Problems with %s file, consider removing it from the cache\n", filename); } @@ -300,14 +299,14 @@ static int build_id_cache__fprintf_missing(struct perf_session *session, FILE *f static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi) { - u8 build_id[BUILD_ID_SIZE]; char sbuild_id[SBUILD_ID_SIZE]; + struct build_id bid; struct nscookie nsc; int err; nsinfo__mountns_enter(nsi, &nsc); - err = filename__read_build_id(filename, &build_id, sizeof(build_id)); + err = filename__read_build_id(filename, &bid); nsinfo__mountns_exit(&nsc); if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); @@ -315,7 +314,7 @@ static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi) } err = 0; - build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + build_id__sprintf(bid.data, sizeof(bid.data), sbuild_id); if (build_id_cache__cached(sbuild_id)) err = build_id_cache__remove_s(sbuild_id); diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index a35cdabe5bd4..452a75fe68e5 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -522,10 +522,8 @@ static int dso__read_build_id(struct dso *dso) return 0; nsinfo__mountns_enter(dso->nsinfo, &nsc); - if (filename__read_build_id(dso->long_name, dso->bid.data, - sizeof(dso->bid.data)) > 0) { + if (filename__read_build_id(dso->long_name, &dso->bid) > 0) dso->has_build_id = true; - } nsinfo__mountns_exit(&nsc); return dso->has_build_id ? 0 : -1; diff --git a/tools/perf/tests/pe-file-parsing.c b/tools/perf/tests/pe-file-parsing.c index 19eae3e8e229..58b90c42eb38 100644 --- a/tools/perf/tests/pe-file-parsing.c +++ b/tools/perf/tests/pe-file-parsing.c @@ -24,7 +24,7 @@ static int run_dir(const char *d) { char filename[PATH_MAX]; char debugfile[PATH_MAX]; - char build_id[BUILD_ID_SIZE]; + struct build_id bid; char debuglink[PATH_MAX]; char expect_build_id[] = { 0x5a, 0x0f, 0xd8, 0x82, 0xb5, 0x30, 0x84, 0x22, @@ -36,10 +36,10 @@ static int run_dir(const char *d) int ret; scnprintf(filename, PATH_MAX, "%s/pe-file.exe", d); - ret = filename__read_build_id(filename, build_id, BUILD_ID_SIZE); + ret = filename__read_build_id(filename, &bid); TEST_ASSERT_VAL("Failed to read build_id", ret == sizeof(expect_build_id)); - TEST_ASSERT_VAL("Wrong build_id", !memcmp(build_id, expect_build_id, + TEST_ASSERT_VAL("Wrong build_id", !memcmp(bid.data, expect_build_id, sizeof(expect_build_id))); ret = filename__read_debuglink(filename, debuglink, PATH_MAX); @@ -48,10 +48,10 @@ static int run_dir(const char *d) !strcmp(debuglink, expect_debuglink)); scnprintf(debugfile, PATH_MAX, "%s/%s", d, debuglink); - ret = filename__read_build_id(debugfile, build_id, BUILD_ID_SIZE); + ret = filename__read_build_id(debugfile, &bid); TEST_ASSERT_VAL("Failed to read debug file build_id", ret == sizeof(expect_build_id)); - TEST_ASSERT_VAL("Wrong build_id", !memcmp(build_id, expect_build_id, + TEST_ASSERT_VAL("Wrong build_id", !memcmp(bid.data, expect_build_id, sizeof(expect_build_id))); dso = dso__new(filename); diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index 60f0e9ee04fb..3ef37f739203 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -28,16 +28,16 @@ static int target_function(void) static int build_id_cache__add_file(const char *filename) { char sbuild_id[SBUILD_ID_SIZE]; - u8 build_id[BUILD_ID_SIZE]; + struct build_id bid; int err; - err = filename__read_build_id(filename, &build_id, sizeof(build_id)); + err = filename__read_build_id(filename, &bid); if (err < 0) { pr_debug("Failed to read build id of %s\n", filename); return err; } - build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + build_id__sprintf(bid.data, sizeof(bid.data), sbuild_id); err = build_id_cache__add_s(sbuild_id, filename, NULL, false, false); if (err < 0) pr_debug("Failed to add build id cache of %s\n", filename); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 7da13ddb0d50..62b258aaa128 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -130,16 +130,14 @@ int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id) int filename__sprintf_build_id(const char *pathname, char *sbuild_id) { - u8 build_id[BUILD_ID_SIZE]; + struct build_id bid; int ret; - ret = filename__read_build_id(pathname, build_id, sizeof(build_id)); + ret = filename__read_build_id(pathname, &bid); if (ret < 0) return ret; - else if (ret != sizeof(build_id)) - return -EINVAL; - return build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + return build_id__sprintf(bid.data, sizeof(bid.data), sbuild_id); } /* asnprintf consolidates asprintf and snprintf */ diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index e3af46e818f1..87161e431830 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -73,8 +73,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) continue; } nsinfo__mountns_enter(pos->nsinfo, &nsc); - if (filename__read_build_id(pos->long_name, pos->bid.data, - sizeof(pos->bid.data)) > 0) { + if (filename__read_build_id(pos->long_name, &pos->bid) > 0) { have_build_id = true; pos->has_build_id = true; } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 94a156df22d5..61d7c444e6f5 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -534,8 +534,9 @@ out: #ifdef HAVE_LIBBFD_BUILDID_SUPPORT -int filename__read_build_id(const char *filename, void *bf, size_t size) +int filename__read_build_id(const char *filename, struct build_id *bid) { + size_t size = sizeof(bid->data); int err = -1; bfd *abfd; @@ -551,9 +552,9 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) if (!abfd->build_id || abfd->build_id->size > size) goto out_close; - memcpy(bf, abfd->build_id->data, abfd->build_id->size); - memset(bf + abfd->build_id->size, 0, size - abfd->build_id->size); - err = abfd->build_id->size; + memcpy(bid->data, abfd->build_id->data, abfd->build_id->size); + memset(bid->data + abfd->build_id->size, 0, size - abfd->build_id->size); + err = bid->size = abfd->build_id->size; out_close: bfd_close(abfd); @@ -562,8 +563,9 @@ out_close: #else // HAVE_LIBBFD_BUILDID_SUPPORT -int filename__read_build_id(const char *filename, void *bf, size_t size) +int filename__read_build_id(const char *filename, struct build_id *bid) { + size_t size = sizeof(bid->data); int fd, err = -1; Elf *elf; @@ -580,7 +582,9 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) goto out_close; } - err = elf_read_build_id(elf, bf, size); + err = elf_read_build_id(elf, bid->data, size); + if (err > 0) + bid->size = err; elf_end(elf); out_close: diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index d6e99af263ec..5173331ee6e4 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -31,9 +31,10 @@ static bool check_need_swap(int file_endian) #define NT_GNU_BUILD_ID 3 -static int read_build_id(void *note_data, size_t note_len, void *bf, - size_t size, bool need_swap) +static int read_build_id(void *note_data, size_t note_len, struct build_id *bid, + bool need_swap) { + size_t size = sizeof(bid->data); struct { u32 n_namesz; u32 n_descsz; @@ -63,8 +64,9 @@ static int read_build_id(void *note_data, size_t note_len, void *bf, nhdr->n_namesz == sizeof("GNU")) { if (memcmp(name, "GNU", sizeof("GNU")) == 0) { size_t sz = min(size, descsz); - memcpy(bf, ptr, sz); - memset(bf + sz, 0, size - sz); + memcpy(bid->data, ptr, sz); + memset(bid->data + sz, 0, size - sz); + bid->size = sz; return 0; } } @@ -84,7 +86,7 @@ int filename__read_debuglink(const char *filename __maybe_unused, /* * Just try PT_NOTE header otherwise fails */ -int filename__read_build_id(const char *filename, void *bf, size_t size) +int filename__read_build_id(const char *filename, struct build_id *bid) { FILE *fp; int ret = -1; @@ -156,9 +158,9 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) if (fread(buf, buf_size, 1, fp) != 1) goto out_free; - ret = read_build_id(buf, buf_size, bf, size, need_swap); + ret = read_build_id(buf, buf_size, bid, need_swap); if (ret == 0) - ret = size; + ret = bid->size; break; } } else { @@ -207,9 +209,9 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) if (fread(buf, buf_size, 1, fp) != 1) goto out_free; - ret = read_build_id(buf, buf_size, bf, size, need_swap); + ret = read_build_id(buf, buf_size, bid, need_swap); if (ret == 0) - ret = size; + ret = bid->size; break; } } @@ -220,8 +222,9 @@ out: return ret; } -int sysfs__read_build_id(const char *filename, void *build_id, size_t size) +int sysfs__read_build_id(const char *filename, void *build_id, size_t size __maybe_unused) { + struct build_id bid; int fd; int ret = -1; struct stat stbuf; @@ -243,7 +246,9 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size) if (read(fd, buf, buf_size) != (ssize_t) buf_size) goto out_free; - ret = read_build_id(buf, buf_size, build_id, size, false); + ret = read_build_id(buf, buf_size, &bid, false); + if (ret > 0) + memcpy(build_id, bid.data, bid.size); out_free: free(buf); out: @@ -339,16 +344,15 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused, struct symsrc *runtime_ss __maybe_unused, int kmodule __maybe_unused) { - unsigned char build_id[BUILD_ID_SIZE]; + struct build_id bid; int ret; ret = fd__is_64_bit(ss->fd); if (ret >= 0) dso->is_64_bit = ret; - if (filename__read_build_id(ss->name, build_id, BUILD_ID_SIZE) > 0) { - dso__set_build_id(dso, build_id); - } + if (filename__read_build_id(ss->name, &bid) > 0) + dso__set_build_id(dso, bid.data); return 0; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index c772c8c70db5..4c43bb959fee 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1755,7 +1755,7 @@ int dso__load(struct dso *dso, struct map *map) struct symsrc *syms_ss = NULL, *runtime_ss = NULL; bool kmod; bool perfmap; - unsigned char build_id[BUILD_ID_SIZE]; + struct build_id bid; struct nscookie nsc; char newmapname[PATH_MAX]; const char *map_path = dso->long_name; @@ -1817,8 +1817,8 @@ int dso__load(struct dso *dso, struct map *map) if (!dso->has_build_id && is_regular_file(dso->long_name)) { __symbol__join_symfs(name, PATH_MAX, dso->long_name); - if (filename__read_build_id(name, build_id, BUILD_ID_SIZE) > 0) - dso__set_build_id(dso, build_id); + if (filename__read_build_id(name, &bid) > 0) + dso__set_build_id(dso, bid.data); } /* diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 11fe71f46d14..98908fa3f796 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -23,6 +23,7 @@ struct dso; struct map; struct maps; struct option; +struct build_id; /* * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP; @@ -142,7 +143,7 @@ struct symbol *dso__next_symbol(struct symbol *sym); enum dso_type dso__type_fd(int fd); -int filename__read_build_id(const char *filename, void *bf, size_t size); +int filename__read_build_id(const char *filename, struct build_id *id); int sysfs__read_build_id(const char *filename, void *bf, size_t size); int modules__parse(const char *filename, void *arg, int (*process_module)(void *arg, const char *name, -- cgit v1.3.1 From 3ff1b8c8cc123f21828ff9538f13f1770a9a4099 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 13 Oct 2020 21:24:35 +0200 Subject: perf tools: Pass build id object to sysfs__read_build_id() Passing build id object to sysfs__read_build_id function, so it can populate the size of the build_id object. Signed-off-by: Jiri Olsa Acked-by: Ian Rogers Link: https://lore.kernel.org/r/20201013192441.1299447-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 6 +++--- tools/perf/util/dso.c | 6 ++---- tools/perf/util/symbol-elf.c | 11 +++++------ tools/perf/util/symbol-minimal.c | 7 ++----- tools/perf/util/symbol.c | 7 +++---- tools/perf/util/symbol.h | 2 +- 6 files changed, 16 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 62b258aaa128..1c332e78bbc3 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -113,7 +113,7 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf) int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id) { char notes[PATH_MAX]; - u8 build_id[BUILD_ID_SIZE]; + struct build_id bid; int ret; if (!root_dir) @@ -121,11 +121,11 @@ int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id) scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir); - ret = sysfs__read_build_id(notes, build_id, sizeof(build_id)); + ret = sysfs__read_build_id(notes, &bid); if (ret < 0) return ret; - return build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + return build_id__sprintf(bid.data, sizeof(bid.data), sbuild_id); } int filename__sprintf_build_id(const char *pathname, char *sbuild_id) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index d2c1ed08c879..e87fa9a71d9f 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1346,8 +1346,7 @@ void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine) if (machine__is_default_guest(machine)) return; sprintf(path, "%s/sys/kernel/notes", machine->root_dir); - if (sysfs__read_build_id(path, dso->bid.data, - sizeof(dso->bid.data)) == 0) + if (sysfs__read_build_id(path, &dso->bid) == 0) dso->has_build_id = true; } @@ -1365,8 +1364,7 @@ int dso__kernel_module_get_build_id(struct dso *dso, "%s/sys/module/%.*s/notes/.note.gnu.build-id", root_dir, (int)strlen(name) - 1, name); - if (sysfs__read_build_id(filename, dso->bid.data, - sizeof(dso->bid.data)) == 0) + if (sysfs__read_build_id(filename, &dso->bid) == 0) dso->has_build_id = true; return 0; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 61d7c444e6f5..97a55f162ea5 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -595,13 +595,11 @@ out: #endif // HAVE_LIBBFD_BUILDID_SUPPORT -int sysfs__read_build_id(const char *filename, void *build_id, size_t size) +int sysfs__read_build_id(const char *filename, struct build_id *bid) { + size_t size = sizeof(bid->data); int fd, err = -1; - if (size < BUILD_ID_SIZE) - goto out; - fd = open(filename, O_RDONLY); if (fd < 0) goto out; @@ -622,8 +620,9 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size) break; if (memcmp(bf, "GNU", sizeof("GNU")) == 0) { size_t sz = min(descsz, size); - if (read(fd, build_id, sz) == (ssize_t)sz) { - memset(build_id + sz, 0, size - sz); + if (read(fd, bid->data, sz) == (ssize_t)sz) { + memset(bid->data + sz, 0, size - sz); + bid->size = sz; err = 0; break; } diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 5173331ee6e4..dba6b9e5d64e 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -222,9 +222,8 @@ out: return ret; } -int sysfs__read_build_id(const char *filename, void *build_id, size_t size __maybe_unused) +int sysfs__read_build_id(const char *filename, struct build_id *bid) { - struct build_id bid; int fd; int ret = -1; struct stat stbuf; @@ -246,9 +245,7 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size __may if (read(fd, buf, buf_size) != (ssize_t) buf_size) goto out_free; - ret = read_build_id(buf, buf_size, &bid, false); - if (ret > 0) - memcpy(build_id, bid.data, bid.size); + ret = read_build_id(buf, buf_size, bid, false); out_free: free(buf); out: diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 4c43bb959fee..dd1cf91c62fb 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2122,7 +2122,7 @@ static bool filename__readable(const char *file) static char *dso__find_kallsyms(struct dso *dso, struct map *map) { - u8 host_build_id[BUILD_ID_SIZE]; + struct build_id bid; char sbuild_id[SBUILD_ID_SIZE]; bool is_host = false; char path[PATH_MAX]; @@ -2135,9 +2135,8 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) goto proc_kallsyms; } - if (sysfs__read_build_id("/sys/kernel/notes", host_build_id, - sizeof(host_build_id)) == 0) - is_host = dso__build_id_equal(dso, host_build_id); + if (sysfs__read_build_id("/sys/kernel/notes", &bid) == 0) + is_host = dso__build_id_equal(dso, bid.data); /* Try a fast path for /proc/kallsyms if possible */ if (is_host) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 98908fa3f796..70b3874e7dd8 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -144,7 +144,7 @@ struct symbol *dso__next_symbol(struct symbol *sym); enum dso_type dso__type_fd(int fd); int filename__read_build_id(const char *filename, struct build_id *id); -int sysfs__read_build_id(const char *filename, void *bf, size_t size); +int sysfs__read_build_id(const char *filename, struct build_id *bid); int modules__parse(const char *filename, void *arg, int (*process_module)(void *arg, const char *name, u64 start, u64 size)); -- cgit v1.3.1 From bf5411695a31f7bf07c3e9bad27b442c9224f2a9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 13 Oct 2020 21:24:36 +0200 Subject: perf tools: Pass build_id object to build_id__sprintf() Passing build_id object to build_id__sprintf function, so it can operate with the proper size of build id. This will create proper md5 build id readable names, like following: a50e350e97c43b4708d09bcd85ebfff7 instead of: a50e350e97c43b4708d09bcd85ebfff700000000 Signed-off-by: Jiri Olsa Acked-by: Ian Rogers Link: https://lore.kernel.org/r/20201013192441.1299447-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-buildid-cache.c | 6 ++--- tools/perf/tests/sdt.c | 2 +- tools/perf/util/annotate.c | 3 +-- tools/perf/util/build-id.c | 30 ++++++++++++++-------- tools/perf/util/build-id.h | 3 ++- tools/perf/util/dso.c | 6 ++--- tools/perf/util/header.c | 3 +-- tools/perf/util/map.c | 4 +-- tools/perf/util/probe-event.c | 9 ++++--- tools/perf/util/probe-finder.c | 8 +++--- .../util/scripting-engines/trace-event-python.c | 2 +- tools/perf/util/symbol.c | 2 +- 12 files changed, 43 insertions(+), 35 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index c3cb168d546d..a25411926e48 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -186,7 +186,7 @@ static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi) return -1; } - build_id__sprintf(bid.data, sizeof(bid.data), sbuild_id); + build_id__sprintf(&bid, sbuild_id); err = build_id_cache__add_s(sbuild_id, filename, nsi, false, false); pr_debug("Adding %s %s: %s\n", sbuild_id, filename, @@ -210,7 +210,7 @@ static int build_id_cache__remove_file(const char *filename, struct nsinfo *nsi) return -1; } - build_id__sprintf(bid.data, sizeof(bid.data), sbuild_id); + build_id__sprintf(&bid, sbuild_id); err = build_id_cache__remove_s(sbuild_id); pr_debug("Removing %s %s: %s\n", sbuild_id, filename, err ? "FAIL" : "Ok"); @@ -314,7 +314,7 @@ static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi) } err = 0; - build_id__sprintf(bid.data, sizeof(bid.data), sbuild_id); + build_id__sprintf(&bid, sbuild_id); if (build_id_cache__cached(sbuild_id)) err = build_id_cache__remove_s(sbuild_id); diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index 3ef37f739203..ed76c693f65e 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -37,7 +37,7 @@ static int build_id_cache__add_file(const char *filename) return err; } - build_id__sprintf(bid.data, sizeof(bid.data), sbuild_id); + build_id__sprintf(&bid, sbuild_id); err = build_id_cache__add_s(sbuild_id, filename, NULL, false, false); if (err < 0) pr_debug("Failed to add build id cache of %s\n", filename); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index a016e1bd7b8d..6c8575e182ed 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1578,8 +1578,7 @@ int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, s char *build_id_msg = NULL; if (dso->has_build_id) { - build_id__sprintf(dso->bid.data, - sizeof(dso->bid.data), bf + 15); + build_id__sprintf(&dso->bid, bf + 15); build_id_msg = bf; } scnprintf(buf, buflen, diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 1c332e78bbc3..b5648735f01f 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -37,6 +37,7 @@ #include #include +#include static bool no_buildid_cache; @@ -95,13 +96,13 @@ struct perf_tool build_id__mark_dso_hit_ops = { .ordered_events = true, }; -int build_id__sprintf(const u8 *build_id, int len, char *bf) +int build_id__sprintf(const struct build_id *build_id, char *bf) { char *bid = bf; - const u8 *raw = build_id; - int i; + const u8 *raw = build_id->data; + size_t i; - for (i = 0; i < len; ++i) { + for (i = 0; i < build_id->size; ++i) { sprintf(bid, "%02x", *raw); ++raw; bid += 2; @@ -125,7 +126,7 @@ int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id) if (ret < 0) return ret; - return build_id__sprintf(bid.data, sizeof(bid.data), sbuild_id); + return build_id__sprintf(&bid, sbuild_id); } int filename__sprintf_build_id(const char *pathname, char *sbuild_id) @@ -137,7 +138,7 @@ int filename__sprintf_build_id(const char *pathname, char *sbuild_id) if (ret < 0) return ret; - return build_id__sprintf(bid.data, sizeof(bid.data), sbuild_id); + return build_id__sprintf(&bid, sbuild_id); } /* asnprintf consolidates asprintf and snprintf */ @@ -270,7 +271,7 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size, if (!dso->has_build_id) return NULL; - build_id__sprintf(dso->bid.data, sizeof(dso->bid.data), sbuild_id); + build_id__sprintf(&dso->bid, sbuild_id); linkname = build_id_cache__linkname(sbuild_id, NULL, 0); if (!linkname) return NULL; @@ -767,13 +768,13 @@ out_free: return err; } -static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, +static int build_id_cache__add_b(const struct build_id *bid, const char *name, struct nsinfo *nsi, bool is_kallsyms, bool is_vdso) { char sbuild_id[SBUILD_ID_SIZE]; - build_id__sprintf(build_id, build_id_size, sbuild_id); + build_id__sprintf(bid, sbuild_id); return build_id_cache__add_s(sbuild_id, name, nsi, is_kallsyms, is_vdso); @@ -839,8 +840,8 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine) is_kallsyms = true; name = machine->mmap_name; } - return build_id_cache__add_b(dso->bid.data, sizeof(dso->bid.data), name, - dso->nsinfo, is_kallsyms, is_vdso); + return build_id_cache__add_b(&dso->bid, name, dso->nsinfo, + is_kallsyms, is_vdso); } static int __dsos__cache_build_ids(struct list_head *head, @@ -900,3 +901,10 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits) return ret; } + +void build_id__init(struct build_id *bid, const u8 *data, size_t size) +{ + WARN_ON(size > BUILD_ID_SIZE); + memcpy(bid->data, data, size); + bid->size = size; +} diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 5033e96d5c9d..f293f99d5dba 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -19,7 +19,8 @@ extern struct perf_tool build_id__mark_dso_hit_ops; struct dso; struct feat_fd; -int build_id__sprintf(const u8 *build_id, int len, char *bf); +void build_id__init(struct build_id *bid, const u8 *data, size_t size); +int build_id__sprintf(const struct build_id *build_id, char *bf); int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id); int filename__sprintf_build_id(const char *pathname, char *sbuild_id); char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf, diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index e87fa9a71d9f..2f7f01ead9a1 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -172,9 +172,7 @@ int dso__read_binary_type_filename(const struct dso *dso, break; } - build_id__sprintf(dso->bid.data, - sizeof(dso->bid.data), - build_id_hex); + build_id__sprintf(&dso->bid, build_id_hex); len = __symbol__join_symfs(filename, size, "/usr/lib/debug/.build-id/"); snprintf(filename + len, size - len, "%.2s/%s.debug", build_id_hex, build_id_hex + 2); @@ -1374,7 +1372,7 @@ size_t dso__fprintf_buildid(struct dso *dso, FILE *fp) { char sbuild_id[SBUILD_ID_SIZE]; - build_id__sprintf(dso->bid.data, sizeof(dso->bid.data), sbuild_id); + build_id__sprintf(&dso->bid, sbuild_id); return fprintf(fp, "%s", sbuild_id); } diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index cde8f6eba479..fe220f01fc94 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2095,8 +2095,7 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev, free(m.name); } - build_id__sprintf(dso->bid.data, sizeof(dso->bid.data), - sbuild_id); + build_id__sprintf(&dso->bid, sbuild_id); pr_debug("build id event received for %s: %s\n", dso->long_name, sbuild_id); dso__put(dso); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 522df3090b1c..f44ede437dc7 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -331,9 +331,7 @@ int map__load(struct map *map) if (map->dso->has_build_id) { char sbuild_id[SBUILD_ID_SIZE]; - build_id__sprintf(map->dso->bid.data, - sizeof(map->dso->bid.data), - sbuild_id); + build_id__sprintf(&map->dso->bid, sbuild_id); pr_debug("%s with build id %s not found", name, sbuild_id); } else pr_debug("Failed to open %s", name); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 2041ad849851..8eae2afff71a 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -473,7 +473,7 @@ static struct debuginfo *open_from_debuginfod(struct dso *dso, struct nsinfo *ns if (!c) return NULL; - build_id__sprintf(dso->bid.data, BUILD_ID_SIZE, sbuild_id); + build_id__sprintf(&dso->bid, sbuild_id); fd = debuginfod_find_debuginfo(c, (const unsigned char *)sbuild_id, 0, &path); if (fd >= 0) @@ -1005,6 +1005,7 @@ static int _show_one_line(FILE *fp, int l, bool skip, bool show_num) static int __show_line_range(struct line_range *lr, const char *module, bool user) { + struct build_id bid; int l = 1; struct int_node *ln; struct debuginfo *dinfo; @@ -1025,8 +1026,10 @@ static int __show_line_range(struct line_range *lr, const char *module, if (!ret) ret = debuginfo__find_line_range(dinfo, lr); } - if (dinfo->build_id) - build_id__sprintf(dinfo->build_id, BUILD_ID_SIZE, sbuild_id); + if (dinfo->build_id) { + build_id__init(&bid, dinfo->build_id, BUILD_ID_SIZE); + build_id__sprintf(&bid, sbuild_id); + } debuginfo__delete(dinfo); if (ret == 0 || ret == -ENOENT) { pr_warning("Specified source line is not found.\n"); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 6eddf7be8293..2c4061035f77 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -949,6 +949,7 @@ static int probe_point_lazy_walker(const char *fname, int lineno, /* Find probe points from lazy pattern */ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) { + struct build_id bid; char sbuild_id[SBUILD_ID_SIZE] = ""; int ret = 0; char *fpath; @@ -957,9 +958,10 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) const char *comp_dir; comp_dir = cu_get_comp_dir(&pf->cu_die); - if (pf->dbg->build_id) - build_id__sprintf(pf->dbg->build_id, - BUILD_ID_SIZE, sbuild_id); + if (pf->dbg->build_id) { + build_id__init(&bid, pf->dbg->build_id, BUILD_ID_SIZE); + build_id__sprintf(&bid, sbuild_id); + } ret = find_source_path(pf->fname, sbuild_id, comp_dir, &fpath); if (ret < 0) { pr_warning("Failed to find source file path.\n"); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index db3b1c275d8f..7cbd024e3e63 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1064,7 +1064,7 @@ static int python_export_dso(struct db_export *dbe, struct dso *dso, char sbuild_id[SBUILD_ID_SIZE]; PyObject *t; - build_id__sprintf(dso->bid.data, sizeof(dso->bid.data), sbuild_id); + build_id__sprintf(&dso->bid, sbuild_id); t = tuple_new(5); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index dd1cf91c62fb..369cbad09f0d 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2152,7 +2152,7 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) goto proc_kallsyms; } - build_id__sprintf(dso->bid.data, sizeof(dso->bid.data), sbuild_id); + build_id__sprintf(&dso->bid, sbuild_id); /* Find kallsyms in build-id cache with kcore */ scnprintf(path, sizeof(path), "%s/%s/%s", -- cgit v1.3.1 From 8dfdf440d398f0bf63f1d4de881cbe9c69df53bb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 13 Oct 2020 21:24:37 +0200 Subject: perf tools: Pass build_id object to dso__set_build_id() Passing build_id object to dso__set_build_id(), so it's easier to initialize dos's build id object. Signed-off-by: Jiri Olsa Acked-by: Ian Rogers Link: https://lore.kernel.org/r/20201013192441.1299447-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 4 ++-- tools/perf/util/dso.h | 2 +- tools/perf/util/header.c | 4 +++- tools/perf/util/symbol-minimal.c | 2 +- tools/perf/util/symbol.c | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 2f7f01ead9a1..4415ce83150b 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1326,9 +1326,9 @@ void dso__put(struct dso *dso) dso__delete(dso); } -void dso__set_build_id(struct dso *dso, void *build_id) +void dso__set_build_id(struct dso *dso, struct build_id *bid) { - memcpy(dso->bid.data, build_id, sizeof(dso->bid.data)); + dso->bid = *bid; dso->has_build_id = 1; } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index eac004210b47..5a5678dbdaa5 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -260,7 +260,7 @@ bool dso__sorted_by_name(const struct dso *dso); void dso__set_sorted_by_name(struct dso *dso); void dso__sort_by_name(struct dso *dso); -void dso__set_build_id(struct dso *dso, void *build_id); +void dso__set_build_id(struct dso *dso, struct build_id *bid); bool dso__build_id_equal(const struct dso *dso, u8 *build_id); void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index fe220f01fc94..21243adbb9fd 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2082,8 +2082,10 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev, dso = machine__findnew_dso(machine, filename); if (dso != NULL) { char sbuild_id[SBUILD_ID_SIZE]; + struct build_id bid; - dso__set_build_id(dso, &bev->build_id); + build_id__init(&bid, bev->build_id, BUILD_ID_SIZE); + dso__set_build_id(dso, &bid); if (dso_space != DSO_SPACE__USER) { struct kmod_path m = { .name = NULL, }; diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index dba6b9e5d64e..f9eb0bee7f15 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -349,7 +349,7 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused, dso->is_64_bit = ret; if (filename__read_build_id(ss->name, &bid) > 0) - dso__set_build_id(dso, bid.data); + dso__set_build_id(dso, &bid); return 0; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 369cbad09f0d..976632d0baa0 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1818,7 +1818,7 @@ int dso__load(struct dso *dso, struct map *map) is_regular_file(dso->long_name)) { __symbol__join_symfs(name, PATH_MAX, dso->long_name); if (filename__read_build_id(name, &bid) > 0) - dso__set_build_id(dso, bid.data); + dso__set_build_id(dso, &bid); } /* -- cgit v1.3.1 From 39be8d0115b321ed2b28e43dca12c13ae7595f73 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 13 Oct 2020 21:24:38 +0200 Subject: perf tools: Pass build_id object to dso__build_id_equal() Passing build_id object to dso__build_id_equal(), so we can properly check build id with different size than sha1. Signed-off-by: Jiri Olsa Acked-by: Ian Rogers Link: https://lore.kernel.org/r/20201013192441.1299447-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 5 +++-- tools/perf/util/dso.h | 2 +- tools/perf/util/symbol-elf.c | 8 ++++++-- tools/perf/util/symbol.c | 2 +- 4 files changed, 11 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 4415ce83150b..ca965845b35e 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1332,9 +1332,10 @@ void dso__set_build_id(struct dso *dso, struct build_id *bid) dso->has_build_id = 1; } -bool dso__build_id_equal(const struct dso *dso, u8 *build_id) +bool dso__build_id_equal(const struct dso *dso, struct build_id *bid) { - return memcmp(dso->bid.data, build_id, sizeof(dso->bid.data)) == 0; + return dso->bid.size == bid->size && + memcmp(dso->bid.data, bid->data, dso->bid.size) == 0; } void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine) diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 5a5678dbdaa5..f926c96bf230 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -261,7 +261,7 @@ void dso__set_sorted_by_name(struct dso *dso); void dso__sort_by_name(struct dso *dso); void dso__set_build_id(struct dso *dso, struct build_id *bid); -bool dso__build_id_equal(const struct dso *dso, u8 *build_id); +bool dso__build_id_equal(const struct dso *dso, struct build_id *bid); void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine); int dso__kernel_module_get_build_id(struct dso *dso, const char *root_dir); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 97a55f162ea5..44dd86a4f25f 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -834,13 +834,17 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, /* Always reject images with a mismatched build-id: */ if (dso->has_build_id && !symbol_conf.ignore_vmlinux_buildid) { u8 build_id[BUILD_ID_SIZE]; + struct build_id bid; + int size; - if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) { + size = elf_read_build_id(elf, build_id, BUILD_ID_SIZE); + if (size <= 0) { dso->load_errno = DSO_LOAD_ERRNO__CANNOT_READ_BUILDID; goto out_elf_end; } - if (!dso__build_id_equal(dso, build_id)) { + build_id__init(&bid, build_id, size); + if (!dso__build_id_equal(dso, &bid)) { pr_debug("%s: build id mismatch for %s.\n", __func__, name); dso->load_errno = DSO_LOAD_ERRNO__MISMATCHING_BUILDID; goto out_elf_end; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 976632d0baa0..6138866665df 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2136,7 +2136,7 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) } if (sysfs__read_build_id("/sys/kernel/notes", &bid) == 0) - is_host = dso__build_id_equal(dso, bid.data); + is_host = dso__build_id_equal(dso, &bid); /* Try a fast path for /proc/kallsyms if possible */ if (is_host) { -- cgit v1.3.1 From b0a323c7f0ece5dd76a5dc296ecd9175851a4283 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 13 Oct 2020 21:24:39 +0200 Subject: perf tools: Add size to 'struct perf_record_header_build_id' We do not store size with build ids in perf data, but there's enough space to do it. Adding misc bit PERF_RECORD_MISC_BUILD_ID_SIZE to mark build id event with size. With this fix the dso with md5 build id will have correct build id data and will be usable for debuginfod processing if needed (coming in following patches). Committer notes: Use %zu with size_t to fix this error on 32-bit arches: util/header.c: In function '__event_process_build_id': util/header.c:2105:3: error: format '%lu' expects argument of type 'long unsigned int', but argument 6 has type 'size_t' [-Werror=format=] pr_debug("build id event received for %s: %s [%lu]\n", ^ Signed-off-by: Jiri Olsa Acked-by: Ian Rogers Link: https://lore.kernel.org/r/20201013192441.1299447-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/perf/event.h | 12 +++++++++++- tools/perf/util/build-id.c | 8 +++++--- tools/perf/util/header.c | 10 +++++++--- 3 files changed, 23 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index a6dbba6b9073..988c539bedb6 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -201,10 +201,20 @@ struct perf_record_header_tracing_data { __u32 size; }; +#define PERF_RECORD_MISC_BUILD_ID_SIZE (1 << 15) + struct perf_record_header_build_id { struct perf_event_header header; pid_t pid; - __u8 build_id[24]; + union { + __u8 build_id[24]; + struct { + __u8 data[20]; + __u8 size; + __u8 reserved1__; + __u16 reserved2__; + }; + }; char filename[]; }; diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index b5648735f01f..8763772f1095 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -296,7 +296,7 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size, continue; \ else -static int write_buildid(const char *name, size_t name_len, u8 *build_id, +static int write_buildid(const char *name, size_t name_len, struct build_id *bid, pid_t pid, u16 misc, struct feat_fd *fd) { int err; @@ -307,7 +307,9 @@ static int write_buildid(const char *name, size_t name_len, u8 *build_id, len = PERF_ALIGN(len, NAME_ALIGN); memset(&b, 0, sizeof(b)); - memcpy(&b.build_id, build_id, BUILD_ID_SIZE); + memcpy(&b.data, bid->data, bid->size); + b.size = (u8) bid->size; + misc |= PERF_RECORD_MISC_BUILD_ID_SIZE; b.pid = pid; b.header.misc = misc; b.header.size = sizeof(b) + len; @@ -354,7 +356,7 @@ static int machine__write_buildid_table(struct machine *machine, in_kernel = pos->kernel || is_kernel_module(name, PERF_RECORD_MISC_CPUMODE_UNKNOWN); - err = write_buildid(name, name_len, pos->bid.data, machine->pid, + err = write_buildid(name, name_len, &pos->bid, machine->pid, in_kernel ? kmisc : umisc, fd); if (err) break; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 21243adbb9fd..be850e9f8852 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2083,8 +2083,12 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev, if (dso != NULL) { char sbuild_id[SBUILD_ID_SIZE]; struct build_id bid; + size_t size = BUILD_ID_SIZE; - build_id__init(&bid, bev->build_id, BUILD_ID_SIZE); + if (bev->header.misc & PERF_RECORD_MISC_BUILD_ID_SIZE) + size = bev->size; + + build_id__init(&bid, bev->data, size); dso__set_build_id(dso, &bid); if (dso_space != DSO_SPACE__USER) { @@ -2098,8 +2102,8 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev, } build_id__sprintf(&dso->bid, sbuild_id); - pr_debug("build id event received for %s: %s\n", - dso->long_name, sbuild_id); + pr_debug("build id event received for %s: %s [%zu]\n", + dso->long_name, sbuild_id, size); dso__put(dso); } -- cgit v1.3.1 From e9ad94381cd0c694080810aea430130bb0b0e404 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 13 Oct 2020 21:24:40 +0200 Subject: perf tools: Align buildid list output for short build ids With shorter md5 build ids we need to align their paths properly with other build ids: $ perf buildid-list 17f4e448cc746582ea1881528deb549f7fdb3fd5 [kernel.kallsyms] a50e350e97c43b4708d09bcd85ebfff7 .../tools/perf/buildid-ex-md5 1805c738c8f3ec0f47b7ea09080c28f34d18a82b /usr/lib64/ld-2.31.so $ Signed-off-by: Jiri Olsa Acked-by: Ian Rogers Link: https://lore.kernel.org/r/20201013192441.1299447-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 2 +- tools/perf/util/dso.h | 1 - tools/perf/util/dsos.c | 6 ++++-- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index ca965845b35e..55c11e854fe4 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1369,7 +1369,7 @@ int dso__kernel_module_get_build_id(struct dso *dso, return 0; } -size_t dso__fprintf_buildid(struct dso *dso, FILE *fp) +static size_t dso__fprintf_buildid(struct dso *dso, FILE *fp) { char sbuild_id[SBUILD_ID_SIZE]; diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index f926c96bf230..d8cb4f5680a4 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -362,7 +362,6 @@ struct dso *machine__findnew_kernel(struct machine *machine, const char *name, void dso__reset_find_symbol_cache(struct dso *dso); -size_t dso__fprintf_buildid(struct dso *dso, FILE *fp); size_t dso__fprintf_symbols_by_name(struct dso *dso, FILE *fp); size_t dso__fprintf(struct dso *dso, FILE *fp); diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index 87161e431830..183a81d5b2f9 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -287,10 +287,12 @@ size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, size_t ret = 0; list_for_each_entry(pos, head, node) { + char sbuild_id[SBUILD_ID_SIZE]; + if (skip && skip(pos, parm)) continue; - ret += dso__fprintf_buildid(pos, fp); - ret += fprintf(fp, " %s\n", pos->long_name); + build_id__sprintf(&pos->bid, sbuild_id); + ret += fprintf(fp, "%-40s %s\n", sbuild_id, pos->long_name); } return ret; } -- cgit v1.3.1 From 78b2c50c5de990864f6180acea7a7037685b1b1b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 13 Oct 2020 21:24:41 +0200 Subject: perf test: Add build id shell test Add a test for the build id cache that adds a binary with sha1 and md5 build ids and verifies it's added properly. The test updates build id cache with 'perf record' and 'perf buildid-cache -a'. Committer testing: # perf test "build id" 82: build id cache operations : Ok # # perf test -v "build id" 82: build id cache operations : --- start --- test child forked, pid 447218 test binaries: /tmp/perf.ex.SHA1.B8I /tmp/perf.ex.MD5.7Nv Adding d1abc1eb7568358cf23c959566f23462461834d1 /tmp/perf.ex.SHA1.B8I: Ok build id: d1abc1eb7568358cf23c959566f23462461834d1 link: /tmp/perf.debug.sS2/.build-id/d1/abc1eb7568358cf23c959566f23462461834d1 file: /tmp/perf.debug.sS2/.build-id/d1/../../tmp/perf.ex.SHA1.B8I/d1abc1eb7568358cf23c959566f23462461834d1/elf OK for /tmp/perf.ex.SHA1.B8I Adding a50e350e97c43b4708d09bcd85ebfff7 /tmp/perf.ex.MD5.7Nv: Ok build id: a50e350e97c43b4708d09bcd85ebfff7 link: /tmp/perf.debug.IuW/.build-id/a5/0e350e97c43b4708d09bcd85ebfff7 file: /tmp/perf.debug.IuW/.build-id/a5/../../tmp/perf.ex.MD5.7Nv/a50e350e97c43b4708d09bcd85ebfff7/elf OK for /tmp/perf.ex.MD5.7Nv [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.034 MB /tmp/perf.data.xrH ] build id: d1abc1eb7568358cf23c959566f23462461834d1 link: /tmp/perf.debug.eGR/.build-id/d1/abc1eb7568358cf23c959566f23462461834d1 file: /tmp/perf.debug.eGR/.build-id/d1/../../tmp/perf.ex.SHA1.B8I/d1abc1eb7568358cf23c959566f23462461834d1/elf OK for /tmp/perf.ex.SHA1.B8I [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.034 MB /tmp/perf.data.cbE ] build id: a50e350e97c43b4708d09bcd85ebfff7 link: /tmp/perf.debug.82t/.build-id/a5/0e350e97c43b4708d09bcd85ebfff7 file: /tmp/perf.debug.82t/.build-id/a5/../../tmp/perf.ex.MD5.7Nv/a50e350e97c43b4708d09bcd85ebfff7/elf OK for /tmp/perf.ex.MD5.7Nv test child finished with 0 ---- end ---- build id cache operations: Ok # Signed-off-by: Jiri Olsa Acked-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20201013192441.1299447-10-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/buildid.sh | 101 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100755 tools/perf/tests/shell/buildid.sh (limited to 'tools') diff --git a/tools/perf/tests/shell/buildid.sh b/tools/perf/tests/shell/buildid.sh new file mode 100755 index 000000000000..4861a20edee2 --- /dev/null +++ b/tools/perf/tests/shell/buildid.sh @@ -0,0 +1,101 @@ +#!/bin/sh +# build id cache operations +# SPDX-License-Identifier: GPL-2.0 + +# skip if there's no readelf +if ! [ -x "$(command -v readelf)" ]; then + echo "failed: no readelf, install binutils" + exit 2 +fi + +# skip if there's no compiler +if ! [ -x "$(command -v cc)" ]; then + echo "failed: no compiler, install gcc" + exit 2 +fi + +ex_md5=$(mktemp /tmp/perf.ex.MD5.XXX) +ex_sha1=$(mktemp /tmp/perf.ex.SHA1.XXX) + +echo 'int main(void) { return 0; }' | cc -Wl,--build-id=sha1 -o ${ex_sha1} -x c - +echo 'int main(void) { return 0; }' | cc -Wl,--build-id=md5 -o ${ex_md5} -x c - + +echo "test binaries: ${ex_sha1} ${ex_md5}" + +check() +{ + id=`readelf -n ${1} 2>/dev/null | grep 'Build ID' | awk '{print $3}'` + + echo "build id: ${id}" + + link=${build_id_dir}/.build-id/${id:0:2}/${id:2} + echo "link: ${link}" + + if [ ! -h $link ]; then + echo "failed: link ${link} does not exist" + exit 1 + fi + + file=${build_id_dir}/.build-id/${id:0:2}/`readlink ${link}`/elf + echo "file: ${file}" + + if [ ! -x $file ]; then + echo "failed: file ${file} does not exist" + exit 1 + fi + + diff ${file} ${1} + if [ $? -ne 0 ]; then + echo "failed: ${file} do not match" + exit 1 + fi + + echo "OK for ${1}" +} + +test_add() +{ + build_id_dir=$(mktemp -d /tmp/perf.debug.XXX) + perf="perf --buildid-dir ${build_id_dir}" + + ${perf} buildid-cache -v -a ${1} + if [ $? -ne 0 ]; then + echo "failed: add ${1} to build id cache" + exit 1 + fi + + check ${1} + + rm -rf ${build_id_dir} +} + +test_record() +{ + data=$(mktemp /tmp/perf.data.XXX) + build_id_dir=$(mktemp -d /tmp/perf.debug.XXX) + perf="perf --buildid-dir ${build_id_dir}" + + ${perf} record --buildid-all -o ${data} ${1} + if [ $? -ne 0 ]; then + echo "failed: record ${1}" + exit 1 + fi + + check ${1} + + rm -rf ${build_id_dir} + rm -rf ${data} +} + +# add binaries manual via perf buildid-cache -a +test_add ${ex_sha1} +test_add ${ex_md5} + +# add binaries via perf record post processing +test_record ${ex_sha1} +test_record ${ex_md5} + +# cleanup +rm ${ex_sha1} ${ex_md5} + +exit ${err} -- cgit v1.3.1 From 0997a2662fa34551e37bb6f1246a179678b2f984 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 14 Oct 2020 07:42:55 -0700 Subject: perf tools: Add support for exclusive groups/events Peter suggested that using the exclusive mode in perf could avoid some problems with bad scheduling of groups. Exclusive is implemented in the kernel, but wasn't exposed by the perf tool, so hard to use without custom low level API users. Add support for marking groups or events with :e for exclusive in the perf tool. The implementation is basically the same as the existing pinned attribute. Committer testing: # perf test "parse event" 6: Parse event definition strings : Ok # perf test -v "parse event" |& grep :u*e running test 56 'instructions:uep' running test 57 '{cycles,cache-misses,branch-misses}:e' # # # grep "model name" -m1 /proc/cpuinfo model name : AMD Ryzen 9 3900X 12-Core Processor # # perf stat -a -e '{cycles,cache-misses,branch-misses}:e' sleep 1 Performance counter stats for 'system wide': cycles (0.00%) cache-misses (0.00%) branch-misses (0.00%) 1.001269893 seconds time elapsed Some events weren't counted. Try disabling the NMI watchdog: echo 0 > /proc/sys/kernel/nmi_watchdog perf stat ... echo 1 > /proc/sys/kernel/nmi_watchdog # echo 0 > /proc/sys/kernel/nmi_watchdog # perf stat -a -e '{cycles,cache-misses,branch-misses}:e' sleep 1 Performance counter stats for 'system wide': 1,298,663,141 cycles 30,962,215 cache-misses 5,325,150 branch-misses 1.001474934 seconds time elapsed # # The output for asking for precise events on AMD needs to improve, it # supposedly works only for system wide or per CPU # # perf stat -a -e '{cycles,cache-misses,branch-misses}:uep' sleep 1 Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (cycles). /bin/dmesg | grep -i perf may provide additional information. # perf stat -a -e '{cycles,cache-misses,branch-misses}:ue' sleep 1 Performance counter stats for 'system wide': 746,363,126 cycles 16,881,611 cache-misses 2,871,259 branch-misses 1.001636066 seconds time elapsed # Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20201014144255.22699-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 1 + tools/perf/tests/parse-events.c | 58 +++++++++++++++++++++++++++++++++- tools/perf/util/parse-events.c | 11 +++++-- tools/perf/util/parse-events.l | 2 +- 4 files changed, 68 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 10ed539a8859..4c7db1da8fcc 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -58,6 +58,7 @@ counted. The following modifiers exist: S - read sample value (PERF_SAMPLE_READ) D - pin the event to the PMU W - group is weak and will fallback to non-group if not schedulable, + e - group or event are exclusive and do not share the PMU The 'p' modifier can be used for specifying how precise the instruction address should be. The 'p' modifier can be specified multiple times: diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index aae0fd9045c1..611512f22b34 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -557,6 +557,7 @@ static int test__checkevent_pmu_events(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); + TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); return 0; } @@ -575,6 +576,7 @@ static int test__checkevent_pmu_events_mix(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); + TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); /* cpu/pmu-event/u*/ evsel = evsel__next(evsel); @@ -587,6 +589,7 @@ static int test__checkevent_pmu_events_mix(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); + TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.pinned); return 0; } @@ -1277,6 +1280,49 @@ static int test__pinned_group(struct evlist *evlist) return 0; } +static int test__checkevent_exclusive_modifier(struct evlist *evlist) +{ + struct evsel *evsel = evlist__first(evlist); + + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip); + TEST_ASSERT_VAL("wrong exclusive", evsel->core.attr.exclusive); + + return test__checkevent_symbolic_name(evlist); +} + +static int test__exclusive_group(struct evlist *evlist) +{ + struct evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries); + + /* cycles - group leader */ + evsel = leader = evlist__first(evlist); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong exclusive", evsel->core.attr.exclusive); + + /* cache-misses - can not be pinned, but will go on with the leader */ + evsel = evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); + + /* branch-misses - ditto */ + evsel = evsel__next(evsel); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); + + return 0; +} static int test__checkevent_breakpoint_len(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); @@ -1765,7 +1811,17 @@ static struct evlist_test test__events[] = { .name = "cycles:k", .check = test__sym_event_dc, .id = 55, - } + }, + { + .name = "instructions:uep", + .check = test__checkevent_exclusive_modifier, + .id = 56, + }, + { + .name = "{cycles,cache-misses,branch-misses}:e", + .check = test__exclusive_group, + .id = 57, + }, }; static struct evlist_test test__events_pmu[] = { diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 3bcdf084df91..3b273580fb84 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1775,6 +1775,7 @@ struct event_modifier { int sample_read; int pinned; int weak; + int exclusive; }; static int get_event_modifier(struct event_modifier *mod, char *str, @@ -1790,6 +1791,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, int precise_max = 0; int sample_read = 0; int pinned = evsel ? evsel->core.attr.pinned : 0; + int exclusive = evsel ? evsel->core.attr.exclusive : 0; int exclude = eu | ek | eh; int exclude_GH = evsel ? evsel->exclude_GH : 0; @@ -1833,6 +1835,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str, sample_read = 1; } else if (*str == 'D') { pinned = 1; + } else if (*str == 'e') { + exclusive = 1; } else if (*str == 'W') { weak = 1; } else @@ -1866,6 +1870,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, mod->sample_read = sample_read; mod->pinned = pinned; mod->weak = weak; + mod->exclusive = exclusive; return 0; } @@ -1879,7 +1884,7 @@ static int check_modifier(char *str) char *p = str; /* The sizeof includes 0 byte as well. */ - if (strlen(str) > (sizeof("ukhGHpppPSDIW") - 1)) + if (strlen(str) > (sizeof("ukhGHpppPSDIWe") - 1)) return -1; while (*p) { @@ -1921,8 +1926,10 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) evsel->precise_max = mod.precise_max; evsel->weak_group = mod.weak; - if (evsel__is_group_leader(evsel)) + if (evsel__is_group_leader(evsel)) { evsel->core.attr.pinned = mod.pinned; + evsel->core.attr.exclusive = mod.exclusive; + } } return 0; diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 3ca5fd2829ca..9db5097317f4 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -210,7 +210,7 @@ name_tag [\'][a-zA-Z_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\'] name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? /* If you add a modifier you need to update check_modifier() */ -modifier_event [ukhpPGHSDIW]+ +modifier_event [ukhpPGHSDIWe]+ modifier_bp [rwx]{1,3} %% -- cgit v1.3.1 From 6556a75becb51afa8f9233186d437e9b6eb6f4b5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 13 Oct 2020 20:53:46 -0700 Subject: perf intel-pt: Improve PT documentation slightly Document the higher level --insn-trace etc. perf script options. Include the howto how to build xed into the manpage Signed-off-by: Andi Kleen Cc: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20201014035346.4772-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-intel-pt.txt | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index cb637e0d0743..cd362dc2af07 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -112,6 +112,32 @@ The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, system, asynchronous, interrupt, transaction abort, trace begin, trace end, and in transaction, respectively. +perf script also supports higher level ways to dump instruction traces: + + perf script --insn-trace --xed + +Dump all instructions. This requires installing the xed tool (see XED below) +Dumping all instructions in a long trace can be fairly slow. It is usually better +to start with higher level decoding, like + + perf script --call-trace + +or + + perf script --call-ret-trace + +and then select a time range of interest. The time range can then be examined +in detail with + + perf script --time starttime,stoptime --insn-trace --xed + +While examining the trace it's also useful to filter on specific CPUs using +the -C option + + perf script --time starttime,stoptime --insn-trace --xed -C 1 + +Dump all instructions in time range on CPU 1. + Another interesting field that is not printed by default is 'ipc' which can be displayed as follows: @@ -1110,6 +1136,10 @@ To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g. perf script --itrace=oe +XED +--- + +include::build-xed.txt[] SEE ALSO -------- -- cgit v1.3.1 From 480accbb17609be01abcd09517605a930791bfdb Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 9 Oct 2020 10:28:39 +0800 Subject: perf streams: Introduce branch history "streams" We define a stream as the branch history which is aggregated by the branch records from perf samples. For example, the callchains aggregated from the branch records are considered as streams. By browsing the hot stream, we can understand the hot code path. Now we only support the callchain for stream. For measuring the hot level for a stream, we use the callchain_node->hit, higher is hotter. There may be many callchains sampled so we only focus on the top N hottest callchains. N is a user defined parameter or predefined default value (nr_streams_max). This patch creates an evsel_streams array per event, and saves the top N hottest streams in a stream array. So now we can get the per-event top N hottest streams. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201009022845.13141-2-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/stream.c | 164 +++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/stream.h | 30 +++++++++ 3 files changed, 195 insertions(+) create mode 100644 tools/perf/util/stream.c create mode 100644 tools/perf/util/stream.h (limited to 'tools') diff --git a/tools/perf/util/Build b/tools/perf/util/Build index eebbd5223cae..e2563d0154eb 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -102,6 +102,7 @@ perf-y += rwsem.o perf-y += thread-stack.o perf-y += spark.o perf-y += topdown.o +perf-y += stream.o perf-$(CONFIG_AUXTRACE) += auxtrace.o perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ perf-$(CONFIG_AUXTRACE) += intel-pt.o diff --git a/tools/perf/util/stream.c b/tools/perf/util/stream.c new file mode 100644 index 000000000000..31efe4ae0f55 --- /dev/null +++ b/tools/perf/util/stream.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Compare and figure out the top N hottest streams + * Copyright (c) 2020, Intel Corporation. + * Author: Jin Yao + */ + +#include +#include +#include +#include "debug.h" +#include "hist.h" +#include "sort.h" +#include "stream.h" +#include "evlist.h" + +static void evsel_streams__delete(struct evsel_streams *es, int nr_evsel) +{ + for (int i = 0; i < nr_evsel; i++) + zfree(&es[i].streams); + + free(es); +} + +void evlist_streams__delete(struct evlist_streams *els) +{ + evsel_streams__delete(els->ev_streams, els->nr_evsel); + free(els); +} + +static struct evlist_streams *evlist_streams__new(int nr_evsel, + int nr_streams_max) +{ + struct evlist_streams *els; + struct evsel_streams *es; + + els = zalloc(sizeof(*els)); + if (!els) + return NULL; + + es = calloc(nr_evsel, sizeof(struct evsel_streams)); + if (!es) { + free(els); + return NULL; + } + + for (int i = 0; i < nr_evsel; i++) { + struct evsel_streams *s = &es[i]; + + s->streams = calloc(nr_streams_max, sizeof(struct stream)); + if (!s->streams) + goto err; + + s->nr_streams_max = nr_streams_max; + s->evsel_idx = -1; + } + + els->ev_streams = es; + els->nr_evsel = nr_evsel; + return els; + +err: + evsel_streams__delete(es, nr_evsel); + return NULL; +} + +/* + * The cnodes with high hit number are hot callchains. + */ +static void evsel_streams__set_hot_cnode(struct evsel_streams *es, + struct callchain_node *cnode) +{ + int i, idx = 0; + u64 hit; + + if (es->nr_streams < es->nr_streams_max) { + i = es->nr_streams; + es->streams[i].cnode = cnode; + es->nr_streams++; + return; + } + + /* + * Considering a few number of hot streams, only use simple + * way to find the cnode with smallest hit number and replace. + */ + hit = (es->streams[0].cnode)->hit; + for (i = 1; i < es->nr_streams; i++) { + if ((es->streams[i].cnode)->hit < hit) { + hit = (es->streams[i].cnode)->hit; + idx = i; + } + } + + if (cnode->hit > hit) + es->streams[idx].cnode = cnode; +} + +static void update_hot_callchain(struct hist_entry *he, + struct evsel_streams *es) +{ + struct rb_root *root = &he->sorted_chain; + struct rb_node *rb_node = rb_first(root); + struct callchain_node *cnode; + + while (rb_node) { + cnode = rb_entry(rb_node, struct callchain_node, rb_node); + evsel_streams__set_hot_cnode(es, cnode); + rb_node = rb_next(rb_node); + } +} + +static void init_hot_callchain(struct hists *hists, struct evsel_streams *es) +{ + struct rb_node *next = rb_first_cached(&hists->entries); + + while (next) { + struct hist_entry *he; + + he = rb_entry(next, struct hist_entry, rb_node); + update_hot_callchain(he, es); + next = rb_next(&he->rb_node); + } +} + +static int evlist__init_callchain_streams(struct evlist *evlist, + struct evlist_streams *els) +{ + struct evsel_streams *es = els->ev_streams; + struct evsel *pos; + int i = 0; + + BUG_ON(els->nr_evsel < evlist->core.nr_entries); + + evlist__for_each_entry(evlist, pos) { + struct hists *hists = evsel__hists(pos); + + hists__output_resort(hists, NULL); + init_hot_callchain(hists, &es[i]); + es[i].evsel_idx = pos->idx; + i++; + } + + return 0; +} + +struct evlist_streams *evlist__create_streams(struct evlist *evlist, + int nr_streams_max) +{ + int nr_evsel = evlist->core.nr_entries, ret = -1; + struct evlist_streams *els = evlist_streams__new(nr_evsel, + nr_streams_max); + + if (!els) + return NULL; + + ret = evlist__init_callchain_streams(evlist, els); + if (ret) { + evlist_streams__delete(els); + return NULL; + } + + return els; +} diff --git a/tools/perf/util/stream.h b/tools/perf/util/stream.h new file mode 100644 index 000000000000..35bc64df554c --- /dev/null +++ b/tools/perf/util/stream.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_STREAM_H +#define __PERF_STREAM_H + +#include "callchain.h" + +struct stream { + struct callchain_node *cnode; +}; + +struct evsel_streams { + struct stream *streams; + int nr_streams_max; + int nr_streams; + int evsel_idx; +}; + +struct evlist_streams { + struct evsel_streams *ev_streams; + int nr_evsel; +}; + +struct evlist; + +void evlist_streams__delete(struct evlist_streams *els); + +struct evlist_streams *evlist__create_streams(struct evlist *evlist, + int nr_streams_max); + +#endif /* __PERF_STREAM_H */ -- cgit v1.3.1 From dd1d841810158086086417133a22c90dfabaeecb Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 9 Oct 2020 10:28:40 +0800 Subject: perf streams: Get the evsel_streams by evsel_idx In previous patch, we have created evsel_streams array. This patch returns the specified evsel_streams according to the evsel_idx. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201009022845.13141-3-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stream.c | 13 +++++++++++++ tools/perf/util/stream.h | 3 +++ 2 files changed, 16 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/stream.c b/tools/perf/util/stream.c index 31efe4ae0f55..e1c7d6c6126b 100644 --- a/tools/perf/util/stream.c +++ b/tools/perf/util/stream.c @@ -162,3 +162,16 @@ struct evlist_streams *evlist__create_streams(struct evlist *evlist, return els; } + +struct evsel_streams *evsel_streams__entry(struct evlist_streams *els, + int evsel_idx) +{ + struct evsel_streams *es = els->ev_streams; + + for (int i = 0; i < els->nr_evsel; i++) { + if (es[i].evsel_idx == evsel_idx) + return &es[i]; + } + + return NULL; +} diff --git a/tools/perf/util/stream.h b/tools/perf/util/stream.h index 35bc64df554c..f01335677479 100644 --- a/tools/perf/util/stream.h +++ b/tools/perf/util/stream.h @@ -27,4 +27,7 @@ void evlist_streams__delete(struct evlist_streams *els); struct evlist_streams *evlist__create_streams(struct evlist *evlist, int nr_streams_max); +struct evsel_streams *evsel_streams__entry(struct evlist_streams *els, + int evsel_idx); + #endif /* __PERF_STREAM_H */ -- cgit v1.3.1 From 47ef8398c369fa77e0fdbadfebb3885879491721 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 9 Oct 2020 10:28:41 +0800 Subject: perf streams: Compare two streams Stream is the branch history which is aggregated by the branch records from perf samples. Now we support the callchain as stream. If the callchain entries of one stream are fully matched with the callchain entries of another stream, we think two streams are matched. For example, cycles: 1, hits: 26.80% cycles: 1, hits: 27.30% ----------------------- ----------------------- main div.c:39 main div.c:39 main div.c:44 main div.c:44 Above two streams are matched (we don't consider the case that source code is changed). The matching logic is, compare the chain string first. If it's not matched, fallback to dso address comparison. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201009022845.13141-4-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 54 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/callchain.h | 4 ++++ 2 files changed, 58 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 2775b752f2fa..d356e73c5622 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1613,3 +1613,57 @@ void callchain_param_setup(u64 sample_type) callchain_param.record_mode = CALLCHAIN_FP; } } + +static bool chain_match(struct callchain_list *base_chain, + struct callchain_list *pair_chain) +{ + enum match_result match; + + match = match_chain_strings(base_chain->srcline, + pair_chain->srcline); + if (match != MATCH_ERROR) + return match == MATCH_EQ; + + match = match_chain_dso_addresses(base_chain->ms.map, + base_chain->ip, + pair_chain->ms.map, + pair_chain->ip); + + return match == MATCH_EQ; +} + +bool callchain_cnode_matched(struct callchain_node *base_cnode, + struct callchain_node *pair_cnode) +{ + struct callchain_list *base_chain, *pair_chain; + bool match = false; + + pair_chain = list_first_entry(&pair_cnode->val, + struct callchain_list, + list); + + list_for_each_entry(base_chain, &base_cnode->val, list) { + if (&pair_chain->list == &pair_cnode->val) + return false; + + if (!base_chain->srcline || !pair_chain->srcline) { + pair_chain = list_next_entry(pair_chain, list); + continue; + } + + match = chain_match(base_chain, pair_chain); + if (!match) + return false; + + pair_chain = list_next_entry(pair_chain, list); + } + + /* + * Say chain1 is ABC, chain2 is ABCD, we consider they are + * not fully matched. + */ + if (pair_chain && (&pair_chain->list != &pair_cnode->val)) + return false; + + return match; +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index fe36a9e5ccd1..ad27fc8c7948 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -298,4 +298,8 @@ int callchain_branch_counts(struct callchain_root *root, u64 *abort_count, u64 *cycles_count); void callchain_param_setup(u64 sample_type); + +bool callchain_cnode_matched(struct callchain_node *base_cnode, + struct callchain_node *pair_cnode); + #endif /* __PERF_CALLCHAIN_H */ -- cgit v1.3.1 From fa79aa6485816b86f37205fd5659616d64f8be7f Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 9 Oct 2020 10:28:42 +0800 Subject: perf streams: Link stream pair In previous patch, we have created an evsel_streams for one event, and top N hottest streams will be saved in a stream array in evsel_streams. This patch compares total streams among two evsel_streams. Once two streams are fully matched, they will be linked as a pair. From the pair, we can know which streams are matched. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201009022845.13141-5-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stream.c | 40 ++++++++++++++++++++++++++++++++++++++++ tools/perf/util/stream.h | 4 ++++ 2 files changed, 44 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/stream.c b/tools/perf/util/stream.c index e1c7d6c6126b..0d6a7452320a 100644 --- a/tools/perf/util/stream.c +++ b/tools/perf/util/stream.c @@ -175,3 +175,43 @@ struct evsel_streams *evsel_streams__entry(struct evlist_streams *els, return NULL; } + +static struct stream *stream__callchain_match(struct stream *base_stream, + struct evsel_streams *es_pair) +{ + for (int i = 0; i < es_pair->nr_streams; i++) { + struct stream *pair_stream = &es_pair->streams[i]; + + if (callchain_cnode_matched(base_stream->cnode, + pair_stream->cnode)) { + return pair_stream; + } + } + + return NULL; +} + +static struct stream *stream__match(struct stream *base_stream, + struct evsel_streams *es_pair) +{ + return stream__callchain_match(base_stream, es_pair); +} + +static void stream__link(struct stream *base_stream, struct stream *pair_stream) +{ + base_stream->pair_cnode = pair_stream->cnode; + pair_stream->pair_cnode = base_stream->cnode; +} + +void evsel_streams__match(struct evsel_streams *es_base, + struct evsel_streams *es_pair) +{ + for (int i = 0; i < es_base->nr_streams; i++) { + struct stream *base_stream = &es_base->streams[i]; + struct stream *pair_stream; + + pair_stream = stream__match(base_stream, es_pair); + if (pair_stream) + stream__link(base_stream, pair_stream); + } +} diff --git a/tools/perf/util/stream.h b/tools/perf/util/stream.h index f01335677479..cb131f41f5b1 100644 --- a/tools/perf/util/stream.h +++ b/tools/perf/util/stream.h @@ -6,6 +6,7 @@ struct stream { struct callchain_node *cnode; + struct callchain_node *pair_cnode; }; struct evsel_streams { @@ -30,4 +31,7 @@ struct evlist_streams *evlist__create_streams(struct evlist *evlist, struct evsel_streams *evsel_streams__entry(struct evlist_streams *els, int evsel_idx); +void evsel_streams__match(struct evsel_streams *es_base, + struct evsel_streams *es_pair); + #endif /* __PERF_STREAM_H */ -- cgit v1.3.1 From 28904f4dcea1444adea4b51ba0af32d6c7314101 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 9 Oct 2020 10:28:43 +0800 Subject: perf streams: Calculate the sum of total streams hits We have used callchain_node->hit to measure the hot level of one stream. This patch calculates the sum of hits of total streams. Thus in next patch, we can use following formula to report hot percent for one stream. hot percent = callchain_node->hit / sum of total hits Signed-off-by: Jin Yao Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201009022845.13141-6-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 32 ++++++++++++++++++++++++++++++++ tools/perf/util/callchain.h | 3 +++ tools/perf/util/stream.c | 2 ++ tools/perf/util/stream.h | 1 + 4 files changed, 38 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index d356e73c5622..4f824bfcc072 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1667,3 +1667,35 @@ bool callchain_cnode_matched(struct callchain_node *base_cnode, return match; } + +static u64 count_callchain_hits(struct hist_entry *he) +{ + struct rb_root *root = &he->sorted_chain; + struct rb_node *rb_node = rb_first(root); + struct callchain_node *node; + u64 chain_hits = 0; + + while (rb_node) { + node = rb_entry(rb_node, struct callchain_node, rb_node); + chain_hits += node->hit; + rb_node = rb_next(rb_node); + } + + return chain_hits; +} + +u64 callchain_total_hits(struct hists *hists) +{ + struct rb_node *next = rb_first_cached(&hists->entries); + u64 chain_hits = 0; + + while (next) { + struct hist_entry *he = rb_entry(next, struct hist_entry, + rb_node); + + chain_hits += count_callchain_hits(he); + next = rb_next(&he->rb_node); + } + + return chain_hits; +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index ad27fc8c7948..ac5bea9c1eb7 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -13,6 +13,7 @@ struct ip_callchain; struct map; struct perf_sample; struct thread; +struct hists; #define HELP_PAD "\t\t\t\t" @@ -302,4 +303,6 @@ void callchain_param_setup(u64 sample_type); bool callchain_cnode_matched(struct callchain_node *base_cnode, struct callchain_node *pair_cnode); +u64 callchain_total_hits(struct hists *hists); + #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/stream.c b/tools/perf/util/stream.c index 0d6a7452320a..47c5956b3378 100644 --- a/tools/perf/util/stream.c +++ b/tools/perf/util/stream.c @@ -121,6 +121,8 @@ static void init_hot_callchain(struct hists *hists, struct evsel_streams *es) update_hot_callchain(he, es); next = rb_next(&he->rb_node); } + + es->streams_hits = callchain_total_hits(hists); } static int evlist__init_callchain_streams(struct evlist *evlist, diff --git a/tools/perf/util/stream.h b/tools/perf/util/stream.h index cb131f41f5b1..a61072eda64d 100644 --- a/tools/perf/util/stream.h +++ b/tools/perf/util/stream.h @@ -14,6 +14,7 @@ struct evsel_streams { int nr_streams_max; int nr_streams; int evsel_idx; + u64 streams_hits; }; struct evlist_streams { -- cgit v1.3.1 From 5bbd6bad3b058bb28f41b97bf67a3cc3e52d09e6 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 9 Oct 2020 10:28:44 +0800 Subject: perf streams: Report hot streams We show the streams separately. They are divided into different sections. 1. "Matched hot streams" 2. "Hot streams in old perf data only" 3. "Hot streams in new perf data only". For each stream, we report the cycles and hot percent (hits%). For example, cycles: 2, hits: 4.08% -------------------------- main div.c:42 compute_flag div.c:28 Signed-off-by: Jin Yao Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201009022845.13141-7-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 13 +++++ tools/perf/util/callchain.h | 2 + tools/perf/util/stream.c | 123 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/stream.h | 3 ++ 4 files changed, 141 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 4f824bfcc072..1b60985690bb 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1699,3 +1699,16 @@ u64 callchain_total_hits(struct hists *hists) return chain_hits; } + +s64 callchain_avg_cycles(struct callchain_node *cnode) +{ + struct callchain_list *chain; + s64 cycles = 0; + + list_for_each_entry(chain, &cnode->val, list) { + if (chain->srcline && chain->branch_count) + cycles += chain->cycles_count / chain->branch_count; + } + + return cycles; +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index ac5bea9c1eb7..5824134f983b 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -305,4 +305,6 @@ bool callchain_cnode_matched(struct callchain_node *base_cnode, u64 callchain_total_hits(struct hists *hists); +s64 callchain_avg_cycles(struct callchain_node *cnode); + #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/stream.c b/tools/perf/util/stream.c index 47c5956b3378..4bd5e5a00aa5 100644 --- a/tools/perf/util/stream.c +++ b/tools/perf/util/stream.c @@ -217,3 +217,126 @@ void evsel_streams__match(struct evsel_streams *es_base, stream__link(base_stream, pair_stream); } } + +static void print_callchain_pair(struct stream *base_stream, int idx, + struct evsel_streams *es_base, + struct evsel_streams *es_pair) +{ + struct callchain_node *base_cnode = base_stream->cnode; + struct callchain_node *pair_cnode = base_stream->pair_cnode; + struct callchain_list *base_chain, *pair_chain; + char buf1[512], buf2[512], cbuf1[256], cbuf2[256]; + char *s1, *s2; + double pct; + + printf("\nhot chain pair %d:\n", idx); + + pct = (double)base_cnode->hit / (double)es_base->streams_hits; + scnprintf(buf1, sizeof(buf1), "cycles: %ld, hits: %.2f%%", + callchain_avg_cycles(base_cnode), pct * 100.0); + + pct = (double)pair_cnode->hit / (double)es_pair->streams_hits; + scnprintf(buf2, sizeof(buf2), "cycles: %ld, hits: %.2f%%", + callchain_avg_cycles(pair_cnode), pct * 100.0); + + printf("%35s\t%35s\n", buf1, buf2); + + printf("%35s\t%35s\n", + "---------------------------", + "--------------------------"); + + pair_chain = list_first_entry(&pair_cnode->val, + struct callchain_list, + list); + + list_for_each_entry(base_chain, &base_cnode->val, list) { + if (&pair_chain->list == &pair_cnode->val) + return; + + s1 = callchain_list__sym_name(base_chain, cbuf1, sizeof(cbuf1), + false); + s2 = callchain_list__sym_name(pair_chain, cbuf2, sizeof(cbuf2), + false); + + scnprintf(buf1, sizeof(buf1), "%35s\t%35s", s1, s2); + printf("%s\n", buf1); + pair_chain = list_next_entry(pair_chain, list); + } +} + +static void print_stream_callchain(struct stream *stream, int idx, + struct evsel_streams *es, bool pair) +{ + struct callchain_node *cnode = stream->cnode; + struct callchain_list *chain; + char buf[512], cbuf[256], *s; + double pct; + + printf("\nhot chain %d:\n", idx); + + pct = (double)cnode->hit / (double)es->streams_hits; + scnprintf(buf, sizeof(buf), "cycles: %ld, hits: %.2f%%", + callchain_avg_cycles(cnode), pct * 100.0); + + if (pair) { + printf("%35s\t%35s\n", "", buf); + printf("%35s\t%35s\n", + "", "--------------------------"); + } else { + printf("%35s\n", buf); + printf("%35s\n", "--------------------------"); + } + + list_for_each_entry(chain, &cnode->val, list) { + s = callchain_list__sym_name(chain, cbuf, sizeof(cbuf), false); + + if (pair) + scnprintf(buf, sizeof(buf), "%35s\t%35s", "", s); + else + scnprintf(buf, sizeof(buf), "%35s", s); + + printf("%s\n", buf); + } +} + +static void callchain_streams_report(struct evsel_streams *es_base, + struct evsel_streams *es_pair) +{ + struct stream *base_stream; + int i, idx = 0; + + printf("[ Matched hot streams ]\n"); + for (i = 0; i < es_base->nr_streams; i++) { + base_stream = &es_base->streams[i]; + if (base_stream->pair_cnode) { + print_callchain_pair(base_stream, ++idx, + es_base, es_pair); + } + } + + idx = 0; + printf("\n[ Hot streams in old perf data only ]\n"); + for (i = 0; i < es_base->nr_streams; i++) { + base_stream = &es_base->streams[i]; + if (!base_stream->pair_cnode) { + print_stream_callchain(base_stream, ++idx, + es_base, false); + } + } + + idx = 0; + printf("\n[ Hot streams in new perf data only ]\n"); + for (i = 0; i < es_pair->nr_streams; i++) { + base_stream = &es_pair->streams[i]; + if (!base_stream->pair_cnode) { + print_stream_callchain(base_stream, ++idx, + es_pair, true); + } + } +} + +void evsel_streams__report(struct evsel_streams *es_base, + struct evsel_streams *es_pair) +{ + return callchain_streams_report(es_base, es_pair); +} diff --git a/tools/perf/util/stream.h b/tools/perf/util/stream.h index a61072eda64d..bee768874fea 100644 --- a/tools/perf/util/stream.h +++ b/tools/perf/util/stream.h @@ -35,4 +35,7 @@ struct evsel_streams *evsel_streams__entry(struct evlist_streams *els, void evsel_streams__match(struct evsel_streams *es_base, struct evsel_streams *es_pair); +void evsel_streams__report(struct evsel_streams *es_base, + struct evsel_streams *es_pair); + #endif /* __PERF_STREAM_H */ -- cgit v1.3.1 From 2a09a84c720b436a3ded9b7e3701d391ec81892a Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 9 Oct 2020 10:28:45 +0800 Subject: perf diff: Support hot streams comparison This patch enables perf-diff with "--stream" option. "--stream": Enable hot streams comparison Now let's see example. perf record -b ... Generate perf.data.old with branch data perf record -b ... Generate perf.data with branch data perf diff --stream [ Matched hot streams ] hot chain pair 1: cycles: 1, hits: 27.77% cycles: 1, hits: 9.24% --------------------------- -------------------------- main div.c:39 main div.c:39 main div.c:44 main div.c:44 hot chain pair 2: cycles: 34, hits: 20.06% cycles: 27, hits: 16.98% --------------------------- -------------------------- __random_r random_r.c:360 __random_r random_r.c:360 __random_r random_r.c:388 __random_r random_r.c:388 __random_r random_r.c:388 __random_r random_r.c:388 __random_r random_r.c:380 __random_r random_r.c:380 __random_r random_r.c:357 __random_r random_r.c:357 __random random.c:293 __random random.c:293 __random random.c:293 __random random.c:293 __random random.c:291 __random random.c:291 __random random.c:291 __random random.c:291 __random random.c:291 __random random.c:291 __random random.c:288 __random random.c:288 rand rand.c:27 rand rand.c:27 rand rand.c:26 rand rand.c:26 rand@plt rand@plt rand@plt rand@plt compute_flag div.c:25 compute_flag div.c:25 compute_flag div.c:22 compute_flag div.c:22 main div.c:40 main div.c:40 main div.c:40 main div.c:40 main div.c:39 main div.c:39 hot chain pair 3: cycles: 9, hits: 4.48% cycles: 6, hits: 4.51% --------------------------- -------------------------- __random_r random_r.c:360 __random_r random_r.c:360 __random_r random_r.c:388 __random_r random_r.c:388 __random_r random_r.c:388 __random_r random_r.c:388 __random_r random_r.c:380 __random_r random_r.c:380 [ Hot streams in old perf data only ] hot chain 1: cycles: 18, hits: 6.75% -------------------------- __random_r random_r.c:360 __random_r random_r.c:388 __random_r random_r.c:388 __random_r random_r.c:380 __random_r random_r.c:357 __random random.c:293 __random random.c:293 __random random.c:291 __random random.c:291 __random random.c:291 __random random.c:288 rand rand.c:27 rand rand.c:26 rand@plt rand@plt compute_flag div.c:25 compute_flag div.c:22 main div.c:40 hot chain 2: cycles: 29, hits: 2.78% -------------------------- compute_flag div.c:22 main div.c:40 main div.c:40 main div.c:39 [ Hot streams in new perf data only ] hot chain 1: cycles: 4, hits: 4.54% -------------------------- main div.c:42 compute_flag div.c:28 hot chain 2: cycles: 5, hits: 3.51% -------------------------- main div.c:39 main div.c:44 main div.c:42 compute_flag div.c:28 Signed-off-by: Jin Yao Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201009022845.13141-8-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-diff.txt | 4 ++ tools/perf/builtin-diff.c | 119 +++++++++++++++++++++++++++++---- 2 files changed, 110 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index f50ca0fef0a4..be65bd55ab2a 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -182,6 +182,10 @@ OPTIONS --tid=:: Only diff samples for given thread ID (comma separated list). +--stream:: + Enable hot streams comparison. Stream can be a callchain which is + aggregated by the branch records from samples. + COMPARISON ---------- The comparison is governed by the baseline file. The baseline perf.data diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index f8c9bdd8269a..584e2e1a3793 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -25,6 +25,7 @@ #include "util/map.h" #include "util/spark.h" #include "util/block-info.h" +#include "util/stream.h" #include #include #include @@ -42,6 +43,7 @@ struct perf_diff { int range_size; int range_num; bool has_br_stack; + bool stream; }; /* Diff command specific HPP columns. */ @@ -72,6 +74,7 @@ struct data__file { struct perf_data data; int idx; struct hists *hists; + struct evlist_streams *evlist_streams; struct diff_hpp_fmt fmt[PERF_HPP_DIFF__MAX_INDEX]; }; @@ -106,6 +109,7 @@ enum { COMPUTE_DELTA_ABS, COMPUTE_CYCLES, COMPUTE_MAX, + COMPUTE_STREAM, /* After COMPUTE_MAX to avoid use current compute arrays */ }; const char *compute_names[COMPUTE_MAX] = { @@ -393,6 +397,11 @@ static int diff__process_sample_event(struct perf_tool *tool, struct perf_diff *pdiff = container_of(tool, struct perf_diff, tool); struct addr_location al; struct hists *hists = evsel__hists(evsel); + struct hist_entry_iter iter = { + .evsel = evsel, + .sample = sample, + .ops = &hist_iter_normal, + }; int ret = -1; if (perf_time__ranges_skip_sample(pdiff->ptime_range, pdiff->range_num, @@ -411,14 +420,8 @@ static int diff__process_sample_event(struct perf_tool *tool, goto out_put; } - if (compute != COMPUTE_CYCLES) { - if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, - true)) { - pr_warning("problem incrementing symbol period, " - "skipping event\n"); - goto out_put; - } - } else { + switch (compute) { + case COMPUTE_CYCLES: if (!hists__add_entry_ops(hists, &block_hist_ops, &al, NULL, NULL, NULL, sample, true)) { pr_warning("problem incrementing symbol period, " @@ -428,6 +431,23 @@ static int diff__process_sample_event(struct perf_tool *tool, hist__account_cycles(sample->branch_stack, &al, sample, false, NULL); + break; + + case COMPUTE_STREAM: + if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH, + NULL)) { + pr_debug("problem adding hist entry, skipping event\n"); + goto out_put; + } + break; + + default: + if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, + true)) { + pr_warning("problem incrementing symbol period, " + "skipping event\n"); + goto out_put; + } } /* @@ -996,10 +1016,55 @@ static void data_process(void) } } +static int process_base_stream(struct data__file *data_base, + struct data__file *data_pair, + const char *title __maybe_unused) +{ + struct evlist *evlist_base = data_base->session->evlist; + struct evlist *evlist_pair = data_pair->session->evlist; + struct evsel *evsel_base, *evsel_pair; + struct evsel_streams *es_base, *es_pair; + + evlist__for_each_entry(evlist_base, evsel_base) { + evsel_pair = evsel_match(evsel_base, evlist_pair); + if (!evsel_pair) + continue; + + es_base = evsel_streams__entry(data_base->evlist_streams, + evsel_base->idx); + if (!es_base) + return -1; + + es_pair = evsel_streams__entry(data_pair->evlist_streams, + evsel_pair->idx); + if (!es_pair) + return -1; + + evsel_streams__match(es_base, es_pair); + evsel_streams__report(es_base, es_pair); + } + + return 0; +} + +static void stream_process(void) +{ + /* + * Stream comparison only supports two data files. + * perf.data.old and perf.data. data__files[0] is perf.data.old, + * data__files[1] is perf.data. + */ + process_base_stream(&data__files[0], &data__files[1], + "# Output based on old perf data:\n#\n"); +} + static void data__free(struct data__file *d) { int col; + if (d->evlist_streams) + evlist_streams__delete(d->evlist_streams); + for (col = 0; col < PERF_HPP_DIFF__MAX_INDEX; col++) { struct diff_hpp_fmt *fmt = &d->fmt[col]; @@ -1153,9 +1218,19 @@ static int __cmd_diff(void) if (pdiff.ptime_range) zfree(&pdiff.ptime_range); + + if (compute == COMPUTE_STREAM) { + d->evlist_streams = evlist__create_streams( + d->session->evlist, 5); + if (!d->evlist_streams) + goto out_delete; + } } - data_process(); + if (compute == COMPUTE_STREAM) + stream_process(); + else + data_process(); out_delete: data__for_each_file(i, d) { @@ -1228,6 +1303,8 @@ static const struct option options[] = { "only consider symbols in these pids"), OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]", "only consider symbols in these tids"), + OPT_BOOLEAN(0, "stream", &pdiff.stream, + "Enable hot streams comparison."), OPT_END() }; @@ -1887,6 +1964,9 @@ int cmd_diff(int argc, const char **argv) if (cycles_hist && (compute != COMPUTE_CYCLES)) usage_with_options(diff_usage, options); + if (pdiff.stream) + compute = COMPUTE_STREAM; + symbol__annotation_init(); if (symbol__init(NULL) < 0) @@ -1898,13 +1978,26 @@ int cmd_diff(int argc, const char **argv) if (check_file_brstack() < 0) return -1; - if (compute == COMPUTE_CYCLES && !pdiff.has_br_stack) + if ((compute == COMPUTE_CYCLES || compute == COMPUTE_STREAM) + && !pdiff.has_br_stack) { return -1; + } - if (ui_init() < 0) - return -1; + if (compute == COMPUTE_STREAM) { + symbol_conf.show_branchflag_count = true; + symbol_conf.disable_add2line_warn = true; + callchain_param.mode = CHAIN_FLAT; + callchain_param.key = CCKEY_SRCLINE; + callchain_param.branch_callstack = 1; + symbol_conf.use_callchain = true; + callchain_register_param(&callchain_param); + sort_order = "srcline,symbol,dso"; + } else { + if (ui_init() < 0) + return -1; - sort__mode = SORT_MODE__DIFF; + sort__mode = SORT_MODE__DIFF; + } if (setup_sorting(NULL) < 0) usage_with_options(diff_usage, options); -- cgit v1.3.1 From caf7f9685dd339d4fe7c3cbc0b5d7fabd3df784d Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 8 Oct 2020 23:19:28 +0800 Subject: perf jevents: Fix event code for events referencing std arch events The event code for events referencing std arch events is incorrectly evaluated in json_events(). The issue is that je.event is evaluated properly from try_fixup(), but later NULLified from the real_event() call, as "event" may be NULL. Fix by setting "event" same je.event in try_fixup(). Also remove support for overwriting event code for events using std arch events, as it is not used. Signed-off-by: John Garry Reviewed-By: Kajol Jain Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/1602170368-11892-1-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 99df41a9543d..e47644cab3fa 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -505,20 +505,15 @@ static char *real_event(const char *name, char *event) } static int -try_fixup(const char *fn, char *arch_std, unsigned long long eventcode, - struct json_event *je) +try_fixup(const char *fn, char *arch_std, struct json_event *je, char **event) { /* try to find matching event from arch standard values */ struct event_struct *es; list_for_each_entry(es, &arch_std_events, list) { if (!strcmp(arch_std, es->name)) { - if (!eventcode && es->event) { - /* allow EventCode to be overridden */ - free(je->event); - je->event = NULL; - } FOR_ALL_EVENT_STRUCT_FIELDS(TRY_FIXUP_FIELD); + *event = je->event; return 0; } } @@ -678,7 +673,7 @@ static int json_events(const char *fn, * An arch standard event is referenced, so try to * fixup any unassigned values. */ - err = try_fixup(fn, arch_std, eventcode, &je); + err = try_fixup(fn, arch_std, &je, &event); if (err) goto free_strings; } -- cgit v1.3.1 From f92993851f01fa5d8892329d30867acc87180d39 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 12 Oct 2020 09:16:11 -0700 Subject: perf bench: Use condition variables in numa. The existing approach to synchronization between threads in the numa benchmark is unbalanced mutexes. This synchronization causes thread sanitizer to warn of locks being taken twice on a thread without an unlock, as well as unlocks with no corresponding locks. This change replaces the synchronization with more regular condition variables. While this fixes one class of thread sanitizer warnings, there still remain warnings of data races due to threads reading and writing shared memory without any atomics. Committer testing: Basic run on a non-NUMA machine. # perf bench numa # List of available benchmarks for collection 'numa': mem: Benchmark for NUMA workloads all: Run all NUMA benchmarks # perf bench numa all # Running numa/mem benchmark... # Running main, "perf bench numa numa-mem" # # Running test on: Linux five 5.8.12-200.fc32.x86_64 #1 SMP Mon Sep 28 12:17:31 UTC 2020 x86_64 x86_64 x86_64 GNU/Linux # # Running RAM-bw-local, "perf bench numa mem -p 1 -t 1 -P 1024 -C 0 -M 0 -s 20 -zZq --thp 1 --no-data_rand_walk" 20.076 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.073 secs average thread-runtime 0.190 % difference between max/avg runtime 241.828 GB data processed, per thread 241.828 GB data processed, total 0.083 nsecs/byte/thread runtime 12.045 GB/sec/thread speed 12.045 GB/sec total speed # Running RAM-bw-local-NOTHP, "perf bench numa mem -p 1 -t 1 -P 1024 -C 0 -M 0 -s 20 -zZq --thp 1 --no-data_rand_walk --thp -1" 20.045 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.014 secs average thread-runtime 0.111 % difference between max/avg runtime 234.304 GB data processed, per thread 234.304 GB data processed, total 0.086 nsecs/byte/thread runtime 11.689 GB/sec/thread speed 11.689 GB/sec total speed # Running RAM-bw-remote, "perf bench numa mem -p 1 -t 1 -P 1024 -C 0 -M 1 -s 20 -zZq --thp 1 --no-data_rand_walk" Test not applicable, system has only 1 nodes. # Running RAM-bw-local-2x, "perf bench numa mem -p 2 -t 1 -P 1024 -C 0,2 -M 0x2 -s 20 -zZq --thp 1 --no-data_rand_walk" 20.138 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.121 secs average thread-runtime 0.342 % difference between max/avg runtime 135.961 GB data processed, per thread 271.922 GB data processed, total 0.148 nsecs/byte/thread runtime 6.752 GB/sec/thread speed 13.503 GB/sec total speed # Running RAM-bw-remote-2x, "perf bench numa mem -p 2 -t 1 -P 1024 -C 0,2 -M 1x2 -s 20 -zZq --thp 1 --no-data_rand_walk" Test not applicable, system has only 1 nodes. # Running RAM-bw-cross, "perf bench numa mem -p 2 -t 1 -P 1024 -C 0,8 -M 1,0 -s 20 -zZq --thp 1 --no-data_rand_walk" Test not applicable, system has only 1 nodes. # Running 1x3-convergence, "perf bench numa mem -p 1 -t 3 -P 512 -s 100 -zZ0qcm --thp 1" 0.747 secs latency to NUMA-converge 0.747 secs slowest (max) thread-runtime 0.000 secs fastest (min) thread-runtime 0.714 secs average thread-runtime 50.000 % difference between max/avg runtime 3.228 GB data processed, per thread 9.683 GB data processed, total 0.231 nsecs/byte/thread runtime 4.321 GB/sec/thread speed 12.964 GB/sec total speed # Running 1x4-convergence, "perf bench numa mem -p 1 -t 4 -P 512 -s 100 -zZ0qcm --thp 1" 1.127 secs latency to NUMA-converge 1.127 secs slowest (max) thread-runtime 1.000 secs fastest (min) thread-runtime 1.089 secs average thread-runtime 5.624 % difference between max/avg runtime 3.765 GB data processed, per thread 15.062 GB data processed, total 0.299 nsecs/byte/thread runtime 3.342 GB/sec/thread speed 13.368 GB/sec total speed # Running 1x6-convergence, "perf bench numa mem -p 1 -t 6 -P 1020 -s 100 -zZ0qcm --thp 1" 1.003 secs latency to NUMA-converge 1.003 secs slowest (max) thread-runtime 0.000 secs fastest (min) thread-runtime 0.889 secs average thread-runtime 50.000 % difference between max/avg runtime 2.141 GB data processed, per thread 12.847 GB data processed, total 0.469 nsecs/byte/thread runtime 2.134 GB/sec/thread speed 12.805 GB/sec total speed # Running 2x3-convergence, "perf bench numa mem -p 2 -t 3 -P 1020 -s 100 -zZ0qcm --thp 1" 1.814 secs latency to NUMA-converge 1.814 secs slowest (max) thread-runtime 1.000 secs fastest (min) thread-runtime 1.716 secs average thread-runtime 22.440 % difference between max/avg runtime 3.747 GB data processed, per thread 22.483 GB data processed, total 0.484 nsecs/byte/thread runtime 2.065 GB/sec/thread speed 12.393 GB/sec total speed # Running 3x3-convergence, "perf bench numa mem -p 3 -t 3 -P 1020 -s 100 -zZ0qcm --thp 1" 2.065 secs latency to NUMA-converge 2.065 secs slowest (max) thread-runtime 1.000 secs fastest (min) thread-runtime 1.947 secs average thread-runtime 25.788 % difference between max/avg runtime 2.855 GB data processed, per thread 25.694 GB data processed, total 0.723 nsecs/byte/thread runtime 1.382 GB/sec/thread speed 12.442 GB/sec total speed # Running 4x4-convergence, "perf bench numa mem -p 4 -t 4 -P 512 -s 100 -zZ0qcm --thp 1" 1.912 secs latency to NUMA-converge 1.912 secs slowest (max) thread-runtime 1.000 secs fastest (min) thread-runtime 1.775 secs average thread-runtime 23.852 % difference between max/avg runtime 1.479 GB data processed, per thread 23.668 GB data processed, total 1.293 nsecs/byte/thread runtime 0.774 GB/sec/thread speed 12.378 GB/sec total speed # Running 4x4-convergence-NOTHP, "perf bench numa mem -p 4 -t 4 -P 512 -s 100 -zZ0qcm --thp 1 --thp -1" 1.783 secs latency to NUMA-converge 1.783 secs slowest (max) thread-runtime 1.000 secs fastest (min) thread-runtime 1.633 secs average thread-runtime 21.960 % difference between max/avg runtime 1.345 GB data processed, per thread 21.517 GB data processed, total 1.326 nsecs/byte/thread runtime 0.754 GB/sec/thread speed 12.067 GB/sec total speed # Running 4x6-convergence, "perf bench numa mem -p 4 -t 6 -P 1020 -s 100 -zZ0qcm --thp 1" 5.396 secs latency to NUMA-converge 5.396 secs slowest (max) thread-runtime 4.000 secs fastest (min) thread-runtime 4.928 secs average thread-runtime 12.937 % difference between max/avg runtime 2.721 GB data processed, per thread 65.306 GB data processed, total 1.983 nsecs/byte/thread runtime 0.504 GB/sec/thread speed 12.102 GB/sec total speed # Running 4x8-convergence, "perf bench numa mem -p 4 -t 8 -P 512 -s 100 -zZ0qcm --thp 1" 3.121 secs latency to NUMA-converge 3.121 secs slowest (max) thread-runtime 2.000 secs fastest (min) thread-runtime 2.836 secs average thread-runtime 17.962 % difference between max/avg runtime 1.194 GB data processed, per thread 38.192 GB data processed, total 2.615 nsecs/byte/thread runtime 0.382 GB/sec/thread speed 12.236 GB/sec total speed # Running 8x4-convergence, "perf bench numa mem -p 8 -t 4 -P 512 -s 100 -zZ0qcm --thp 1" 4.302 secs latency to NUMA-converge 4.302 secs slowest (max) thread-runtime 3.000 secs fastest (min) thread-runtime 4.045 secs average thread-runtime 15.133 % difference between max/avg runtime 1.631 GB data processed, per thread 52.178 GB data processed, total 2.638 nsecs/byte/thread runtime 0.379 GB/sec/thread speed 12.128 GB/sec total speed # Running 8x4-convergence-NOTHP, "perf bench numa mem -p 8 -t 4 -P 512 -s 100 -zZ0qcm --thp 1 --thp -1" 4.418 secs latency to NUMA-converge 4.418 secs slowest (max) thread-runtime 3.000 secs fastest (min) thread-runtime 4.104 secs average thread-runtime 16.045 % difference between max/avg runtime 1.664 GB data processed, per thread 53.254 GB data processed, total 2.655 nsecs/byte/thread runtime 0.377 GB/sec/thread speed 12.055 GB/sec total speed # Running 3x1-convergence, "perf bench numa mem -p 3 -t 1 -P 512 -s 100 -zZ0qcm --thp 1" 0.973 secs latency to NUMA-converge 0.973 secs slowest (max) thread-runtime 0.000 secs fastest (min) thread-runtime 0.955 secs average thread-runtime 50.000 % difference between max/avg runtime 4.124 GB data processed, per thread 12.372 GB data processed, total 0.236 nsecs/byte/thread runtime 4.238 GB/sec/thread speed 12.715 GB/sec total speed # Running 4x1-convergence, "perf bench numa mem -p 4 -t 1 -P 512 -s 100 -zZ0qcm --thp 1" 0.820 secs latency to NUMA-converge 0.820 secs slowest (max) thread-runtime 0.000 secs fastest (min) thread-runtime 0.808 secs average thread-runtime 50.000 % difference between max/avg runtime 2.555 GB data processed, per thread 10.220 GB data processed, total 0.321 nsecs/byte/thread runtime 3.117 GB/sec/thread speed 12.468 GB/sec total speed # Running 8x1-convergence, "perf bench numa mem -p 8 -t 1 -P 512 -s 100 -zZ0qcm --thp 1" 0.667 secs latency to NUMA-converge 0.667 secs slowest (max) thread-runtime 0.000 secs fastest (min) thread-runtime 0.607 secs average thread-runtime 50.000 % difference between max/avg runtime 1.009 GB data processed, per thread 8.069 GB data processed, total 0.661 nsecs/byte/thread runtime 1.512 GB/sec/thread speed 12.095 GB/sec total speed # Running 16x1-convergence, "perf bench numa mem -p 16 -t 1 -P 256 -s 100 -zZ0qcm --thp 1" 1.546 secs latency to NUMA-converge 1.546 secs slowest (max) thread-runtime 1.000 secs fastest (min) thread-runtime 1.485 secs average thread-runtime 17.664 % difference between max/avg runtime 1.162 GB data processed, per thread 18.594 GB data processed, total 1.331 nsecs/byte/thread runtime 0.752 GB/sec/thread speed 12.025 GB/sec total speed # Running 32x1-convergence, "perf bench numa mem -p 32 -t 1 -P 128 -s 100 -zZ0qcm --thp 1" 0.812 secs latency to NUMA-converge 0.812 secs slowest (max) thread-runtime 0.000 secs fastest (min) thread-runtime 0.739 secs average thread-runtime 50.000 % difference between max/avg runtime 0.309 GB data processed, per thread 9.874 GB data processed, total 2.630 nsecs/byte/thread runtime 0.380 GB/sec/thread speed 12.166 GB/sec total speed # Running 2x1-bw-process, "perf bench numa mem -p 2 -t 1 -P 1024 -s 20 -zZ0q --thp 1" 20.044 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.020 secs average thread-runtime 0.109 % difference between max/avg runtime 125.750 GB data processed, per thread 251.501 GB data processed, total 0.159 nsecs/byte/thread runtime 6.274 GB/sec/thread speed 12.548 GB/sec total speed # Running 3x1-bw-process, "perf bench numa mem -p 3 -t 1 -P 1024 -s 20 -zZ0q --thp 1" 20.148 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.090 secs average thread-runtime 0.367 % difference between max/avg runtime 85.267 GB data processed, per thread 255.800 GB data processed, total 0.236 nsecs/byte/thread runtime 4.232 GB/sec/thread speed 12.696 GB/sec total speed # Running 4x1-bw-process, "perf bench numa mem -p 4 -t 1 -P 1024 -s 20 -zZ0q --thp 1" 20.169 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.100 secs average thread-runtime 0.419 % difference between max/avg runtime 63.144 GB data processed, per thread 252.576 GB data processed, total 0.319 nsecs/byte/thread runtime 3.131 GB/sec/thread speed 12.523 GB/sec total speed # Running 8x1-bw-process, "perf bench numa mem -p 8 -t 1 -P 512 -s 20 -zZ0q --thp 1" 20.175 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.107 secs average thread-runtime 0.433 % difference between max/avg runtime 31.267 GB data processed, per thread 250.133 GB data processed, total 0.645 nsecs/byte/thread runtime 1.550 GB/sec/thread speed 12.398 GB/sec total speed # Running 8x1-bw-process-NOTHP, "perf bench numa mem -p 8 -t 1 -P 512 -s 20 -zZ0q --thp 1 --thp -1" 20.216 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.113 secs average thread-runtime 0.535 % difference between max/avg runtime 30.998 GB data processed, per thread 247.981 GB data processed, total 0.652 nsecs/byte/thread runtime 1.533 GB/sec/thread speed 12.266 GB/sec total speed # Running 16x1-bw-process, "perf bench numa mem -p 16 -t 1 -P 256 -s 20 -zZ0q --thp 1" 20.234 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.174 secs average thread-runtime 0.577 % difference between max/avg runtime 15.377 GB data processed, per thread 246.039 GB data processed, total 1.316 nsecs/byte/thread runtime 0.760 GB/sec/thread speed 12.160 GB/sec total speed # Running 1x4-bw-thread, "perf bench numa mem -p 1 -t 4 -T 256 -s 20 -zZ0q --thp 1" 20.040 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.028 secs average thread-runtime 0.099 % difference between max/avg runtime 66.832 GB data processed, per thread 267.328 GB data processed, total 0.300 nsecs/byte/thread runtime 3.335 GB/sec/thread speed 13.340 GB/sec total speed # Running 1x8-bw-thread, "perf bench numa mem -p 1 -t 8 -T 256 -s 20 -zZ0q --thp 1" 20.064 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.034 secs average thread-runtime 0.160 % difference between max/avg runtime 32.911 GB data processed, per thread 263.286 GB data processed, total 0.610 nsecs/byte/thread runtime 1.640 GB/sec/thread speed 13.122 GB/sec total speed # Running 1x16-bw-thread, "perf bench numa mem -p 1 -t 16 -T 128 -s 20 -zZ0q --thp 1" 20.092 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.052 secs average thread-runtime 0.230 % difference between max/avg runtime 16.131 GB data processed, per thread 258.088 GB data processed, total 1.246 nsecs/byte/thread runtime 0.803 GB/sec/thread speed 12.845 GB/sec total speed # Running 1x32-bw-thread, "perf bench numa mem -p 1 -t 32 -T 64 -s 20 -zZ0q --thp 1" 20.099 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.063 secs average thread-runtime 0.247 % difference between max/avg runtime 7.962 GB data processed, per thread 254.773 GB data processed, total 2.525 nsecs/byte/thread runtime 0.396 GB/sec/thread speed 12.676 GB/sec total speed # Running 2x3-bw-process, "perf bench numa mem -p 2 -t 3 -P 512 -s 20 -zZ0q --thp 1" 20.150 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.120 secs average thread-runtime 0.372 % difference between max/avg runtime 44.827 GB data processed, per thread 268.960 GB data processed, total 0.450 nsecs/byte/thread runtime 2.225 GB/sec/thread speed 13.348 GB/sec total speed # Running 4x4-bw-process, "perf bench numa mem -p 4 -t 4 -P 512 -s 20 -zZ0q --thp 1" 20.258 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.168 secs average thread-runtime 0.636 % difference between max/avg runtime 17.079 GB data processed, per thread 273.263 GB data processed, total 1.186 nsecs/byte/thread runtime 0.843 GB/sec/thread speed 13.489 GB/sec total speed # Running 4x6-bw-process, "perf bench numa mem -p 4 -t 6 -P 512 -s 20 -zZ0q --thp 1" 20.559 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.382 secs average thread-runtime 1.359 % difference between max/avg runtime 10.758 GB data processed, per thread 258.201 GB data processed, total 1.911 nsecs/byte/thread runtime 0.523 GB/sec/thread speed 12.559 GB/sec total speed # Running 4x8-bw-process, "perf bench numa mem -p 4 -t 8 -P 512 -s 20 -zZ0q --thp 1" 20.744 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.516 secs average thread-runtime 1.792 % difference between max/avg runtime 8.069 GB data processed, per thread 258.201 GB data processed, total 2.571 nsecs/byte/thread runtime 0.389 GB/sec/thread speed 12.447 GB/sec total speed # Running 4x8-bw-process-NOTHP, "perf bench numa mem -p 4 -t 8 -P 512 -s 20 -zZ0q --thp 1 --thp -1" 20.855 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.561 secs average thread-runtime 2.050 % difference between max/avg runtime 8.069 GB data processed, per thread 258.201 GB data processed, total 2.585 nsecs/byte/thread runtime 0.387 GB/sec/thread speed 12.381 GB/sec total speed # Running 3x3-bw-process, "perf bench numa mem -p 3 -t 3 -P 512 -s 20 -zZ0q --thp 1" 20.134 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.077 secs average thread-runtime 0.333 % difference between max/avg runtime 28.091 GB data processed, per thread 252.822 GB data processed, total 0.717 nsecs/byte/thread runtime 1.395 GB/sec/thread speed 12.557 GB/sec total speed # Running 5x5-bw-process, "perf bench numa mem -p 5 -t 5 -P 512 -s 20 -zZ0q --thp 1" 20.588 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.375 secs average thread-runtime 1.427 % difference between max/avg runtime 10.177 GB data processed, per thread 254.436 GB data processed, total 2.023 nsecs/byte/thread runtime 0.494 GB/sec/thread speed 12.359 GB/sec total speed # Running 2x16-bw-process, "perf bench numa mem -p 2 -t 16 -P 512 -s 20 -zZ0q --thp 1" 20.657 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.429 secs average thread-runtime 1.589 % difference between max/avg runtime 8.170 GB data processed, per thread 261.429 GB data processed, total 2.528 nsecs/byte/thread runtime 0.395 GB/sec/thread speed 12.656 GB/sec total speed # Running 1x32-bw-process, "perf bench numa mem -p 1 -t 32 -P 2048 -s 20 -zZ0q --thp 1" 22.981 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 21.996 secs average thread-runtime 6.486 % difference between max/avg runtime 8.863 GB data processed, per thread 283.606 GB data processed, total 2.593 nsecs/byte/thread runtime 0.386 GB/sec/thread speed 12.341 GB/sec total speed # Running numa02-bw, "perf bench numa mem -p 1 -t 32 -T 32 -s 20 -zZ0q --thp 1" 20.047 secs slowest (max) thread-runtime 19.000 secs fastest (min) thread-runtime 20.026 secs average thread-runtime 2.611 % difference between max/avg runtime 8.441 GB data processed, per thread 270.111 GB data processed, total 2.375 nsecs/byte/thread runtime 0.421 GB/sec/thread speed 13.474 GB/sec total speed # Running numa02-bw-NOTHP, "perf bench numa mem -p 1 -t 32 -T 32 -s 20 -zZ0q --thp 1 --thp -1" 20.088 secs slowest (max) thread-runtime 19.000 secs fastest (min) thread-runtime 20.025 secs average thread-runtime 2.709 % difference between max/avg runtime 8.411 GB data processed, per thread 269.142 GB data processed, total 2.388 nsecs/byte/thread runtime 0.419 GB/sec/thread speed 13.398 GB/sec total speed # Running numa01-bw-thread, "perf bench numa mem -p 2 -t 16 -T 192 -s 20 -zZ0q --thp 1" 20.293 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.175 secs average thread-runtime 0.721 % difference between max/avg runtime 7.918 GB data processed, per thread 253.374 GB data processed, total 2.563 nsecs/byte/thread runtime 0.390 GB/sec/thread speed 12.486 GB/sec total speed # Running numa01-bw-thread-NOTHP, "perf bench numa mem -p 2 -t 16 -T 192 -s 20 -zZ0q --thp 1 --thp -1" 20.411 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.226 secs average thread-runtime 1.006 % difference between max/avg runtime 7.931 GB data processed, per thread 253.778 GB data processed, total 2.574 nsecs/byte/thread runtime 0.389 GB/sec/thread speed 12.434 GB/sec total speed # Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20201012161611.366482-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 67 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index f85bceccc459..11726ec6285f 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -137,12 +137,13 @@ struct global_info { u8 *data; pthread_mutex_t startup_mutex; + pthread_cond_t startup_cond; int nr_tasks_started; - pthread_mutex_t startup_done_mutex; - pthread_mutex_t start_work_mutex; + pthread_cond_t start_work_cond; int nr_tasks_working; + bool start_work; pthread_mutex_t stop_work_mutex; u64 bytes_done; @@ -483,6 +484,18 @@ static void init_global_mutex(pthread_mutex_t *mutex) pthread_mutex_init(mutex, &attr); } +/* + * Return a process-shared (global) condition variable: + */ +static void init_global_cond(pthread_cond_t *cond) +{ + pthread_condattr_t attr; + + pthread_condattr_init(&attr); + pthread_condattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); + pthread_cond_init(cond, &attr); +} + static int parse_cpu_list(const char *arg) { p0.cpu_list_str = strdup(arg); @@ -1136,15 +1149,18 @@ static void *worker_thread(void *__tdata) if (g->p.serialize_startup) { pthread_mutex_lock(&g->startup_mutex); g->nr_tasks_started++; + /* The last thread wakes the main process. */ + if (g->nr_tasks_started == g->p.nr_tasks) + pthread_cond_signal(&g->startup_cond); + pthread_mutex_unlock(&g->startup_mutex); /* Here we will wait for the main process to start us all at once: */ pthread_mutex_lock(&g->start_work_mutex); + g->start_work = false; g->nr_tasks_working++; - - /* Last one wake the main process: */ - if (g->nr_tasks_working == g->p.nr_tasks) - pthread_mutex_unlock(&g->startup_done_mutex); + while (!g->start_work) + pthread_cond_wait(&g->start_work_cond, &g->start_work_mutex); pthread_mutex_unlock(&g->start_work_mutex); } @@ -1441,8 +1457,9 @@ static int init(void) /* Startup serialization: */ init_global_mutex(&g->start_work_mutex); + init_global_cond(&g->start_work_cond); init_global_mutex(&g->startup_mutex); - init_global_mutex(&g->startup_done_mutex); + init_global_cond(&g->startup_cond); init_global_mutex(&g->stop_work_mutex); init_thread_data(); @@ -1502,9 +1519,6 @@ static int __bench_numa(const char *name) pids = zalloc(g->p.nr_proc * sizeof(*pids)); pid = -1; - /* All threads try to acquire it, this way we can wait for them to start up: */ - pthread_mutex_lock(&g->start_work_mutex); - if (g->p.serialize_startup) { tprintf(" #\n"); tprintf(" # Startup synchronization: ..."); fflush(stdout); @@ -1526,22 +1540,29 @@ static int __bench_numa(const char *name) pids[i] = pid; } - /* Wait for all the threads to start up: */ - while (g->nr_tasks_started != g->p.nr_tasks) - usleep(USEC_PER_MSEC); - - BUG_ON(g->nr_tasks_started != g->p.nr_tasks); if (g->p.serialize_startup) { + bool threads_ready = false; double startup_sec; - pthread_mutex_lock(&g->startup_done_mutex); + /* + * Wait for all the threads to start up. The last thread will + * signal this process. + */ + pthread_mutex_lock(&g->startup_mutex); + while (g->nr_tasks_started != g->p.nr_tasks) + pthread_cond_wait(&g->startup_cond, &g->startup_mutex); - /* This will start all threads: */ - pthread_mutex_unlock(&g->start_work_mutex); + pthread_mutex_unlock(&g->startup_mutex); - /* This mutex is locked - the last started thread will wake us: */ - pthread_mutex_lock(&g->startup_done_mutex); + /* Wait for all threads to be at the start_work_cond. */ + while (!threads_ready) { + pthread_mutex_lock(&g->start_work_mutex); + threads_ready = (g->nr_tasks_working == g->p.nr_tasks); + pthread_mutex_unlock(&g->start_work_mutex); + if (!threads_ready) + usleep(1); + } gettimeofday(&stop, NULL); @@ -1555,7 +1576,11 @@ static int __bench_numa(const char *name) tprintf(" #\n"); start = stop; - pthread_mutex_unlock(&g->startup_done_mutex); + /* Start all threads running. */ + pthread_mutex_lock(&g->start_work_mutex); + g->start_work = true; + pthread_mutex_unlock(&g->start_work_mutex); + pthread_cond_broadcast(&g->start_work_cond); } else { gettimeofday(&start, NULL); } -- cgit v1.3.1 From 1a01727676a87945bd6b9796fc5ee894c24b4fe2 Mon Sep 17 00:00:00 2001 From: Michael Jeanson Date: Mon, 12 Oct 2020 10:50:16 -0400 Subject: selftests: Add VRF route leaking tests The objective of the tests is to check that ICMP errors generated while crossing between VRFs are properly routed back to the source host. The first ttl test sends a ping with a ttl of 1 from h1 to h2 and parses the output of the command to check that a ttl expired error is received. The second ttl test runs traceroute from h1 to h2 and parses the output to check for a hop on r1. The mtu test sends a ping with a payload of 1450 from h1 to h2, through r1 which has an interface with a mtu of 1400 and parses the output of the command to check that a fragmentation needed error is received. [ The IPv6 MTU test still fails with the symmetric routing setup. It appears to be caused by source address selection picking ::1. Fixing this is beyond the scope of this series. ] Signed-off-by: Michael Jeanson Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/vrf_route_leaking.sh | 626 +++++++++++++++++++++++ 2 files changed, 627 insertions(+) create mode 100755 tools/testing/selftests/net/vrf_route_leaking.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 9491bbaa0831..3e7fb1e70c77 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -19,6 +19,7 @@ TEST_PROGS += txtimestamp.sh TEST_PROGS += vrf-xfrm-tests.sh TEST_PROGS += rxtimestamp.sh TEST_PROGS += devlink_port_split.py +TEST_PROGS += vrf_route_leaking.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh new file mode 100755 index 000000000000..23cf924754a5 --- /dev/null +++ b/tools/testing/selftests/net/vrf_route_leaking.sh @@ -0,0 +1,626 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2019 David Ahern . All rights reserved. +# Copyright (c) 2020 Michael Jeanson . All rights reserved. +# +# Requires CONFIG_NET_VRF, CONFIG_VETH, CONFIG_BRIDGE and CONFIG_NET_NS. +# +# +# Symmetric routing topology +# +# blue red +# +----+ .253 +----+ .253 +----+ +# | h1 |-------------------| r1 |-------------------| h2 | +# +----+ .1 +----+ .2 +----+ +# 172.16.1/24 172.16.2/24 +# 2001:db8:16:1/64 2001:db8:16:2/64 +# +# +# Route from h1 to h2 and back goes through r1, incoming vrf blue has a route +# to the outgoing vrf red for the n2 network and red has a route back to n1. +# The red VRF interface has a MTU of 1400. +# +# The first test sends a ping with a ttl of 1 from h1 to h2 and parses the +# output of the command to check that a ttl expired error is received. +# +# The second test runs traceroute from h1 to h2 and parses the output to check +# for a hop on r1. +# +# The third test sends a ping with a packet size of 1450 from h1 to h2 and +# parses the output of the command to check that a fragmentation error is +# received. +# +# +# Asymmetric routing topology +# +# This topology represents a customer setup where the issue with icmp errors +# and VRF route leaking was initialy reported. The MTU test isn't done here +# because of the lack of a return route in the red VRF. +# +# blue red +# .253 +----+ .253 +# +----| r1 |----+ +# | +----+ | +# +----+ | | +----+ +# | h1 |--------------+ +--------------| h2 | +# +----+ .1 | | .2 +----+ +# 172.16.1/24 | +----+ | 172.16.2/24 +# 2001:db8:16:1/64 +----| r2 |----+ 2001:db8:16:2/64 +# .254 +----+ .254 +# +# +# Route from h1 to h2 goes through r1, incoming vrf blue has a route to the +# outgoing vrf red for the n2 network but red doesn't have a route back to n1. +# Route from h2 to h1 goes through r2. +# +# The objective is to check that the incoming vrf routing table is selected +# to send an ICMP error back to the source when the ttl of a packet reaches 1 +# while it is forwarded between different vrfs. + +VERBOSE=0 +PAUSE_ON_FAIL=no +DEFAULT_TTYPE=sym + +H1_N1=172.16.1.0/24 +H1_N1_6=2001:db8:16:1::/64 + +H1_N1_IP=172.16.1.1 +R1_N1_IP=172.16.1.253 +R2_N1_IP=172.16.1.254 + +H1_N1_IP6=2001:db8:16:1::1 +R1_N1_IP6=2001:db8:16:1::253 +R2_N1_IP6=2001:db8:16:1::254 + +H2_N2=172.16.2.0/24 +H2_N2_6=2001:db8:16:2::/64 + +H2_N2_IP=172.16.2.2 +R1_N2_IP=172.16.2.253 +R2_N2_IP=172.16.2.254 + +H2_N2_IP6=2001:db8:16:2::2 +R1_N2_IP6=2001:db8:16:2::253 +R2_N2_IP6=2001:db8:16:2::254 + +################################################################################ +# helpers + +log_section() +{ + echo + echo "###########################################################################" + echo "$*" + echo "###########################################################################" + echo +} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ "${rc}" -eq "${expected}" ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read -r a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +run_cmd() +{ + local cmd="$*" + local out + local rc + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + fi + + # shellcheck disable=SC2086 + out=$(eval $cmd 2>&1) + rc=$? + if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then + echo "$out" + fi + + [ "$VERBOSE" = "1" ] && echo + + return $rc +} + +run_cmd_grep() +{ + local grep_pattern="$1" + shift + local cmd="$*" + local out + local rc + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + fi + + # shellcheck disable=SC2086 + out=$(eval $cmd 2>&1) + if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then + echo "$out" + fi + + echo "$out" | grep -q "$grep_pattern" + rc=$? + + [ "$VERBOSE" = "1" ] && echo + + return $rc +} + +################################################################################ +# setup and teardown + +cleanup() +{ + local ns + + for ns in h1 h2 r1 r2; do + ip netns del $ns 2>/dev/null + done +} + +setup_vrf() +{ + local ns=$1 + + ip -netns "${ns}" rule del pref 0 + ip -netns "${ns}" rule add pref 32765 from all lookup local + ip -netns "${ns}" -6 rule del pref 0 + ip -netns "${ns}" -6 rule add pref 32765 from all lookup local +} + +create_vrf() +{ + local ns=$1 + local vrf=$2 + local table=$3 + + ip -netns "${ns}" link add "${vrf}" type vrf table "${table}" + ip -netns "${ns}" link set "${vrf}" up + ip -netns "${ns}" route add vrf "${vrf}" unreachable default metric 8192 + ip -netns "${ns}" -6 route add vrf "${vrf}" unreachable default metric 8192 + + ip -netns "${ns}" addr add 127.0.0.1/8 dev "${vrf}" + ip -netns "${ns}" -6 addr add ::1 dev "${vrf}" nodad +} + +setup_sym() +{ + local ns + + # make sure we are starting with a clean slate + cleanup + + # + # create nodes as namespaces + # + for ns in h1 h2 r1; do + ip netns add $ns + ip -netns $ns link set lo up + + case "${ns}" in + h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 + ;; + r1) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 + esac + done + + # + # create interconnects + # + ip -netns h1 link add eth0 type veth peer name r1h1 + ip -netns h1 link set r1h1 netns r1 name eth0 up + + ip -netns h2 link add eth0 type veth peer name r1h2 + ip -netns h2 link set r1h2 netns r1 name eth1 up + + # + # h1 + # + ip -netns h1 addr add dev eth0 ${H1_N1_IP}/24 + ip -netns h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad + ip -netns h1 link set eth0 up + + # h1 to h2 via r1 + ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0 + ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0 + + # + # h2 + # + ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24 + ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad + ip -netns h2 link set eth0 up + + # h2 to h1 via r1 + ip -netns h2 route add default via ${R1_N2_IP} dev eth0 + ip -netns h2 -6 route add default via ${R1_N2_IP6} dev eth0 + + # + # r1 + # + setup_vrf r1 + create_vrf r1 blue 1101 + create_vrf r1 red 1102 + ip -netns r1 link set mtu 1400 dev eth1 + ip -netns r1 link set eth0 vrf blue up + ip -netns r1 link set eth1 vrf red up + ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24 + ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad + ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24 + ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad + + # Route leak from blue to red + ip -netns r1 route add vrf blue ${H2_N2} dev red + ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red + + # Route leak from red to blue + ip -netns r1 route add vrf red ${H1_N1} dev blue + ip -netns r1 -6 route add vrf red ${H1_N1_6} dev blue + + + # Wait for ip config to settle + sleep 2 +} + +setup_asym() +{ + local ns + + # make sure we are starting with a clean slate + cleanup + + # + # create nodes as namespaces + # + for ns in h1 h2 r1 r2; do + ip netns add $ns + ip -netns $ns link set lo up + + case "${ns}" in + h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 + ;; + r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 + esac + done + + # + # create interconnects + # + ip -netns h1 link add eth0 type veth peer name r1h1 + ip -netns h1 link set r1h1 netns r1 name eth0 up + + ip -netns h1 link add eth1 type veth peer name r2h1 + ip -netns h1 link set r2h1 netns r2 name eth0 up + + ip -netns h2 link add eth0 type veth peer name r1h2 + ip -netns h2 link set r1h2 netns r1 name eth1 up + + ip -netns h2 link add eth1 type veth peer name r2h2 + ip -netns h2 link set r2h2 netns r2 name eth1 up + + # + # h1 + # + ip -netns h1 link add br0 type bridge + ip -netns h1 link set br0 up + ip -netns h1 addr add dev br0 ${H1_N1_IP}/24 + ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad + ip -netns h1 link set eth0 master br0 up + ip -netns h1 link set eth1 master br0 up + + # h1 to h2 via r1 + ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev br0 + ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0 + + # + # h2 + # + ip -netns h2 link add br0 type bridge + ip -netns h2 link set br0 up + ip -netns h2 addr add dev br0 ${H2_N2_IP}/24 + ip -netns h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad + ip -netns h2 link set eth0 master br0 up + ip -netns h2 link set eth1 master br0 up + + # h2 to h1 via r2 + ip -netns h2 route add default via ${R2_N2_IP} dev br0 + ip -netns h2 -6 route add default via ${R2_N2_IP6} dev br0 + + # + # r1 + # + setup_vrf r1 + create_vrf r1 blue 1101 + create_vrf r1 red 1102 + ip -netns r1 link set mtu 1400 dev eth1 + ip -netns r1 link set eth0 vrf blue up + ip -netns r1 link set eth1 vrf red up + ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24 + ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad + ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24 + ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad + + # Route leak from blue to red + ip -netns r1 route add vrf blue ${H2_N2} dev red + ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red + + # No route leak from red to blue + + # + # r2 + # + ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24 + ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad + ip -netns r2 addr add dev eth1 ${R2_N2_IP}/24 + ip -netns r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad + + # Wait for ip config to settle + sleep 2 +} + +check_connectivity() +{ + ip netns exec h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1 + log_test $? 0 "Basic IPv4 connectivity" + return $? +} + +check_connectivity6() +{ + ip netns exec h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 + log_test $? 0 "Basic IPv6 connectivity" + return $? +} + +check_traceroute() +{ + if [ ! -x "$(command -v traceroute)" ]; then + echo "SKIP: Could not run IPV4 test without traceroute" + return 1 + fi +} + +check_traceroute6() +{ + if [ ! -x "$(command -v traceroute6)" ]; then + echo "SKIP: Could not run IPV6 test without traceroute6" + return 1 + fi +} + +ipv4_traceroute() +{ + local ttype="$1" + + [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE" + + log_section "IPv4 ($ttype route): VRF ICMP error route lookup traceroute" + + check_traceroute || return + + setup_"$ttype" + + check_connectivity || return + + run_cmd_grep "${R1_N1_IP}" ip netns exec h1 traceroute ${H2_N2_IP} + log_test $? 0 "Traceroute reports a hop on r1" +} + +ipv4_traceroute_asym() +{ + ipv4_traceroute asym +} + +ipv6_traceroute() +{ + local ttype="$1" + + [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE" + + log_section "IPv6 ($ttype route): VRF ICMP error route lookup traceroute" + + check_traceroute6 || return + + setup_"$ttype" + + check_connectivity6 || return + + run_cmd_grep "${R1_N1_IP6}" ip netns exec h1 traceroute6 ${H2_N2_IP6} + log_test $? 0 "Traceroute6 reports a hop on r1" +} + +ipv6_traceroute_asym() +{ + ipv6_traceroute asym +} + +ipv4_ping_ttl() +{ + local ttype="$1" + + [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE" + + log_section "IPv4 ($ttype route): VRF ICMP ttl error route lookup ping" + + setup_"$ttype" + + check_connectivity || return + + run_cmd_grep "Time to live exceeded" ip netns exec h1 ping -t1 -c1 -W2 ${H2_N2_IP} + log_test $? 0 "Ping received ICMP ttl exceeded" +} + +ipv4_ping_ttl_asym() +{ + ipv4_ping_ttl asym +} + +ipv4_ping_frag() +{ + local ttype="$1" + + [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE" + + log_section "IPv4 ($ttype route): VRF ICMP fragmentation error route lookup ping" + + setup_"$ttype" + + check_connectivity || return + + run_cmd_grep "Frag needed" ip netns exec h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP} + log_test $? 0 "Ping received ICMP Frag needed" +} + +ipv4_ping_frag_asym() +{ + ipv4_ping_frag asym +} + +ipv6_ping_ttl() +{ + local ttype="$1" + + [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE" + + log_section "IPv6 ($ttype route): VRF ICMP ttl error route lookup ping" + + setup_"$ttype" + + check_connectivity6 || return + + run_cmd_grep "Time exceeded: Hop limit" ip netns exec h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6} + log_test $? 0 "Ping received ICMP Hop limit" +} + +ipv6_ping_ttl_asym() +{ + ipv6_ping_ttl asym +} + +ipv6_ping_frag() +{ + local ttype="$1" + + [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE" + + log_section "IPv6 ($ttype route): VRF ICMP fragmentation error route lookup ping" + + setup_"$ttype" + + check_connectivity6 || return + + run_cmd_grep "Packet too big" ip netns exec h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6} + log_test $? 0 "Ping received ICMP Packet too big" +} + +ipv6_ping_frag_asym() +{ + ipv6_ping_frag asym +} + +################################################################################ +# usage + +usage() +{ + cat < /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping) + +TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym" +TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_frag ipv6_ping_ttl_asym ipv6_traceroute_asym" + +ret=0 +nsuccess=0 +nfail=0 + +while getopts :46t:pvh o +do + case $o in + 4) TESTS=ipv4;; + 6) TESTS=ipv6;; + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=1;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +# +# show user test config +# +if [ -z "$TESTS" ]; then + TESTS="$TESTS_IPV4 $TESTS_IPV6" +elif [ "$TESTS" = "ipv4" ]; then + TESTS="$TESTS_IPV4" +elif [ "$TESTS" = "ipv6" ]; then + TESTS="$TESTS_IPV6" +fi + +for t in $TESTS +do + case $t in + ipv4_ping_ttl|ping) ipv4_ping_ttl;;& + ipv4_ping_ttl_asym|ping) ipv4_ping_ttl_asym;;& + ipv4_traceroute|traceroute) ipv4_traceroute;;& + ipv4_traceroute_asym|traceroute) ipv4_traceroute_asym;;& + ipv4_ping_frag|ping) ipv4_ping_frag;;& + + ipv6_ping_ttl|ping) ipv6_ping_ttl;;& + ipv6_ping_ttl_asym|ping) ipv6_ping_ttl_asym;;& + ipv6_traceroute|traceroute) ipv6_traceroute;;& + ipv6_traceroute_asym|traceroute) ipv6_traceroute_asym;;& + ipv6_ping_frag|ping) ipv6_ping_frag;;& + + # setup namespaces and config, but do not run any tests + setup_sym|setup) setup_sym; exit 0;; + setup_asym) setup_asym; exit 0;; + + help) echo "Test names: $TESTS"; exit 0;; + esac +done + +cleanup + +printf "\nTests passed: %3d\n" ${nsuccess} +printf "Tests failed: %3d\n" ${nfail} + +exit $ret -- cgit v1.3.1 From 4a770b413fe91d5d271e29a50a1f23cf189b4d2e Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 12 Oct 2020 08:50:51 +0200 Subject: parisc: Add MAP_UNINITIALIZED define We will not allow unitialized anon mmaps, but we need this define to prevent build errors, e.g. the debian foot package. Suggested-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/include/uapi/asm/mman.h | 1 + tools/arch/parisc/include/uapi/asm/mman.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h index 6fd8871e4081..ab78cba446ed 100644 --- a/arch/parisc/include/uapi/asm/mman.h +++ b/arch/parisc/include/uapi/asm/mman.h @@ -25,6 +25,7 @@ #define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */ #define MAP_HUGETLB 0x80000 /* create a huge page mapping */ #define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ +#define MAP_UNINITIALIZED 0 /* uninitialized anonymous mmap */ #define MS_SYNC 1 /* synchronous memory sync */ #define MS_ASYNC 2 /* sync memory asynchronously */ diff --git a/tools/arch/parisc/include/uapi/asm/mman.h b/tools/arch/parisc/include/uapi/asm/mman.h index f9fd1325f5bd..506c06a6536f 100644 --- a/tools/arch/parisc/include/uapi/asm/mman.h +++ b/tools/arch/parisc/include/uapi/asm/mman.h @@ -39,6 +39,5 @@ #define MADV_SOFT_OFFLINE 101 /* MAP_32BIT is undefined on parisc, fix it for perf */ #define MAP_32BIT 0 -/* MAP_UNINITIALIZED is undefined on parisc, fix it for perf */ #define MAP_UNINITIALIZED 0 #endif -- cgit v1.3.1 From b596e979c8c929959af8ef252a16971492fc8265 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 14 Oct 2020 06:09:14 +0100 Subject: perf c2c: Display the total numbers continuously To view the statistics with "breakdown" mode, it's good to show the summary numbers for the total records, all stores and all loads, then the sequential conlumns can be used to break into more detailed items. To achieve this purpose, this patch displays the summary numbers for records/stores/loads continuously and places them before breakdown items, this can allow uses to easily read the summarized statistics. Signed-off-by: Leo Yan Tested-by: Joe Mario Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201014050921.5591-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 57bb6cce43e3..b5d99ddc7473 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2846,13 +2846,13 @@ static int perf_c2c__report(int argc, const char **argv) "dcacheline," "dcacheline_node," "dcacheline_count," - "tot_recs," "percent_hitm," "tot_hitm,lcl_hitm,rmt_hitm," + "tot_recs," + "tot_loads," "stores,stores_l1hit,stores_l1miss," "dram_lcl,dram_rmt," "ld_llcmiss," - "tot_loads," "ld_fbhit,ld_l1hit,ld_l2hit," "ld_lclhit,ld_rmthit", c2c.display == DISPLAY_TOT ? "tot_hitm" : -- cgit v1.3.1 From 4f28641bde8a005462a59d654add4e00767a962c Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 14 Oct 2020 06:09:15 +0100 Subject: perf c2c: Display "Total Stores" as a standalone metrics The total stores is displayed under the metrics "Store Reference", to output the same format with total records and all loads, extract the total stores number as a standalone metrics "Total Stores". After this patch, the tool shows the summary numbers ("Total records", "Total loads", "Total Stores") in the unified form. Before: # ----------- Cacheline ---------- Tot ----- LLC Load Hitm ----- Total Total ---- Store Reference ---- --- Load Dram ---- LLC ----- Core Load Hit ----- -- LLC Load Hit -- # Index Address Node PA cnt Hitm Total Lcl Rmt records Loads Total L1Hit L1Miss Lcl Rmt Ld Miss FB L1 L2 Llc Rmt # ..... .................. .... ...... ....... ....... ....... ....... ....... ....... ....... ....... ....... ........ ........ ....... ....... ....... ....... ........ ........ # 0 0x55f07d580100 0 1499 85.89% 481 481 0 7243 3879 3364 2599 765 0 0 0 548 2615 66 169 0 1 0x55f07d580080 0 1 13.93% 78 78 0 664 664 0 0 0 0 0 0 187 361 27 11 0 2 0x55f07d5800c0 0 1 0.18% 1 1 0 405 405 0 0 0 0 0 0 131 0 10 263 0 After: # ----------- Cacheline ---------- Tot ----- LLC Load Hitm ----- Total Total Total ---- Stores ---- --- Load Dram ---- LLC ----- Core Load Hit ----- -- LLC Load Hit -- # Index Address Node PA cnt Hitm Total Lcl Rmt records Loads Stores L1Hit L1Miss Lcl Rmt Ld Miss FB L1 L2 Llc Rmt # ..... .................. .... ...... ....... ....... ....... ....... ....... ....... ....... ....... ....... ........ ........ ....... ....... ....... ....... ........ ........ # 0 0x55f07d580100 0 1499 85.89% 481 481 0 7243 3879 3364 2599 765 0 0 0 548 2615 66 169 0 1 0x55f07d580080 0 1 13.93% 78 78 0 664 664 0 0 0 0 0 0 187 361 27 11 0 2 0x55f07d5800c0 0 1 0.18% 1 1 0 405 405 0 0 0 0 0 0 131 0 10 263 0 Signed-off-by: Leo Yan Tested-by: Joe Mario Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201014050921.5591-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index b5d99ddc7473..51250f37ac9c 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1367,16 +1367,16 @@ static struct c2c_dimension dim_cl_lcl_hitm = { .width = 7, }; -static struct c2c_dimension dim_stores = { - .header = HEADER_SPAN("---- Store Reference ----", "Total", 2), - .name = "stores", +static struct c2c_dimension dim_tot_stores = { + .header = HEADER_BOTH("Total", "Stores"), + .name = "tot_stores", .cmp = store_cmp, .entry = store_entry, .width = 7, }; static struct c2c_dimension dim_stores_l1hit = { - .header = HEADER_SPAN_LOW("L1Hit"), + .header = HEADER_SPAN("---- Stores ----", "L1Hit", 1), .name = "stores_l1hit", .cmp = st_l1hit_cmp, .entry = st_l1hit_entry, @@ -1648,7 +1648,7 @@ static struct c2c_dimension *dimensions[] = { &dim_rmt_hitm, &dim_cl_lcl_hitm, &dim_cl_rmt_hitm, - &dim_stores, + &dim_tot_stores, &dim_stores_l1hit, &dim_stores_l1miss, &dim_cl_stores_l1hit, @@ -2850,7 +2850,8 @@ static int perf_c2c__report(int argc, const char **argv) "tot_hitm,lcl_hitm,rmt_hitm," "tot_recs," "tot_loads," - "stores,stores_l1hit,stores_l1miss," + "tot_stores," + "stores_l1hit,stores_l1miss," "dram_lcl,dram_rmt," "ld_llcmiss," "ld_fbhit,ld_l1hit,ld_l2hit," -- cgit v1.3.1 From 6d662d730d6cdc16a19c4bc076c03e2cd21c242d Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 14 Oct 2020 06:09:16 +0100 Subject: perf c2c: Organize metrics based on memory hierarchy The metrics are not organized based on memory hierarchy, e.g. the tool doesn't organize the metrics order based on memory nodes from the close node (e.g. L1/L2 cache) to far node (e.g. L3 cache and DRAM). To output metrics with more friendly form, this patch refines the metrics order based on memory hierarchy: "Core Load Hit" => "LLC Load Hit" => "LLC Ld Miss" => "Load Dram" Signed-off-by: Leo Yan Tested-by: Joe Mario Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201014050921.5591-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 51250f37ac9c..8947a8c48d5a 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2852,10 +2852,10 @@ static int perf_c2c__report(int argc, const char **argv) "tot_loads," "tot_stores," "stores_l1hit,stores_l1miss," - "dram_lcl,dram_rmt," - "ld_llcmiss," "ld_fbhit,ld_l1hit,ld_l2hit," - "ld_lclhit,ld_rmthit", + "ld_lclhit,ld_rmthit," + "ld_llcmiss," + "dram_lcl,dram_rmt", c2c.display == DISPLAY_TOT ? "tot_hitm" : c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm" ); -- cgit v1.3.1 From fdd32d7e8e81de8ce93e552b57838ce5413bf5e9 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 14 Oct 2020 06:09:17 +0100 Subject: perf c2c: Change header from "LLC Load Hitm" to "Load Hitm" The metrics "LLC Load Hitm" contains two items: one is "local Hitm" and another is "remote Hitm". "local Hitm" means: L3 HIT and was serviced by another processor core with a cross core snoop where modified copies were found; it's no doubt that "local Hitm" belongs to LLC access. But for "remote Hitm", based on the code in util/mem-events, it's the event for remote cache HIT and was serviced by another processor core with modified copies. Thus the remote Hitm is a remote cache's hit and actually it's LLC load miss. Now the display format gives users the impression that "local Hitm" and "remote Hitm" both belong to the LLC load, but this is not the fact as described. This patch changes the header from "LLC Load Hitm" to "Load Hitm", this can avoid the give the wrong impression that all Hitm belong to LLC. Signed-off-by: Leo Yan Tested-by: Joe Mario Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201014050921.5591-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 8947a8c48d5a..93f2d6c6c2b3 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1328,7 +1328,7 @@ static struct c2c_dimension dim_iaddr = { }; static struct c2c_dimension dim_tot_hitm = { - .header = HEADER_SPAN("----- LLC Load Hitm -----", "Total", 2), + .header = HEADER_SPAN("------- Load Hitm -------", "Total", 2), .name = "tot_hitm", .cmp = tot_hitm_cmp, .entry = tot_hitm_entry, -- cgit v1.3.1 From 0fbe2fe965cb684e00ab4e94d266b2cb46a63dc5 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 14 Oct 2020 06:09:18 +0100 Subject: perf c2c: Use more explicit headers for HITM Local and remote HITM use the headers 'Lcl' and 'Rmt' respectively, suppose if we want to extend the tool to display these two dimensions under any one metrics, users cannot understand the semantics if only based on the header string 'Lcl' or 'Rmt'. To explicit express the meaning for HITM items, this patch changes the headers string as "LclHitm" and "RmtHitm", the strings are more readable and this allows to extend metrics for using HITM items. Signed-off-by: Leo Yan Tested-by: Joe Mario Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201014050921.5591-6-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 93f2d6c6c2b3..05eeca2f0a6a 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1336,7 +1336,7 @@ static struct c2c_dimension dim_tot_hitm = { }; static struct c2c_dimension dim_lcl_hitm = { - .header = HEADER_SPAN_LOW("Lcl"), + .header = HEADER_SPAN_LOW("LclHitm"), .name = "lcl_hitm", .cmp = lcl_hitm_cmp, .entry = lcl_hitm_entry, @@ -1344,7 +1344,7 @@ static struct c2c_dimension dim_lcl_hitm = { }; static struct c2c_dimension dim_rmt_hitm = { - .header = HEADER_SPAN_LOW("Rmt"), + .header = HEADER_SPAN_LOW("RmtHitm"), .name = "rmt_hitm", .cmp = rmt_hitm_cmp, .entry = rmt_hitm_entry, @@ -1486,7 +1486,7 @@ static struct c2c_dimension dim_percent_hitm = { }; static struct c2c_dimension dim_percent_rmt_hitm = { - .header = HEADER_SPAN("----- HITM -----", "Rmt", 1), + .header = HEADER_SPAN("----- HITM -----", "RmtHitm", 1), .name = "percent_rmt_hitm", .cmp = percent_rmt_hitm_cmp, .entry = percent_rmt_hitm_entry, @@ -1495,7 +1495,7 @@ static struct c2c_dimension dim_percent_rmt_hitm = { }; static struct c2c_dimension dim_percent_lcl_hitm = { - .header = HEADER_SPAN_LOW("Lcl"), + .header = HEADER_SPAN_LOW("LclHitm"), .name = "percent_lcl_hitm", .cmp = percent_lcl_hitm_cmp, .entry = percent_lcl_hitm_entry, -- cgit v1.3.1 From ed626a3e52cacd324fa1b7c4cc4c252f8f39f4d4 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 14 Oct 2020 06:09:19 +0100 Subject: perf c2c: Change header for LLC local hit Replace the header string "Lcl" with "LclHit", which is more explicit to express the event type is LLC local hit. Signed-off-by: Leo Yan Tested-by: Joe Mario Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201014050921.5591-7-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 05eeca2f0a6a..fd7bfe42a629 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1432,7 +1432,7 @@ static struct c2c_dimension dim_ld_l2hit = { }; static struct c2c_dimension dim_ld_llchit = { - .header = HEADER_SPAN("-- LLC Load Hit --", "Llc", 1), + .header = HEADER_SPAN("-- LLC Load Hit --", "LclHit", 1), .name = "ld_lclhit", .cmp = ld_llchit_cmp, .entry = ld_llchit_entry, -- cgit v1.3.1 From 77c158698c395cf127fdc6e0b9699917f72ef47f Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 14 Oct 2020 06:09:20 +0100 Subject: perf c2c: Correct LLC load hit metrics "rmt_hit" is accounted into two metrics: one is accounted into the metrics "LLC Ld Miss" (see the function llc_miss() for calculation "llcmiss"); and it's accounted into metrics "LLC Load Hit". Thus, for the literal meaning, it is contradictory that "rmt_hit" is accounted for both "LLC Ld Miss" (LLC miss) and "LLC Load Hit" (LLC hit). Thus this is easily to introduce confusion: "LLC Load Hit" gives impression that all items belong to it are LLC hit; in fact "rmt_hit" is LLC miss and remote cache hit. To give out clear semantics for metric "LLC Load Hit", "rmt_hit" is moved out from it and changes "LLC Load Hit" to contain two items: LLC Load Hit = LLC's hit ("ld_llchit") + LLC's hitm ("lcl_hitm") For output alignment, adjusts the header for "LLC Load Hit". Signed-off-by: Leo Yan Tested-by: Joe Mario Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201014050921.5591-8-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index fd7bfe42a629..bfbcd635cd73 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1432,7 +1432,7 @@ static struct c2c_dimension dim_ld_l2hit = { }; static struct c2c_dimension dim_ld_llchit = { - .header = HEADER_SPAN("-- LLC Load Hit --", "LclHit", 1), + .header = HEADER_SPAN("- LLC Load Hit --", "LclHit", 1), .name = "ld_lclhit", .cmp = ld_llchit_cmp, .entry = ld_llchit_entry, @@ -2853,7 +2853,7 @@ static int perf_c2c__report(int argc, const char **argv) "tot_stores," "stores_l1hit,stores_l1miss," "ld_fbhit,ld_l1hit,ld_l2hit," - "ld_lclhit,ld_rmthit," + "ld_lclhit,lcl_hitm," "ld_llcmiss," "dram_lcl,dram_rmt", c2c.display == DISPLAY_TOT ? "tot_hitm" : -- cgit v1.3.1 From 91d933c22164e22315f860e7177fbeec5e8510b0 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 14 Oct 2020 06:09:21 +0100 Subject: perf c2c: Add metrics "RMT Load Hit" The metrics "LLC Ld Miss" and "Load Dram" overlap with each other for accouting items: "LLC Ld Miss" = "lcl_dram" + "rmt_dram" + "rmt_hit" + "rmt_hitm" "Load Dram" = "lcl_dram" + "rmt_dram" Furthermore, the metrics "LLC Ld Miss" is not directive to show statistics due to it contains summary value and cannot give out breakdown details. For this reason, add a new metrics "RMT Load Hit" which is used to present the remote cache hit; it contains two items: "RMT Load Hit" = remote hit ("rmt_hit") + remote hitm ("rmt_hitm") As result, the metrics "LLC Ld Miss" is perfectly divided into two metrics "RMT Load Hit" and "Load Dram". It's not necessary to keep metrics "LLC Ld Miss", so remove it. Before: # ----------- Cacheline ---------- Tot ------- Load Hitm ------- Total Total Total ---- Stores ---- ----- Core Load Hit ----- - LLC Load Hit -- LLC --- Load Dram ---- # Index Address Node PA cnt Hitm Total LclHitm RmtHitm records Loads Stores L1Hit L1Miss FB L1 L2 LclHit LclHitm Ld Miss Lcl Rmt # ..... .................. .... ...... ....... ....... ....... ....... ....... ....... ....... ....... ....... ....... ....... ....... ........ ....... ....... ........ ........ # 0 0x55f07d580100 0 1499 85.89% 481 481 0 7243 3879 3364 2599 765 548 2615 66 169 481 0 0 0 1 0x55f07d580080 0 1 13.93% 78 78 0 664 664 0 0 0 187 361 27 11 78 0 0 0 2 0x55f07d5800c0 0 1 0.18% 1 1 0 405 405 0 0 0 131 0 10 263 1 0 0 0 After: # ----------- Cacheline ---------- Tot ------- Load Hitm ------- Total Total Total ---- Stores ---- ----- Core Load Hit ----- - LLC Load Hit -- - RMT Load Hit -- --- Load Dram ---- # Index Address Node PA cnt Hitm Total LclHitm RmtHitm records Loads Stores L1Hit L1Miss FB L1 L2 LclHit LclHitm RmtHit RmtHitm Lcl Rmt # ..... .................. .... ...... ....... ....... ....... ....... ....... ....... ....... ....... ....... ....... ....... ....... ........ ....... ........ ....... ........ ........ # 0 0x55f07d580100 0 1499 85.89% 481 481 0 7243 3879 3364 2599 765 548 2615 66 169 481 0 0 0 0 1 0x55f07d580080 0 1 13.93% 78 78 0 664 664 0 0 0 187 361 27 11 78 0 0 0 0 2 0x55f07d5800c0 0 1 0.18% 1 1 0 405 405 0 0 0 131 0 10 263 1 0 0 0 0 Signed-off-by: Leo Yan Tested-by: Joe Mario Acked-by: Jiri Olsa Signed-off-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20201014050921.5591-9-leo.yan@linaro.org --- tools/perf/builtin-c2c.c | 52 ++---------------------------------------------- 1 file changed, 2 insertions(+), 50 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index bfbcd635cd73..d5bea5d3cd51 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -652,45 +652,6 @@ STAT_FN(ld_l2hit) STAT_FN(ld_llchit) STAT_FN(rmt_hit) -static uint64_t llc_miss(struct c2c_stats *stats) -{ - uint64_t llcmiss; - - llcmiss = stats->lcl_dram + - stats->rmt_dram + - stats->rmt_hitm + - stats->rmt_hit; - - return llcmiss; -} - -static int -ld_llcmiss_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct hist_entry *he) -{ - struct c2c_hist_entry *c2c_he; - int width = c2c_width(fmt, hpp, he->hists); - - c2c_he = container_of(he, struct c2c_hist_entry, he); - - return scnprintf(hpp->buf, hpp->size, "%*lu", width, - llc_miss(&c2c_he->stats)); -} - -static int64_t -ld_llcmiss_cmp(struct perf_hpp_fmt *fmt __maybe_unused, - struct hist_entry *left, struct hist_entry *right) -{ - struct c2c_hist_entry *c2c_left; - struct c2c_hist_entry *c2c_right; - - c2c_left = container_of(left, struct c2c_hist_entry, he); - c2c_right = container_of(right, struct c2c_hist_entry, he); - - return (uint64_t) llc_miss(&c2c_left->stats) - - (uint64_t) llc_miss(&c2c_right->stats); -} - static uint64_t total_records(struct c2c_stats *stats) { uint64_t lclmiss, ldcnt, total; @@ -1440,21 +1401,13 @@ static struct c2c_dimension dim_ld_llchit = { }; static struct c2c_dimension dim_ld_rmthit = { - .header = HEADER_SPAN_LOW("Rmt"), + .header = HEADER_SPAN("- RMT Load Hit --", "RmtHit", 1), .name = "ld_rmthit", .cmp = rmt_hit_cmp, .entry = rmt_hit_entry, .width = 8, }; -static struct c2c_dimension dim_ld_llcmiss = { - .header = HEADER_BOTH("LLC", "Ld Miss"), - .name = "ld_llcmiss", - .cmp = ld_llcmiss_cmp, - .entry = ld_llcmiss_entry, - .width = 7, -}; - static struct c2c_dimension dim_tot_recs = { .header = HEADER_BOTH("Total", "records"), .name = "tot_recs", @@ -1658,7 +1611,6 @@ static struct c2c_dimension *dimensions[] = { &dim_ld_l2hit, &dim_ld_llchit, &dim_ld_rmthit, - &dim_ld_llcmiss, &dim_tot_recs, &dim_tot_loads, &dim_percent_hitm, @@ -2854,7 +2806,7 @@ static int perf_c2c__report(int argc, const char **argv) "stores_l1hit,stores_l1miss," "ld_fbhit,ld_l1hit,ld_l2hit," "ld_lclhit,lcl_hitm," - "ld_llcmiss," + "ld_rmthit,rmt_hitm," "dram_lcl,dram_rmt", c2c.display == DISPLAY_TOT ? "tot_hitm" : c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm" -- cgit v1.3.1 From e688c3db7ca69bea1872c5706aec6a7fdf89df17 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 14 Oct 2020 10:56:08 -0700 Subject: bpf: Fix register equivalence tracking. The 64-bit JEQ/JNE handling in reg_set_min_max() was clearing reg->id in either true or false branch. In the case 'if (reg->id)' check was done on the other branch the counter part register would have reg->id == 0 when called into find_equal_scalars(). In such case the helper would incorrectly identify other registers with id == 0 as equivalent and propagate the state incorrectly. Fix it by preserving ID across reg_set_min_max(). In other words any kind of comparison operator on the scalar register should preserve its ID to recognize: r1 = r2 if (r1 == 20) { #1 here both r1 and r2 == 20 } else if (r2 < 20) { #2 here both r1 and r2 < 20 } The patch is addressing #1 case. The #2 was working correctly already. Fixes: 75748837b7e5 ("bpf: Propagate scalar ranges through register assignments.") Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: John Fastabend Tested-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201014175608.1416-1-alexei.starovoitov@gmail.com --- kernel/bpf/verifier.c | 38 ++++++++++++++++--------- tools/testing/selftests/bpf/verifier/regalloc.c | 26 +++++++++++++++++ 2 files changed, 51 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c43a5e8f0818..39d7f44e7c92 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1010,14 +1010,9 @@ static const int caller_saved[CALLER_SAVED_REGS] = { static void __mark_reg_not_init(const struct bpf_verifier_env *env, struct bpf_reg_state *reg); -/* Mark the unknown part of a register (variable offset or scalar value) as - * known to have the value @imm. - */ -static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm) +/* This helper doesn't clear reg->id */ +static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm) { - /* Clear id, off, and union(map_ptr, range) */ - memset(((u8 *)reg) + sizeof(reg->type), 0, - offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type)); reg->var_off = tnum_const(imm); reg->smin_value = (s64)imm; reg->smax_value = (s64)imm; @@ -1030,6 +1025,17 @@ static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm) reg->u32_max_value = (u32)imm; } +/* Mark the unknown part of a register (variable offset or scalar value) as + * known to have the value @imm. + */ +static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm) +{ + /* Clear id, off, and union(map_ptr, range) */ + memset(((u8 *)reg) + sizeof(reg->type), 0, + offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type)); + ___mark_reg_known(reg, imm); +} + static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm) { reg->var_off = tnum_const_subreg(reg->var_off, imm); @@ -7001,14 +7007,18 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, struct bpf_reg_state *reg = opcode == BPF_JEQ ? true_reg : false_reg; - /* For BPF_JEQ, if this is false we know nothing Jon Snow, but - * if it is true we know the value for sure. Likewise for - * BPF_JNE. + /* JEQ/JNE comparison doesn't change the register equivalence. + * r1 = r2; + * if (r1 == 42) goto label; + * ... + * label: // here both r1 and r2 are known to be 42. + * + * Hence when marking register as known preserve it's ID. */ if (is_jmp32) __mark_reg32_known(reg, val32); else - __mark_reg_known(reg, val); + ___mark_reg_known(reg, val); break; } case BPF_JSET: @@ -7551,7 +7561,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, reg_combine_min_max(&other_branch_regs[insn->src_reg], &other_branch_regs[insn->dst_reg], src_reg, dst_reg, opcode); - if (src_reg->id) { + if (src_reg->id && + !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) { find_equal_scalars(this_branch, src_reg); find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]); } @@ -7563,7 +7574,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, opcode, is_jmp32); } - if (dst_reg->type == SCALAR_VALUE && dst_reg->id) { + if (dst_reg->type == SCALAR_VALUE && dst_reg->id && + !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) { find_equal_scalars(this_branch, dst_reg); find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]); } diff --git a/tools/testing/selftests/bpf/verifier/regalloc.c b/tools/testing/selftests/bpf/verifier/regalloc.c index ac71b824f97a..4ad7e05de706 100644 --- a/tools/testing/selftests/bpf/verifier/regalloc.c +++ b/tools/testing/selftests/bpf/verifier/regalloc.c @@ -241,3 +241,29 @@ .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, +{ + "regalloc, spill, JEQ", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), /* spill r0 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0), + /* The verifier will walk the rest twice with r0 == 0 and r0 == map_value */ + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 20, 0), + /* The verifier will walk the rest two more times with r0 == 20 and r0 == unknown */ + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 with map_value */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 1), /* skip ldx if map_value == NULL */ + /* Buggy verifier will think that r3 == 20 here */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), /* read from map_value */ + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, -- cgit v1.3.1 From 744aec4df2c5b4d12af26a57d8858af2f59ef3d0 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 15 Oct 2020 15:45:48 +0100 Subject: perf c2c: Update documentation for metrics reorganization The output format for metrics has been reorganized, update documentation to reflect the changes for it. Signed-off-by: Leo Yan Cc: Al Grant Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Joe Mario Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20201015144548.18482-10-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-c2c.txt | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index 98efdab5fbd4..c81d72e3eecf 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -174,34 +174,36 @@ For each cacheline in the 1) list we display following data: Cacheline - cacheline address (hex number) - Total records - - sum of all cachelines accesses - Rmt/Lcl Hitm - cacheline percentage of all Remote/Local HITM accesses - LLC Load Hitm - Total, Lcl, Rmt + LLC Load Hitm - Total, LclHitm, RmtHitm - count of Total/Local/Remote load HITMs - Store Reference - Total, L1Hit, L1Miss - Total - all store accesses - L1Hit - store accesses that hit L1 - L1Hit - store accesses that missed L1 + Total records + - sum of all cachelines accesses - Load Dram - - count of local and remote DRAM accesses + Total loads + - sum of all load accesses - LLC Ld Miss - - count of all accesses that missed LLC + Total stores + - sum of all store accesses - Total Loads - - sum of all load accesses + Store Reference - L1Hit, L1Miss + L1Hit - store accesses that hit L1 + L1Miss - store accesses that missed L1 Core Load Hit - FB, L1, L2 - count of load hits in FB (Fill Buffer), L1 and L2 cache - LLC Load Hit - Llc, Rmt - - count of LLC and Remote load hits + LLC Load Hit - LlcHit, LclHitm + - count of LLC load accesses, includes LLC hits and LLC HITMs + + RMT Load Hit - RmtHit, RmtHitm + - count of remote load accesses, includes remote hits and remote HITMs + + Load Dram - Lcl, Rmt + - count of local and remote DRAM accesses For each offset in the 2) list we display following data: -- cgit v1.3.1 From 96378b2088faea68f1fb05ea6b9a566fc569a44c Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 13 Oct 2020 09:17:56 -0500 Subject: selftests/ftrace: Change synthetic event name for inter-event-combined test This test uses waking+wakeup_latency as an event name, which doesn't make sense since it includes an operator. Illegal names are now detected by the synthetic event command parsing, which causes this test to fail. Change the name to 'waking_plus_wakeup_latency' to prevent this. Link: https://lkml.kernel.org/r/a1ee2f76ff28ef7166fb788ca8be968887808920.1602598160.git.zanussi@kernel.org Fixes: f06eec4d0f2c (selftests: ftrace: Add inter-event hist triggers testcases) Acked-by: Masami Hiramatsu Tested-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- .../trigger/inter-event/trigger-inter-event-combined-hist.tc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc index 7449a4b8f1f9..9098f1e7433f 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc @@ -25,12 +25,12 @@ echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger -echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events -echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger -echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger +echo 'waking_plus_wakeup_latency u64 lat; pid_t pid' >> synthetic_events +echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking_plus_wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger +echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking_plus_wakeup_latency/trigger ping $LOCALHOST -c 3 -if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then +if ! grep -q "pid:" events/synthetic/waking_plus_wakeup_latency/hist; then fail "Failed to create combined histogram" fi -- cgit v1.3.1 From 81ff92a93d954efa29e1d514934d6a47cd5f4558 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 13 Oct 2020 09:17:58 -0500 Subject: selftests/ftrace: Add test case for synthetic event syntax errors Add a selftest that verifies that the syntax error messages and caret positions are correct for most of the possible synthetic event syntax error cases. Link: https://lkml.kernel.org/r/af611928ce79f86eaf0af8654f1d7802d5cc21ff.1602598160.git.zanussi@kernel.org Tested-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- .../trigger-synthetic_event_syntax_errors.tc | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc new file mode 100644 index 000000000000..ada594fe16cb --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc @@ -0,0 +1,19 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event trigger - test synthetic_events syntax parser errors +# requires: synthetic_events error_log + +check_error() { # command-with-error-pos-by-^ + ftrace_errlog_check 'synthetic_events' "$1" 'synthetic_events' +} + +check_error 'myevent ^chr arg' # INVALID_TYPE +check_error 'myevent ^char str[];; int v' # INVALID_TYPE +check_error 'myevent char ^str]; int v' # INVALID_NAME +check_error 'myevent char ^str;[]' # INVALID_NAME +check_error 'myevent ^char str[; int v' # INVALID_TYPE +check_error '^mye;vent char str[]' # BAD_NAME +check_error 'myevent char str[]; ^int' # INVALID_FIELD +check_error '^myevent' # INCOMPLETE_CMD + +exit 0 -- cgit v1.3.1 From 8a3feed90e75283f15f830b1051bf048e4127c91 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 12 Oct 2020 16:49:40 -0700 Subject: bpf, selftest: Fix flaky tcp_hdr_options test when adding addr to lo The tcp_hdr_options test adds a "::eB9F" addr to the lo dev. However, this non loopback address will have a race on ipv6 dad which may lead to EADDRNOTAVAIL error from time to time. Even nodad is used in the iproute2 command, there is still a race in when the route will be added. This will then lead to ENETUNREACH from time to time. To avoid the above, this patch uses the default loopback address "::1" to do the test. Fixes: ad2f8eb0095e ("bpf: selftests: Tcp header options") Reported-by: Andrii Nakryiko Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201012234940.1707941-1-kafai@fb.com --- .../selftests/bpf/prog_tests/tcp_hdr_options.c | 26 +--------------------- .../bpf/progs/test_misc_tcp_hdr_options.c | 2 +- 2 files changed, 2 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index c86e67214a9e..c85174cdcb77 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -15,7 +15,7 @@ #include "test_tcp_hdr_options.skel.h" #include "test_misc_tcp_hdr_options.skel.h" -#define LO_ADDR6 "::eB9F" +#define LO_ADDR6 "::1" #define CG_NAME "/tcpbpf-hdr-opt-test" struct bpf_test_option exp_passive_estab_in; @@ -40,27 +40,6 @@ struct sk_fds { int active_lport; }; -static int add_lo_addr(void) -{ - char ip_addr_cmd[256]; - int cmdlen; - - cmdlen = snprintf(ip_addr_cmd, sizeof(ip_addr_cmd), - "ip -6 addr add %s/128 dev lo scope host", - LO_ADDR6); - - if (CHECK(cmdlen >= sizeof(ip_addr_cmd), "compile ip cmd", - "failed to add host addr %s to lo. ip cmdlen is too long\n", - LO_ADDR6)) - return -1; - - if (CHECK(system(ip_addr_cmd), "run ip cmd", - "failed to add host addr %s to lo\n", LO_ADDR6)) - return -1; - - return 0; -} - static int create_netns(void) { if (CHECK(unshare(CLONE_NEWNET), "create netns", @@ -72,9 +51,6 @@ static int create_netns(void) "failed to bring lo link up\n")) return -1; - if (add_lo_addr()) - return -1; - return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c index 72ec0178f653..6077a025092c 100644 --- a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c @@ -16,7 +16,7 @@ #define BPF_PROG_TEST_TCP_HDR_OPTIONS #include "test_tcp_hdr_options.h" -__u16 last_addr16_n = __bpf_htons(0xeB9F); +__u16 last_addr16_n = __bpf_htons(1); __u16 active_lport_n = 0; __u16 active_lport_h = 0; __u16 passive_lport_n = 0; -- cgit v1.3.1 From 206e22f01941b19f9466f48b53cc0d19de493e7a Mon Sep 17 00:00:00 2001 From: Chris Kennelly Date: Thu, 15 Oct 2020 20:12:36 -0700 Subject: tools/testing/selftests: add self-test for verifying load alignment This produces a PIE binary with a variety of p_align requirements, suitable for verifying that the load address meets that alignment requirement. Signed-off-by: Chris Kennelly Signed-off-by: Andrew Morton Cc: Shuah Khan Cc: Alexander Viro Cc: Alexey Dobriyan Cc: David Rientjes Cc: Fangrui Song Cc: Hugh Dickens Cc: Ian Rogers Cc: "Kirill A. Shutemov" Cc: Mike Kravetz Cc: Nick Desaulniers Cc: Sandeep Patil Cc: Song Liu Cc: Suren Baghdasaryan Link: https://lkml.kernel.org/r/20200820170541.1132271-3-ckennelly@google.com Link: https://lkml.kernel.org/r/20200821233848.3904680-3-ckennelly@google.com Signed-off-by: Linus Torvalds --- tools/testing/selftests/exec/.gitignore | 1 + tools/testing/selftests/exec/Makefile | 9 +++- tools/testing/selftests/exec/load_address.c | 68 +++++++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/exec/load_address.c (limited to 'tools') diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore index 344a99c6da1b..9e2f00343f15 100644 --- a/tools/testing/selftests/exec/.gitignore +++ b/tools/testing/selftests/exec/.gitignore @@ -7,6 +7,7 @@ execveat.moved execveat.path.ephemeral execveat.ephemeral execveat.denatured +/load_address_* /recursion-depth xxxxxxxx* pipe diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index 0a13b110c1e6..cf69b2fcce59 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -4,7 +4,7 @@ CFLAGS += -Wno-nonnull CFLAGS += -D_GNU_SOURCE TEST_PROGS := binfmt_script non-regular -TEST_GEN_PROGS := execveat +TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe # Makefile is a run-time dependency, since it's accessed by the execveat test TEST_FILES := Makefile @@ -27,4 +27,9 @@ $(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat $(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat cp $< $@ chmod -x $@ - +$(OUTPUT)/load_address_4096: load_address.c + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie $< -o $@ +$(OUTPUT)/load_address_2097152: load_address.c + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie $< -o $@ +$(OUTPUT)/load_address_16777216: load_address.c + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie $< -o $@ diff --git a/tools/testing/selftests/exec/load_address.c b/tools/testing/selftests/exec/load_address.c new file mode 100644 index 000000000000..d487c2f6a615 --- /dev/null +++ b/tools/testing/selftests/exec/load_address.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0-only +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include +#include +#include + +struct Statistics { + unsigned long long load_address; + unsigned long long alignment; +}; + +int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data) +{ + struct Statistics *stats = (struct Statistics *) data; + int i; + + if (info->dlpi_name != NULL && info->dlpi_name[0] != '\0') { + // Ignore headers from other than the executable. + return 2; + } + + stats->load_address = (unsigned long long) info->dlpi_addr; + stats->alignment = 0; + + for (i = 0; i < info->dlpi_phnum; i++) { + if (info->dlpi_phdr[i].p_type != PT_LOAD) + continue; + + if (info->dlpi_phdr[i].p_align > stats->alignment) + stats->alignment = info->dlpi_phdr[i].p_align; + } + + return 1; // Terminate dl_iterate_phdr. +} + +int main(int argc, char **argv) +{ + struct Statistics extracted; + unsigned long long misalign; + int ret; + + ret = dl_iterate_phdr(ExtractStatistics, &extracted); + if (ret != 1) { + fprintf(stderr, "FAILED\n"); + return 1; + } + + if (extracted.alignment == 0) { + fprintf(stderr, "No alignment found\n"); + return 1; + } else if (extracted.alignment & (extracted.alignment - 1)) { + fprintf(stderr, "Alignment is not a power of 2\n"); + return 1; + } + + misalign = extracted.load_address & (extracted.alignment - 1); + if (misalign) { + printf("alignment = %llu, load_address = %llu\n", + extracted.alignment, extracted.load_address); + fprintf(stderr, "FAILED\n"); + return 1; + } + + fprintf(stderr, "PASS\n"); + return 0; +} -- cgit v1.3.1 From 255965309104fc62e3161997b93aea31c2c59941 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Sat, 17 Oct 2020 16:14:47 -0700 Subject: selftests/vm: 10x speedup for hmm-tests This patch reduces the running time for hmm-tests from about 10+ seconds, to just under 1.0 second, for an approximately 10x speedup. That brings it in line with most of the other tests in selftests/vm, which mostly run in < 1 sec. This is done with a one-line change that simply reduces the number of iterations of several tests, from 256, to 10. Thanks to Ralph Campbell for suggesting changing NTIMES as a way to get the speedup. Suggested-by: Ralph Campbell Signed-off-by: John Hubbard Signed-off-by: Andrew Morton Cc: SeongJae Park Cc: Shuah Khan Link: https://lkml.kernel.org/r/20201003011721.44238-1-jhubbard@nvidia.com Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/hmm-tests.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c index 0a28a6a29581..c9404ef9698e 100644 --- a/tools/testing/selftests/vm/hmm-tests.c +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -45,7 +45,7 @@ struct hmm_buffer { #define TWOMEG (1 << 21) #define HMM_BUFFER_SIZE (1024 << 12) #define HMM_PATH_MAX 64 -#define NTIMES 256 +#define NTIMES 10 #define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) -- cgit v1.3.1 From 71a0e29e99405d89b695882d52eec60844173697 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 15 Oct 2020 11:45:25 +0300 Subject: selftests: forwarding: Add missing 'rp_filter' configuration When 'rp_filter' is configured in strict mode (1) the tests fail because packets received from the macvlan netdevs would not be forwarded through them on the reverse path. Fix this by disabling the 'rp_filter', meaning no source validation is performed. Fixes: 1538812e0880 ("selftests: forwarding: Add a test for VXLAN asymmetric routing") Fixes: 438a4f5665b2 ("selftests: forwarding: Add a test for VXLAN symmetric routing") Signed-off-by: Ido Schimmel Reported-by: Hangbin Liu Tested-by: Hangbin Liu Link: https://lore.kernel.org/r/20201015084525.135121-1-idosch@idosch.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh | 10 ++++++++++ tools/testing/selftests/net/forwarding/vxlan_symmetric.sh | 10 ++++++++++ 2 files changed, 20 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh index a0b5f57d6bd3..0727e2012b68 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh @@ -215,10 +215,16 @@ switch_create() bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 + + sysctl_set net.ipv4.conf.all.rp_filter 0 + sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0 + sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0 } switch_destroy() { + sysctl_restore net.ipv4.conf.all.rp_filter + bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20 bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10 @@ -359,6 +365,10 @@ ns_switch_create() bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 + + sysctl_set net.ipv4.conf.all.rp_filter 0 + sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0 + sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0 } export -f ns_switch_create diff --git a/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh b/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh index 1209031bc794..5d97fa347d75 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh @@ -237,10 +237,16 @@ switch_create() bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 + + sysctl_set net.ipv4.conf.all.rp_filter 0 + sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0 + sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0 } switch_destroy() { + sysctl_restore net.ipv4.conf.all.rp_filter + bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20 bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10 @@ -402,6 +408,10 @@ ns_switch_create() bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 + + sysctl_set net.ipv4.conf.all.rp_filter 0 + sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0 + sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0 } export -f ns_switch_create -- cgit v1.3.1 From d1781f23704707d350b8c9006e2bdf5394bf91b2 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 13 Oct 2020 15:37:41 +1100 Subject: selftests/powerpc: Make alignment handler test P9N DD2.1 vector CI load workaround alignment_handler currently only tests the unaligned cases but it can also be useful for testing the workaround for the P9N DD2.1 vector CI load issue fixed by p9_hmi_special_emu(). This workaround was introduced in 5080332c2c89 ("powerpc/64s: Add workaround for P9 vector CI load issue"). This changes the loop to start from offset 0 rather than 1 so that we test the kernel emulation in p9_hmi_special_emu(). Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201013043741.743413-2-mikey@neuling.org --- tools/testing/selftests/powerpc/alignment/alignment_handler.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index 2a0503bc7e49..cb53a8b777e6 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -266,8 +266,12 @@ int do_test(char *test_name, void (*test_func)(char *, char *)) } rc = 0; - /* offset = 0 no alignment fault, so skip */ - for (offset = 1; offset < 16; offset++) { + /* + * offset = 0 is aligned but tests the workaround for the P9N + * DD2.1 vector CI load issue (see 5080332c2c89 "powerpc/64s: + * Add workaround for P9 vector CI load issue") + */ + for (offset = 0; offset < 16; offset++) { width = 16; /* vsx == 16 bytes */ r = 0; -- cgit v1.3.1 From e710bcc6d92c47bb7d8e803b41ef529c09ad6a9e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 19 Oct 2020 12:42:19 -0700 Subject: bpf: selftest: Ensure the return value of bpf_skc_to helpers must be checked This patch tests: int bpf_cls(struct __sk_buff *skb) { /* REG_6: sk * REG_7: tp * REG_8: req_sk */ sk = skb->sk; if (!sk) return 0; tp = bpf_skc_to_tcp_sock(sk); req_sk = bpf_skc_to_tcp_request_sock(sk); if (!req_sk) return 0; /* !tp has not been tested, so verifier should reject. */ return *(__u8 *)tp; } Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201019194219.1051314-1-kafai@fb.com --- tools/testing/selftests/bpf/verifier/sock.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c index b1aac2641498..ce13ece08d51 100644 --- a/tools/testing/selftests/bpf/verifier/sock.c +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -631,3 +631,28 @@ .prog_type = BPF_PROG_TYPE_SK_REUSEPORT, .result = ACCEPT, }, +{ + "mark null check on return value of bpf_skc_to helpers", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_request_sock), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_8, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", + .result_unpriv = REJECT, + .errstr_unpriv = "unknown func", +}, -- cgit v1.3.1 From 8568c3cefd5143fa0dc09f90e1bc9dc8905292f4 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 19 Oct 2020 12:42:25 -0700 Subject: bpf: selftest: Ensure the return value of the bpf_per_cpu_ptr() must be checked This patch tests all pointers returned by bpf_per_cpu_ptr() must be tested for NULL first before it can be accessed. This patch adds a subtest "null_check", so it moves the ".data..percpu" existence check to the very beginning and before doing any subtest. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201019194225.1051596-1-kafai@fb.com --- tools/testing/selftests/bpf/prog_tests/ksyms_btf.c | 57 +++++++++++++++------- .../bpf/progs/test_ksyms_btf_null_check.c | 31 ++++++++++++ 2 files changed, 70 insertions(+), 18 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c index 28e26bd3e0ca..b58b775d19f3 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c @@ -5,18 +5,17 @@ #include #include #include "test_ksyms_btf.skel.h" +#include "test_ksyms_btf_null_check.skel.h" static int duration; -void test_ksyms_btf(void) +static void test_basic(void) { __u64 runqueues_addr, bpf_prog_active_addr; __u32 this_rq_cpu; int this_bpf_prog_active; struct test_ksyms_btf *skel = NULL; struct test_ksyms_btf__data *data; - struct btf *btf; - int percpu_datasec; int err; err = kallsyms_find("runqueues", &runqueues_addr); @@ -31,20 +30,6 @@ void test_ksyms_btf(void) if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_prog_active' not found\n")) return; - btf = libbpf_find_kernel_btf(); - if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n", - PTR_ERR(btf))) - return; - - percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu", - BTF_KIND_DATASEC); - if (percpu_datasec < 0) { - printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n", - __func__); - test__skip(); - goto cleanup; - } - skel = test_ksyms_btf__open_and_load(); if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n")) goto cleanup; @@ -83,6 +68,42 @@ void test_ksyms_btf(void) data->out__bpf_prog_active); cleanup: - btf__free(btf); test_ksyms_btf__destroy(skel); } + +static void test_null_check(void) +{ + struct test_ksyms_btf_null_check *skel; + + skel = test_ksyms_btf_null_check__open_and_load(); + CHECK(skel, "skel_open", "unexpected load of a prog missing null check\n"); + + test_ksyms_btf_null_check__destroy(skel); +} + +void test_ksyms_btf(void) +{ + int percpu_datasec; + struct btf *btf; + + btf = libbpf_find_kernel_btf(); + if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n", + PTR_ERR(btf))) + return; + + percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu", + BTF_KIND_DATASEC); + btf__free(btf); + if (percpu_datasec < 0) { + printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n", + __func__); + test__skip(); + return; + } + + if (test__start_subtest("basic")) + test_basic(); + + if (test__start_subtest("null_check")) + test_null_check(); +} diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c new file mode 100644 index 000000000000..8bc8f7c637bc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" + +#include + +extern const struct rq runqueues __ksym; /* struct type global var. */ +extern const int bpf_prog_active __ksym; /* int type global var. */ + +SEC("raw_tp/sys_enter") +int handler(const void *ctx) +{ + struct rq *rq; + int *active; + __u32 cpu; + + cpu = bpf_get_smp_processor_id(); + rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, cpu); + active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu); + if (active) { + /* READ_ONCE */ + *(volatile int *)active; + /* !rq has not been tested, so verifier should reject. */ + *(volatile int *)(&rq->cpu); + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.3.1 From 26ebd6fed9bb3aa480c7c0f147ac0e7b11000f65 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Mon, 19 Oct 2020 11:09:28 +0800 Subject: selftests: rtnetlink: load fou module for kci_test_encap_fou() test The kci_test_encap_fou() test from kci_test_encap() in rtnetlink.sh needs the fou module to work. Otherwise it will fail with: $ ip netns exec "$testns" ip fou add port 7777 ipproto 47 RTNETLINK answers: No such file or directory Error talking to the kernel Add the CONFIG_NET_FOU into the config file as well. Which needs at least to be set as a loadable module. Fixes: 6227efc1a20b ("selftests: rtnetlink.sh: add vxlan and fou test cases") Signed-off-by: Po-Hsu Lin Link: https://lore.kernel.org/r/20201019030928.9859-1-po-hsu.lin@canonical.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 1 + tools/testing/selftests/net/rtnetlink.sh | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 43649242adc0..4d5df8e1eee7 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -33,3 +33,4 @@ CONFIG_KALLSYMS=y CONFIG_TRACEPOINTS=y CONFIG_NET_DROP_MONITOR=m CONFIG_NETDEVSIM=m +CONFIG_NET_FOU=m diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 8a2fe6d64bf2..c9ce3dfa42ee 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -520,6 +520,11 @@ kci_test_encap_fou() return $ksft_skip fi + if ! /sbin/modprobe -q -n fou; then + echo "SKIP: module fou is not found" + return $ksft_skip + fi + /sbin/modprobe -q fou ip -netns "$testns" fou add port 7777 ipproto 47 2>/dev/null if [ $? -ne 0 ];then echo "FAIL: can't add fou port 7777, skipping test" -- cgit v1.3.1 From a4fd8414659bf470e2146b352574bbd274e54b7a Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 15 Oct 2020 09:00:20 -0700 Subject: selftests/timens: Add a test for futex() Output on success: 1..2 ok 1 futex with the 0 clockid ok 2 futex with the 1 clockid # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Andrei Vagin Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201015160020.293748-2-avagin@gmail.com --- tools/testing/selftests/timens/Makefile | 2 +- tools/testing/selftests/timens/futex.c | 110 ++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/timens/futex.c (limited to 'tools') diff --git a/tools/testing/selftests/timens/Makefile b/tools/testing/selftests/timens/Makefile index b4fd9a934654..3a5936cc10ab 100644 --- a/tools/testing/selftests/timens/Makefile +++ b/tools/testing/selftests/timens/Makefile @@ -1,4 +1,4 @@ -TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec +TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex TEST_GEN_PROGS_EXTENDED := gettime_perf CFLAGS := -Wall -Werror -pthread diff --git a/tools/testing/selftests/timens/futex.c b/tools/testing/selftests/timens/futex.c new file mode 100644 index 000000000000..6b2b9264e851 --- /dev/null +++ b/tools/testing/selftests/timens/futex.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "log.h" +#include "timens.h" + +#define NSEC_PER_SEC 1000000000ULL + +static int run_test(int clockid) +{ + int futex_op = FUTEX_WAIT_BITSET; + struct timespec timeout, end; + int val = 0; + + if (clockid == CLOCK_REALTIME) + futex_op |= FUTEX_CLOCK_REALTIME; + + clock_gettime(clockid, &timeout); + timeout.tv_nsec += NSEC_PER_SEC / 10; // 100ms + if (timeout.tv_nsec > NSEC_PER_SEC) { + timeout.tv_sec++; + timeout.tv_nsec -= NSEC_PER_SEC; + } + + if (syscall(__NR_futex, &val, futex_op, 0, + &timeout, 0, FUTEX_BITSET_MATCH_ANY) >= 0) { + ksft_test_result_fail("futex didn't return ETIMEDOUT\n"); + return 1; + } + + if (errno != ETIMEDOUT) { + ksft_test_result_fail("futex didn't return ETIMEDOUT: %s\n", + strerror(errno)); + return 1; + } + + clock_gettime(clockid, &end); + + if (end.tv_sec < timeout.tv_sec || + (end.tv_sec == timeout.tv_sec && end.tv_nsec < timeout.tv_nsec)) { + ksft_test_result_fail("futex slept less than 100ms\n"); + return 1; + } + + + ksft_test_result_pass("futex with the %d clockid\n", clockid); + + return 0; +} + +int main(int argc, char *argv[]) +{ + int status, len, fd; + char buf[4096]; + pid_t pid; + struct timespec mtime_now; + + nscheck(); + + ksft_set_plan(2); + + clock_gettime(CLOCK_MONOTONIC, &mtime_now); + + if (unshare_timens()) + return 1; + + len = snprintf(buf, sizeof(buf), "%d %d 0", + CLOCK_MONOTONIC, 70 * 24 * 3600); + fd = open("/proc/self/timens_offsets", O_WRONLY); + if (fd < 0) + return pr_perror("/proc/self/timens_offsets"); + + if (write(fd, buf, len) != len) + return pr_perror("/proc/self/timens_offsets"); + + close(fd); + + pid = fork(); + if (pid < 0) + return pr_perror("Unable to fork"); + if (pid == 0) { + int ret = 0; + + ret |= run_test(CLOCK_REALTIME); + ret |= run_test(CLOCK_MONOTONIC); + if (ret) + ksft_exit_fail(); + ksft_exit_pass(); + return 0; + } + + if (waitpid(pid, &status, 0) != pid) + return pr_perror("Unable to wait the child process"); + + if (WIFEXITED(status)) + return WEXITSTATUS(status); + + return 1; +} -- cgit v1.3.1 From ba452c9e996d8a4c347b32805f91abb70de5de7e Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 20 Oct 2020 23:25:56 +0200 Subject: bpf: Fix bpf_redirect_neigh helper api to support supplying nexthop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on the discussion in [0], update the bpf_redirect_neigh() helper to accept an optional parameter specifying the nexthop information. This makes it possible to combine bpf_fib_lookup() and bpf_redirect_neigh() without incurring a duplicate FIB lookup - since the FIB lookup helper will return the nexthop information even if no neighbour is present, this can simply be passed on to bpf_redirect_neigh() if bpf_fib_lookup() returns BPF_FIB_LKUP_RET_NO_NEIGH. Thus fix & extend it before helper API is frozen. [0] https://lore.kernel.org/bpf/393e17fc-d187-3a8d-2f0d-a627c7c63fca@iogearbox.net/ Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Reviewed-by: David Ahern Link: https://lore.kernel.org/bpf/160322915615.32199.1187570224032024535.stgit@toke.dk --- include/linux/filter.h | 9 +++ include/uapi/linux/bpf.h | 22 ++++-- net/core/filter.c | 158 ++++++++++++++++++++++++++--------------- scripts/bpf_helpers_doc.py | 1 + tools/include/uapi/linux/bpf.h | 22 ++++-- 5 files changed, 145 insertions(+), 67 deletions(-) (limited to 'tools') diff --git a/include/linux/filter.h b/include/linux/filter.h index 20fc24c9779a..72d62cbc1578 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -607,12 +607,21 @@ struct bpf_skb_data_end { void *data_end; }; +struct bpf_nh_params { + u32 nh_family; + union { + u32 ipv4_nh; + struct in6_addr ipv6_nh; + }; +}; + struct bpf_redirect_info { u32 flags; u32 tgt_index; void *tgt_value; struct bpf_map *map; u32 kern_flags; + struct bpf_nh_params nh; }; DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bf5a99d803e4..e6ceac3f7d62 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3677,15 +3677,19 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * long bpf_redirect_neigh(u32 ifindex, u64 flags) + * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags) * Description * Redirect the packet to another net device of index *ifindex* * and fill in L2 addresses from neighboring subsystem. This helper * is somewhat similar to **bpf_redirect**\ (), except that it * populates L2 addresses as well, meaning, internally, the helper - * performs a FIB lookup based on the skb's networking header to - * get the address of the next hop and then relies on the neighbor - * lookup for the L2 address of the nexthop. + * relies on the neighbor lookup for the L2 address of the nexthop. + * + * The helper will perform a FIB lookup based on the skb's + * networking header to get the address of the next hop, unless + * this is supplied by the caller in the *params* argument. The + * *plen* argument indicates the len of *params* and should be set + * to 0 if *params* is NULL. * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types, and enabled @@ -4906,6 +4910,16 @@ struct bpf_fib_lookup { __u8 dmac[6]; /* ETH_ALEN */ }; +struct bpf_redir_neigh { + /* network family for lookup (AF_INET, AF_INET6) */ + __u32 nh_family; + /* network address of nexthop; skips fib lookup to find gateway */ + union { + __be32 ipv4_nh; + __u32 ipv6_nh[4]; /* in6_addr; network order */ + }; +}; + enum bpf_task_fd_type { BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ BPF_FD_TYPE_TRACEPOINT, /* tp name */ diff --git a/net/core/filter.c b/net/core/filter.c index c5e2a1c5fd8d..6d0fa65a4a46 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2165,12 +2165,12 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev, } #if IS_ENABLED(CONFIG_IPV6) -static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb) +static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, + struct net_device *dev, struct bpf_nh_params *nh) { - struct dst_entry *dst = skb_dst(skb); - struct net_device *dev = dst->dev; u32 hh_len = LL_RESERVED_SPACE(dev); const struct in6_addr *nexthop; + struct dst_entry *dst = NULL; struct neighbour *neigh; if (dev_xmit_recursion()) { @@ -2196,8 +2196,13 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb) } rcu_read_lock_bh(); - nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst), - &ipv6_hdr(skb)->daddr); + if (!nh) { + dst = skb_dst(skb); + nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst), + &ipv6_hdr(skb)->daddr); + } else { + nexthop = &nh->ipv6_nh; + } neigh = ip_neigh_gw6(dev, nexthop); if (likely(!IS_ERR(neigh))) { int ret; @@ -2210,36 +2215,43 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb) return ret; } rcu_read_unlock_bh(); - IP6_INC_STATS(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + if (dst) + IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); out_drop: kfree_skb(skb); return -ENETDOWN; } -static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev) +static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev, + struct bpf_nh_params *nh) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct net *net = dev_net(dev); int err, ret = NET_XMIT_DROP; - struct dst_entry *dst; - struct flowi6 fl6 = { - .flowi6_flags = FLOWI_FLAG_ANYSRC, - .flowi6_mark = skb->mark, - .flowlabel = ip6_flowinfo(ip6h), - .flowi6_oif = dev->ifindex, - .flowi6_proto = ip6h->nexthdr, - .daddr = ip6h->daddr, - .saddr = ip6h->saddr, - }; - dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL); - if (IS_ERR(dst)) - goto out_drop; + if (!nh) { + struct dst_entry *dst; + struct flowi6 fl6 = { + .flowi6_flags = FLOWI_FLAG_ANYSRC, + .flowi6_mark = skb->mark, + .flowlabel = ip6_flowinfo(ip6h), + .flowi6_oif = dev->ifindex, + .flowi6_proto = ip6h->nexthdr, + .daddr = ip6h->daddr, + .saddr = ip6h->saddr, + }; + + dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL); + if (IS_ERR(dst)) + goto out_drop; - skb_dst_set(skb, dst); + skb_dst_set(skb, dst); + } else if (nh->nh_family != AF_INET6) { + goto out_drop; + } - err = bpf_out_neigh_v6(net, skb); + err = bpf_out_neigh_v6(net, skb, dev, nh); if (unlikely(net_xmit_eval(err))) dev->stats.tx_errors++; else @@ -2252,7 +2264,8 @@ out_xmit: return ret; } #else -static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev) +static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev, + struct bpf_nh_params *nh) { kfree_skb(skb); return NET_XMIT_DROP; @@ -2260,11 +2273,9 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev) #endif /* CONFIG_IPV6 */ #if IS_ENABLED(CONFIG_INET) -static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb) +static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, + struct net_device *dev, struct bpf_nh_params *nh) { - struct dst_entry *dst = skb_dst(skb); - struct rtable *rt = container_of(dst, struct rtable, dst); - struct net_device *dev = dst->dev; u32 hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; bool is_v6gw = false; @@ -2292,7 +2303,21 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb) } rcu_read_lock_bh(); - neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); + if (!nh) { + struct dst_entry *dst = skb_dst(skb); + struct rtable *rt = container_of(dst, struct rtable, dst); + + neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); + } else if (nh->nh_family == AF_INET6) { + neigh = ip_neigh_gw6(dev, &nh->ipv6_nh); + is_v6gw = true; + } else if (nh->nh_family == AF_INET) { + neigh = ip_neigh_gw4(dev, nh->ipv4_nh); + } else { + rcu_read_unlock_bh(); + goto out_drop; + } + if (likely(!IS_ERR(neigh))) { int ret; @@ -2309,33 +2334,37 @@ out_drop: return -ENETDOWN; } -static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev) +static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev, + struct bpf_nh_params *nh) { const struct iphdr *ip4h = ip_hdr(skb); struct net *net = dev_net(dev); int err, ret = NET_XMIT_DROP; - struct rtable *rt; - struct flowi4 fl4 = { - .flowi4_flags = FLOWI_FLAG_ANYSRC, - .flowi4_mark = skb->mark, - .flowi4_tos = RT_TOS(ip4h->tos), - .flowi4_oif = dev->ifindex, - .flowi4_proto = ip4h->protocol, - .daddr = ip4h->daddr, - .saddr = ip4h->saddr, - }; - rt = ip_route_output_flow(net, &fl4, NULL); - if (IS_ERR(rt)) - goto out_drop; - if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { - ip_rt_put(rt); - goto out_drop; - } + if (!nh) { + struct flowi4 fl4 = { + .flowi4_flags = FLOWI_FLAG_ANYSRC, + .flowi4_mark = skb->mark, + .flowi4_tos = RT_TOS(ip4h->tos), + .flowi4_oif = dev->ifindex, + .flowi4_proto = ip4h->protocol, + .daddr = ip4h->daddr, + .saddr = ip4h->saddr, + }; + struct rtable *rt; + + rt = ip_route_output_flow(net, &fl4, NULL); + if (IS_ERR(rt)) + goto out_drop; + if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { + ip_rt_put(rt); + goto out_drop; + } - skb_dst_set(skb, &rt->dst); + skb_dst_set(skb, &rt->dst); + } - err = bpf_out_neigh_v4(net, skb); + err = bpf_out_neigh_v4(net, skb, dev, nh); if (unlikely(net_xmit_eval(err))) dev->stats.tx_errors++; else @@ -2348,14 +2377,16 @@ out_xmit: return ret; } #else -static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev) +static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev, + struct bpf_nh_params *nh) { kfree_skb(skb); return NET_XMIT_DROP; } #endif /* CONFIG_INET */ -static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev) +static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev, + struct bpf_nh_params *nh) { struct ethhdr *ethh = eth_hdr(skb); @@ -2370,9 +2401,9 @@ static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev) skb_reset_network_header(skb); if (skb->protocol == htons(ETH_P_IP)) - return __bpf_redirect_neigh_v4(skb, dev); + return __bpf_redirect_neigh_v4(skb, dev, nh); else if (skb->protocol == htons(ETH_P_IPV6)) - return __bpf_redirect_neigh_v6(skb, dev); + return __bpf_redirect_neigh_v6(skb, dev, nh); out: kfree_skb(skb); return -ENOTSUPP; @@ -2382,7 +2413,8 @@ out: enum { BPF_F_NEIGH = (1ULL << 1), BPF_F_PEER = (1ULL << 2), -#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER) + BPF_F_NEXTHOP = (1ULL << 3), +#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER | BPF_F_NEXTHOP) }; BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) @@ -2455,7 +2487,8 @@ int skb_do_redirect(struct sk_buff *skb) return -EAGAIN; } return flags & BPF_F_NEIGH ? - __bpf_redirect_neigh(skb, dev) : + __bpf_redirect_neigh(skb, dev, flags & BPF_F_NEXTHOP ? + &ri->nh : NULL) : __bpf_redirect(skb, dev, flags); out_drop: kfree_skb(skb); @@ -2504,16 +2537,21 @@ static const struct bpf_func_proto bpf_redirect_peer_proto = { .arg2_type = ARG_ANYTHING, }; -BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags) +BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params, + int, plen, u64, flags) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - if (unlikely(flags)) + if (unlikely((plen && plen < sizeof(*params)) || flags)) return TC_ACT_SHOT; - ri->flags = BPF_F_NEIGH; + ri->flags = BPF_F_NEIGH | (plen ? BPF_F_NEXTHOP : 0); ri->tgt_index = ifindex; + BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params)); + if (plen) + memcpy(&ri->nh, params, sizeof(ri->nh)); + return TC_ACT_REDIRECT; } @@ -2522,7 +2560,9 @@ static const struct bpf_func_proto bpf_redirect_neigh_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_ANYTHING, - .arg2_type = ARG_ANYTHING, + .arg2_type = ARG_PTR_TO_MEM_OR_NULL, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, + .arg4_type = ARG_ANYTHING, }; BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes) diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 7d86fdd190be..6769caae142f 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -453,6 +453,7 @@ class PrinterHelpers(Printer): 'struct bpf_perf_event_data', 'struct bpf_perf_event_value', 'struct bpf_pidns_info', + 'struct bpf_redir_neigh', 'struct bpf_sk_lookup', 'struct bpf_sock', 'struct bpf_sock_addr', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index bf5a99d803e4..e6ceac3f7d62 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3677,15 +3677,19 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * long bpf_redirect_neigh(u32 ifindex, u64 flags) + * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags) * Description * Redirect the packet to another net device of index *ifindex* * and fill in L2 addresses from neighboring subsystem. This helper * is somewhat similar to **bpf_redirect**\ (), except that it * populates L2 addresses as well, meaning, internally, the helper - * performs a FIB lookup based on the skb's networking header to - * get the address of the next hop and then relies on the neighbor - * lookup for the L2 address of the nexthop. + * relies on the neighbor lookup for the L2 address of the nexthop. + * + * The helper will perform a FIB lookup based on the skb's + * networking header to get the address of the next hop, unless + * this is supplied by the caller in the *params* argument. The + * *plen* argument indicates the len of *params* and should be set + * to 0 if *params* is NULL. * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types, and enabled @@ -4906,6 +4910,16 @@ struct bpf_fib_lookup { __u8 dmac[6]; /* ETH_ALEN */ }; +struct bpf_redir_neigh { + /* network family for lookup (AF_INET, AF_INET6) */ + __u32 nh_family; + /* network address of nexthop; skips fib lookup to find gateway */ + union { + __be32 ipv4_nh; + __u32 ipv6_nh[4]; /* in6_addr; network order */ + }; +}; + enum bpf_task_fd_type { BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ BPF_FD_TYPE_TRACEPOINT, /* tp name */ -- cgit v1.3.1 From adfd272c4ccbe43d9761bb17dd8a4387d7815382 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 20 Oct 2020 23:25:57 +0200 Subject: bpf, selftests: Extend test_tc_redirect to use modified bpf_redirect_neigh() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This updates the test_tc_neigh prog in selftests to use the new syntax of bpf_redirect_neigh(). To exercise the helper both with and without the optional parameter, add an additional test_tc_neigh_fib test program, which does a bpf_fib_lookup() followed by a call to bpf_redirect_neigh() instead of looking up the ifindex in a map. Update the test_tc_redirect.sh script to run both versions of the test, and while we're add it, fix it to work on systems that have a consolidated dual-stack 'ping' binary instead of separate ping/ping6 versions. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/160322915724.32199.17530068594636950447.stgit@toke.dk --- tools/testing/selftests/bpf/progs/test_tc_neigh.c | 5 +- .../selftests/bpf/progs/test_tc_neigh_fib.c | 155 +++++++++++++++++++++ tools/testing/selftests/bpf/test_tc_redirect.sh | 18 ++- 3 files changed, 173 insertions(+), 5 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c index fe182616b112..b985ac4e7a81 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include @@ -118,7 +119,7 @@ SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) return TC_ACT_SHOT; - return bpf_redirect_neigh(get_dev_ifindex(dev_src), 0); + return bpf_redirect_neigh(get_dev_ifindex(dev_src), NULL, 0, 0); } SEC("src_ingress") int tc_src(struct __sk_buff *skb) @@ -142,7 +143,7 @@ SEC("src_ingress") int tc_src(struct __sk_buff *skb) if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) return TC_ACT_SHOT; - return bpf_redirect_neigh(get_dev_ifindex(dev_dst), 0); + return bpf_redirect_neigh(get_dev_ifindex(dev_dst), NULL, 0, 0); } char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c new file mode 100644 index 000000000000..d82ed3457030 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifndef ctx_ptr +# define ctx_ptr(field) (void *)(long)(field) +#endif + +#define AF_INET 2 +#define AF_INET6 10 + +static __always_inline int fill_fib_params_v4(struct __sk_buff *skb, + struct bpf_fib_lookup *fib_params) +{ + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + struct iphdr *ip4h; + + if (data + sizeof(struct ethhdr) > data_end) + return -1; + + ip4h = (struct iphdr *)(data + sizeof(struct ethhdr)); + if ((void *)(ip4h + 1) > data_end) + return -1; + + fib_params->family = AF_INET; + fib_params->tos = ip4h->tos; + fib_params->l4_protocol = ip4h->protocol; + fib_params->sport = 0; + fib_params->dport = 0; + fib_params->tot_len = bpf_ntohs(ip4h->tot_len); + fib_params->ipv4_src = ip4h->saddr; + fib_params->ipv4_dst = ip4h->daddr; + + return 0; +} + +static __always_inline int fill_fib_params_v6(struct __sk_buff *skb, + struct bpf_fib_lookup *fib_params) +{ + struct in6_addr *src = (struct in6_addr *)fib_params->ipv6_src; + struct in6_addr *dst = (struct in6_addr *)fib_params->ipv6_dst; + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + struct ipv6hdr *ip6h; + + if (data + sizeof(struct ethhdr) > data_end) + return -1; + + ip6h = (struct ipv6hdr *)(data + sizeof(struct ethhdr)); + if ((void *)(ip6h + 1) > data_end) + return -1; + + fib_params->family = AF_INET6; + fib_params->flowinfo = 0; + fib_params->l4_protocol = ip6h->nexthdr; + fib_params->sport = 0; + fib_params->dport = 0; + fib_params->tot_len = bpf_ntohs(ip6h->payload_len); + *src = ip6h->saddr; + *dst = ip6h->daddr; + + return 0; +} + +SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +{ + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + __u32 *raw = data; + + if (data + sizeof(struct ethhdr) > data_end) + return TC_ACT_SHOT; + + return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK; +} + +static __always_inline int tc_redir(struct __sk_buff *skb) +{ + struct bpf_fib_lookup fib_params = { .ifindex = skb->ingress_ifindex }; + __u8 zero[ETH_ALEN * 2]; + int ret = -1; + + switch (skb->protocol) { + case __bpf_constant_htons(ETH_P_IP): + ret = fill_fib_params_v4(skb, &fib_params); + break; + case __bpf_constant_htons(ETH_P_IPV6): + ret = fill_fib_params_v6(skb, &fib_params); + break; + } + + if (ret) + return TC_ACT_OK; + + ret = bpf_fib_lookup(skb, &fib_params, sizeof(fib_params), 0); + if (ret == BPF_FIB_LKUP_RET_NOT_FWDED || ret < 0) + return TC_ACT_OK; + + __builtin_memset(&zero, 0, sizeof(zero)); + if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) + return TC_ACT_SHOT; + + if (ret == BPF_FIB_LKUP_RET_NO_NEIGH) { + struct bpf_redir_neigh nh_params = {}; + + nh_params.nh_family = fib_params.family; + __builtin_memcpy(&nh_params.ipv6_nh, &fib_params.ipv6_dst, + sizeof(nh_params.ipv6_nh)); + + return bpf_redirect_neigh(fib_params.ifindex, &nh_params, + sizeof(nh_params), 0); + + } else if (ret == BPF_FIB_LKUP_RET_SUCCESS) { + void *data_end = ctx_ptr(skb->data_end); + struct ethhdr *eth = ctx_ptr(skb->data); + + if (eth + 1 > data_end) + return TC_ACT_SHOT; + + __builtin_memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN); + __builtin_memcpy(eth->h_source, fib_params.smac, ETH_ALEN); + + return bpf_redirect(fib_params.ifindex, 0); + } + + return TC_ACT_SHOT; +} + +/* these are identical, but keep them separate for compatibility with the + * section names expected by test_tc_redirect.sh + */ +SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +{ + return tc_redir(skb); +} + +SEC("src_ingress") int tc_src(struct __sk_buff *skb) +{ + return tc_redir(skb); +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh index 6d7482562140..8868aa1ca902 100755 --- a/tools/testing/selftests/bpf/test_tc_redirect.sh +++ b/tools/testing/selftests/bpf/test_tc_redirect.sh @@ -24,8 +24,7 @@ command -v timeout >/dev/null 2>&1 || \ { echo >&2 "timeout is not available"; exit 1; } command -v ping >/dev/null 2>&1 || \ { echo >&2 "ping is not available"; exit 1; } -command -v ping6 >/dev/null 2>&1 || \ - { echo >&2 "ping6 is not available"; exit 1; } +if command -v ping6 >/dev/null 2>&1; then PING6=ping6; else PING6=ping; fi command -v perl >/dev/null 2>&1 || \ { echo >&2 "perl is not available"; exit 1; } command -v jq >/dev/null 2>&1 || \ @@ -152,7 +151,7 @@ netns_test_connectivity() echo -e "${TEST}: ${GREEN}PASS${NC}" TEST="ICMPv6 connectivity test" - ip netns exec ${NS_SRC} ping6 $PING_ARG ${IP6_DST} + ip netns exec ${NS_SRC} $PING6 $PING_ARG ${IP6_DST} if [ $? -ne 0 ]; then echo -e "${TEST}: ${RED}FAIL${NC}" exit 1 @@ -170,6 +169,7 @@ hex_mem_str() netns_setup_bpf() { local obj=$1 + local use_forwarding=${2:-0} ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress @@ -179,6 +179,14 @@ netns_setup_bpf() ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress + if [ "$use_forwarding" -eq "1" ]; then + # bpf_fib_lookup() checks if forwarding is enabled + ip netns exec ${NS_FWD} sysctl -w net.ipv4.ip_forward=1 + ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_dst_fwd.forwarding=1 + ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_src_fwd.forwarding=1 + return 0 + fi + veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex) veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex) @@ -200,5 +208,9 @@ netns_setup_bpf test_tc_neigh.o netns_test_connectivity netns_cleanup netns_setup +netns_setup_bpf test_tc_neigh_fib.o 1 +netns_test_connectivity +netns_cleanup +netns_setup netns_setup_bpf test_tc_peer.o netns_test_connectivity -- cgit v1.3.1 From 3652c9a1b1fe6cbdd4510eb220db548bff8704ae Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 21 Oct 2020 22:32:57 +0200 Subject: bpf, libbpf: Guard bpf inline asm from bpf_tail_call_static Yaniv reported a compilation error after pulling latest libbpf: [...] ../libbpf/src/root/usr/include/bpf/bpf_helpers.h:99:10: error: unknown register name 'r0' in asm : "r0", "r1", "r2", "r3", "r4", "r5"); [...] The issue got triggered given Yaniv was compiling tracing programs with native target (e.g. x86) instead of BPF target, hence no BTF generated vmlinux.h nor CO-RE used, and later llc with -march=bpf was invoked to compile from LLVM IR to BPF object file. Given that clang was expecting x86 inline asm and not BPF one the error complained that these regs don't exist on the former. Guard bpf_tail_call_static() with defined(__bpf__) where BPF inline asm is valid to use. BPF tracing programs on more modern kernels use BPF target anyway and thus the bpf_tail_call_static() function will be available for them. BPF inline asm is supported since clang 7 (clang <= 6 otherwise throws same above error), and __bpf_unreachable() since clang 8, therefore include the latter condition in order to prevent compilation errors for older clang versions. Given even an old Ubuntu 18.04 LTS has official LLVM packages all the way up to llvm-10, I did not bother to special case the __bpf_unreachable() inside bpf_tail_call_static() further. Also, undo the sockex3_kern's use of bpf_tail_call_static() sample given they still have the old hacky way to even compile networking progs with native instead of BPF target so bpf_tail_call_static() won't be defined there anymore. Fixes: 0e9f6841f664 ("bpf, libbpf: Add bpf_tail_call_static helper for bpf programs") Reported-by: Yaniv Agman Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Yonghong Song Tested-by: Yaniv Agman Link: https://lore.kernel.org/bpf/CAMy7=ZUk08w5Gc2Z-EKi4JFtuUCaZYmE4yzhJjrExXpYKR4L8w@mail.gmail.com Link: https://lore.kernel.org/bpf/20201021203257.26223-1-daniel@iogearbox.net --- samples/bpf/sockex3_kern.c | 8 ++++---- tools/lib/bpf/bpf_helpers.h | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c index 8142d02b33e6..b363503357e5 100644 --- a/samples/bpf/sockex3_kern.c +++ b/samples/bpf/sockex3_kern.c @@ -44,17 +44,17 @@ static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto) switch (proto) { case ETH_P_8021Q: case ETH_P_8021AD: - bpf_tail_call_static(skb, &jmp_table, PARSE_VLAN); + bpf_tail_call(skb, &jmp_table, PARSE_VLAN); break; case ETH_P_MPLS_UC: case ETH_P_MPLS_MC: - bpf_tail_call_static(skb, &jmp_table, PARSE_MPLS); + bpf_tail_call(skb, &jmp_table, PARSE_MPLS); break; case ETH_P_IP: - bpf_tail_call_static(skb, &jmp_table, PARSE_IP); + bpf_tail_call(skb, &jmp_table, PARSE_IP); break; case ETH_P_IPV6: - bpf_tail_call_static(skb, &jmp_table, PARSE_IPV6); + bpf_tail_call(skb, &jmp_table, PARSE_IPV6); break; } } diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 2bdb7d6dbad2..72b251110c4d 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -72,6 +72,7 @@ /* * Helper function to perform a tail call with a constant/immediate map slot. */ +#if __clang_major__ >= 8 && defined(__bpf__) static __always_inline void bpf_tail_call_static(void *ctx, const void *map, const __u32 slot) { @@ -98,6 +99,7 @@ bpf_tail_call_static(void *ctx, const void *map, const __u32 slot) :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot) : "r0", "r1", "r2", "r3", "r4", "r5"); } +#endif /* * Helper structure used by eBPF C program -- cgit v1.3.1 From 287d35405989cfe0090e3059f7788dc531879a8d Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 21 Oct 2020 17:55:49 +0200 Subject: selftests: mptcp: depends on built-in IPv6 Recently, CONFIG_MPTCP_IPV6 no longer selects CONFIG_IPV6. As a consequence, if CONFIG_MPTCP_IPV6=y is added to the kconfig, it will no longer ensure CONFIG_IPV6=y. If it is not enabled, CONFIG_MPTCP_IPV6 will stay disabled and selftests will fail. We also need CONFIG_IPV6 to be built-in. For more details, please see commit 0ed37ac586c0 ("mptcp: depends on IPV6 but not as a module"). Note that 'make kselftest-merge' will take all 'config' files found in 'tools/testsing/selftests'. Because some of them already set CONFIG_IPV6=y, MPTCP selftests were still passing. But they will fail if MPTCP selftests are launched manually after having executed this command to prepare the kernel config: ./scripts/kconfig/merge_config.sh -m .config \ ./tools/testing/selftests/net/mptcp/config Fixes: 010b430d5df5 ("mptcp: MPTCP_IPV6 should depend on IPV6 instead of selecting it") Signed-off-by: Matthieu Baerts Reviewed-by: Mat Martineau Link: https://lore.kernel.org/r/20201021155549.933731-1-matthieu.baerts@tessares.net Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/config | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 8df5cb8f71ff..741a1c4f4ae8 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -1,4 +1,5 @@ CONFIG_MPTCP=y +CONFIG_IPV6=y CONFIG_MPTCP_IPV6=y CONFIG_INET_DIAG=m CONFIG_INET_MPTCP_DIAG=m -- cgit v1.3.1 From 6ff7cb371c4bea3dba03a56d774da925e78a5087 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 29 Sep 2020 17:28:42 -0400 Subject: tools/power turbostat: adjust for temperature offset cpu1: MSR_IA32_TEMPERATURE_TARGET: 0x05640000 (95 C) (100 default - 5 offset) Account for the new "offset" field in MSR_TEMPERATURE_TARGET. While this field is usually zero, ignoring it results in over-stating the current temperature, both per-core and per-package. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 62 ++++++++++++++++------------------- 1 file changed, 29 insertions(+), 33 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0869f791ed14..4122468fb73b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4785,12 +4785,33 @@ double discover_bclk(unsigned int family, unsigned int model) * below this value, including the Digital Thermal Sensor (DTS), * Package Thermal Management Sensor (PTM), and thermal event thresholds. */ -int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int read_tcc_activation_temp() { unsigned long long msr; - unsigned int target_c_local; - int cpu; + unsigned int tcc, target_c, offset_c; + + /* Temperature Target MSR is Nehalem and newer only */ + if (!do_nhm_platform_info) + return 0; + + if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) + return 0; + target_c = (msr >> 16) & 0xFF; + + offset_c = (msr >> 24) & 0xF; + + tcc = target_c - offset_c; + + if (!quiet) + fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", + base_cpu, msr, tcc, target_c, offset_c); + + return tcc; +} + +int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ /* tcc_activation_temp is used only for dts or ptm */ if (!(do_dts || do_ptm)) return 0; @@ -4799,43 +4820,18 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; - cpu = t->cpu_id; - if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); - return -1; - } - if (tcc_activation_temp_override != 0) { tcc_activation_temp = tcc_activation_temp_override; - fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", - cpu, tcc_activation_temp); + fprintf(outf, "Using cmdline TCC Target (%d C)\n", tcc_activation_temp); return 0; } - /* Temperature Target MSR is Nehalem and newer only */ - if (!do_nhm_platform_info) - goto guess; - - if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) - goto guess; - - target_c_local = (msr >> 16) & 0xFF; - - if (!quiet) - fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", - cpu, msr, target_c_local); - - if (!target_c_local) - goto guess; - - tcc_activation_temp = target_c_local; - - return 0; + tcc_activation_temp = read_tcc_activation_temp(); + if (tcc_activation_temp) + return 0; -guess: tcc_activation_temp = TJMAX_DEFAULT; - fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", - cpu, tcc_activation_temp); + fprintf(outf, "Guessing tjMax %d C, Please use -T to specify\n", tcc_activation_temp); return 0; } -- cgit v1.3.1 From 3d7772ea5602b88c7c7f0a50d512171a2eed6659 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 30 Sep 2020 20:58:15 -0400 Subject: tools/power turbostat: harden against cpu hotplug turbostat tends to get confused when CPUs are added and removed while it is running. There are races, such as checking the current cpu, and then reading a sysfs file that depends on that cpu number. Close the two issues that seem to come up the most. First, there is an infinite reset loop detector -- change that to allow more resets before giving up. Secondly, one of those file reads didn't really need to exit the program on failure... Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4122468fb73b..74d2fc6fc78a 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1894,7 +1894,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) int i; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu); return -1; } @@ -2764,7 +2764,12 @@ int get_thread_siblings(struct cpu_topology *thiscpu) sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); - filep = fopen_or_die(path, "r"); + filep = fopen(path, "r"); + + if (!filep) { + warnx("%s: open failed", path); + return -1; + } do { offset -= BITMASK_SIZE; if (fscanf(filep, "%lx%c", &map, &character) != 2) @@ -2877,7 +2882,7 @@ void re_initialize(void) { free_all_buffers(); setup_all_buffers(); - printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); + fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); } void set_max_cpu_num(void) @@ -3331,7 +3336,7 @@ restart: if (retval < -1) { exit(retval); } else if (retval == -1) { - if (restarted > 1) { + if (restarted > 10) { exit(retval); } re_initialize(); @@ -3926,7 +3931,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) return 0; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu); return -1; } @@ -3970,7 +3975,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) return 0; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu); return -1; } @@ -4058,7 +4063,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data return 0; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu); return -1; } @@ -4439,7 +4444,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p return 0; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu); return -1; } @@ -4511,7 +4516,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) cpu = t->cpu_id; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu); return -1; } -- cgit v1.3.1 From 33def8498fdde180023444b08e12b72a9efed41d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 21 Oct 2020 19:36:07 -0700 Subject: treewide: Convert macro and uses of __section(foo) to __section("foo") Use a more generic form for __section that requires quotes to avoid complications with clang and gcc differences. Remove the quote operator # from compiler_attributes.h __section macro. Convert all unquoted __section(foo) uses to quoted __section("foo"). Also convert __attribute__((section("foo"))) uses to __section("foo") even if the __attribute__ has multiple list entry forms. Conversion done using the script at: https://lore.kernel.org/lkml/75393e5ddc272dc7403de74d645e6c6e0f4e70eb.camel@perches.com/2-convert_section.pl Signed-off-by: Joe Perches Reviewed-by: Nick Desaulniers Reviewed-by: Miguel Ojeda Signed-off-by: Linus Torvalds --- arch/arc/include/asm/linkage.h | 8 ++++---- arch/arc/include/asm/mach_desc.h | 2 +- arch/arc/plat-hsdk/platform.c | 2 +- arch/arm/include/asm/cache.h | 2 +- arch/arm/include/asm/cpuidle.h | 2 +- arch/arm/include/asm/idmap.h | 2 +- arch/arm/include/asm/mach/arch.h | 4 ++-- arch/arm/include/asm/setup.h | 2 +- arch/arm/include/asm/smp.h | 2 +- arch/arm/include/asm/tcm.h | 8 ++++---- arch/arm/kernel/cpuidle.c | 2 +- arch/arm/kernel/devtree.c | 2 +- arch/arm64/include/asm/cache.h | 2 +- arch/arm64/kernel/efi.c | 2 +- arch/arm64/kernel/smp_spin_table.c | 2 +- arch/arm64/mm/mmu.c | 2 +- arch/csky/include/asm/tcm.h | 8 ++++---- arch/ia64/include/asm/cache.h | 2 +- arch/microblaze/kernel/setup.c | 2 +- arch/mips/include/asm/cache.h | 2 +- arch/mips/include/asm/machine.h | 2 +- arch/mips/kernel/setup.c | 2 +- arch/mips/mm/init.c | 2 +- arch/parisc/include/asm/cache.h | 2 +- arch/parisc/include/asm/ldcw.h | 2 +- arch/parisc/kernel/ftrace.c | 2 +- arch/parisc/mm/init.c | 6 +++--- arch/powerpc/include/asm/cache.h | 2 +- arch/powerpc/include/asm/machdep.h | 2 +- arch/powerpc/kernel/btext.c | 2 +- arch/powerpc/kernel/prom_init.c | 2 +- arch/powerpc/kvm/book3s_64_vio_hv.c | 2 +- arch/riscv/include/asm/soc.h | 4 ++-- arch/riscv/kernel/cpu_ops.c | 4 ++-- arch/riscv/kernel/setup.c | 4 ++-- arch/s390/boot/startup.c | 2 +- arch/s390/include/asm/cache.h | 2 +- arch/s390/include/asm/sections.h | 4 ++-- arch/s390/mm/init.c | 2 +- arch/sh/boards/of-generic.c | 2 +- arch/sh/include/asm/cache.h | 2 +- arch/sh/include/asm/machvec.h | 2 +- arch/sh/include/asm/smp.h | 2 +- arch/sparc/include/asm/cache.h | 2 +- arch/sparc/kernel/btext.c | 2 +- arch/um/include/shared/init.h | 22 +++++++++++----------- arch/um/kernel/skas/clone.c | 2 +- arch/um/kernel/um_arch.c | 2 +- arch/x86/boot/compressed/pgtable_64.c | 8 ++++---- arch/x86/boot/tty.c | 8 ++++---- arch/x86/boot/video.h | 2 +- arch/x86/include/asm/apic.h | 4 ++-- arch/x86/include/asm/cache.h | 2 +- arch/x86/include/asm/intel-mid.h | 2 +- arch/x86/include/asm/irqflags.h | 2 +- arch/x86/include/asm/mem_encrypt.h | 2 +- arch/x86/include/asm/setup.h | 2 +- arch/x86/kernel/cpu/cpu.h | 2 +- arch/x86/kernel/head64.c | 2 +- arch/x86/mm/mem_encrypt.c | 6 +++--- arch/x86/mm/mem_encrypt_identity.c | 2 +- arch/x86/platform/pvh/enlighten.c | 4 ++-- arch/x86/purgatory/purgatory.c | 4 ++-- arch/x86/um/stub_segv.c | 2 +- arch/x86/xen/enlighten.c | 2 +- arch/x86/xen/enlighten_pvh.c | 2 +- arch/xtensa/kernel/setup.c | 2 +- drivers/clk/clk.c | 2 +- drivers/clocksource/timer-probe.c | 2 +- drivers/irqchip/irqchip.c | 2 +- drivers/of/of_reserved_mem.c | 2 +- drivers/thermal/thermal_core.h | 2 +- fs/xfs/xfs_message.h | 2 +- include/asm-generic/bug.h | 6 +++--- include/asm-generic/error-injection.h | 2 +- include/asm-generic/kprobes.h | 4 ++-- include/kunit/test.h | 2 +- include/linux/acpi.h | 4 ++-- include/linux/cache.h | 2 +- include/linux/compiler.h | 8 ++++---- include/linux/compiler_attributes.h | 2 +- include/linux/cpu.h | 2 +- include/linux/dynamic_debug.h | 2 +- include/linux/export.h | 2 +- include/linux/firmware.h | 2 +- include/linux/init.h | 34 +++++++++++++++++----------------- include/linux/init_task.h | 4 ++-- include/linux/interrupt.h | 4 ++-- include/linux/kernel.h | 6 +++--- include/linux/linkage.h | 4 ++-- include/linux/lsm_hooks.h | 4 ++-- include/linux/module.h | 2 +- include/linux/moduleparam.h | 4 ++-- include/linux/mtd/xip.h | 2 +- include/linux/objtool.h | 2 +- include/linux/of.h | 2 +- include/linux/percpu-defs.h | 2 +- include/linux/printk.h | 4 ++-- include/linux/rcupdate.h | 2 +- include/linux/sched/debug.h | 2 +- include/linux/serial_core.h | 2 +- include/linux/spinlock.h | 2 +- include/linux/syscalls.h | 6 +++--- include/linux/trace_events.h | 2 +- include/linux/tracepoint.h | 8 ++++---- include/trace/bpf_probe.h | 2 +- include/trace/trace_events.h | 10 +++++----- kernel/kallsyms.c | 4 ++-- kernel/sched/deadline.c | 2 +- kernel/sched/fair.c | 2 +- kernel/sched/idle.c | 2 +- kernel/sched/rt.c | 2 +- kernel/sched/stop_task.c | 2 +- kernel/trace/trace.h | 2 +- kernel/trace/trace_export.c | 2 +- scripts/mod/modpost.c | 4 ++-- tools/include/linux/objtool.h | 2 +- 117 files changed, 196 insertions(+), 196 deletions(-) (limited to 'tools') diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h index fe19f1d412e7..c9434ff3aa4c 100644 --- a/arch/arc/include/asm/linkage.h +++ b/arch/arc/include/asm/linkage.h @@ -64,15 +64,15 @@ #else /* !__ASSEMBLY__ */ #ifdef CONFIG_ARC_HAS_ICCM -#define __arcfp_code __section(.text.arcfp) +#define __arcfp_code __section(".text.arcfp") #else -#define __arcfp_code __section(.text) +#define __arcfp_code __section(".text") #endif #ifdef CONFIG_ARC_HAS_DCCM -#define __arcfp_data __section(.data.arcfp) +#define __arcfp_data __section(".data.arcfp") #else -#define __arcfp_data __section(.data) +#define __arcfp_data __section(".data") #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/arc/include/asm/mach_desc.h b/arch/arc/include/asm/mach_desc.h index 73746ed5b834..c4e197059379 100644 --- a/arch/arc/include/asm/mach_desc.h +++ b/arch/arc/include/asm/mach_desc.h @@ -53,7 +53,7 @@ extern const struct machine_desc __arch_info_begin[], __arch_info_end[]; */ #define MACHINE_START(_type, _name) \ static const struct machine_desc __mach_desc_##_type \ -__used __section(.arch.info.init) = { \ +__used __section(".arch.info.init") = { \ .name = _name, #define MACHINE_END \ diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index 0b961a2a10b8..0b63fc095b99 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -13,7 +13,7 @@ #include #include -int arc_hsdk_axi_dmac_coherent __section(.data) = 0; +int arc_hsdk_axi_dmac_coherent __section(".data") = 0; #define ARC_CCM_UNUSED_ADDR 0x60000000 diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h index 1d65ed3a2755..e3ea34558ada 100644 --- a/arch/arm/include/asm/cache.h +++ b/arch/arm/include/asm/cache.h @@ -24,6 +24,6 @@ #define ARCH_SLAB_MINALIGN 8 #endif -#define __read_mostly __attribute__((__section__(".data..read_mostly"))) +#define __read_mostly __section(".data..read_mostly") #endif diff --git a/arch/arm/include/asm/cpuidle.h b/arch/arm/include/asm/cpuidle.h index 6b2ff7243b4b..0d67ed682e07 100644 --- a/arch/arm/include/asm/cpuidle.h +++ b/arch/arm/include/asm/cpuidle.h @@ -42,7 +42,7 @@ struct of_cpuidle_method { #define CPUIDLE_METHOD_OF_DECLARE(name, _method, _ops) \ static const struct of_cpuidle_method __cpuidle_method_of_table_##name \ - __used __section(__cpuidle_method_of_table) \ + __used __section("__cpuidle_method_of_table") \ = { .method = _method, .ops = _ops } extern int arm_cpuidle_suspend(int index); diff --git a/arch/arm/include/asm/idmap.h b/arch/arm/include/asm/idmap.h index aab7e8358e6a..baebb67b3512 100644 --- a/arch/arm/include/asm/idmap.h +++ b/arch/arm/include/asm/idmap.h @@ -6,7 +6,7 @@ #include /* Tag a function as requiring to be executed via an identity mapping. */ -#define __idmap __section(.idmap.text) noinline notrace +#define __idmap __section(".idmap.text") noinline notrace extern pgd_t *idmap_pgd; diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h index e7df5a822cab..eec0c0bda766 100644 --- a/arch/arm/include/asm/mach/arch.h +++ b/arch/arm/include/asm/mach/arch.h @@ -81,7 +81,7 @@ extern const struct machine_desc __arch_info_begin[], __arch_info_end[]; #define MACHINE_START(_type,_name) \ static const struct machine_desc __mach_desc_##_type \ __used \ - __attribute__((__section__(".arch.info.init"))) = { \ + __section(".arch.info.init") = { \ .nr = MACH_TYPE_##_type, \ .name = _name, @@ -91,7 +91,7 @@ static const struct machine_desc __mach_desc_##_type \ #define DT_MACHINE_START(_name, _namestr) \ static const struct machine_desc __mach_desc_##_name \ __used \ - __attribute__((__section__(".arch.info.init"))) = { \ + __section(".arch.info.init") = { \ .nr = ~0, \ .name = _namestr, diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h index 67d20712cb48..3ae68a1b3de6 100644 --- a/arch/arm/include/asm/setup.h +++ b/arch/arm/include/asm/setup.h @@ -14,7 +14,7 @@ #include -#define __tag __used __attribute__((__section__(".taglist.init"))) +#define __tag __used __section(".taglist.init") #define __tagtable(tag, fn) \ static const struct tagtable __tagtable_##fn __tag = { tag, fn } diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 0ca55a607d0a..5d508f5d56c4 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -112,7 +112,7 @@ struct of_cpu_method { #define CPU_METHOD_OF_DECLARE(name, _method, _ops) \ static const struct of_cpu_method __cpu_method_of_table_##name \ - __used __section(__cpu_method_of_table) \ + __used __section("__cpu_method_of_table") \ = { .method = _method, .ops = _ops } /* * set platform specific SMP operations diff --git a/arch/arm/include/asm/tcm.h b/arch/arm/include/asm/tcm.h index b845b10fe29a..d8bd8a4b0ede 100644 --- a/arch/arm/include/asm/tcm.h +++ b/arch/arm/include/asm/tcm.h @@ -16,13 +16,13 @@ #include /* Tag variables with this */ -#define __tcmdata __section(.tcm.data) +#define __tcmdata __section(".tcm.data") /* Tag constants with this */ -#define __tcmconst __section(.tcm.rodata) +#define __tcmconst __section(".tcm.rodata") /* Tag functions inside TCM called from outside TCM with this */ -#define __tcmfunc __attribute__((long_call)) __section(.tcm.text) noinline +#define __tcmfunc __attribute__((long_call)) __section(".tcm.text") noinline /* Tag function inside TCM called from inside TCM with this */ -#define __tcmlocalfunc __section(.tcm.text) +#define __tcmlocalfunc __section(".tcm.text") void *tcm_alloc(size_t len); void tcm_free(void *addr, size_t len); diff --git a/arch/arm/kernel/cpuidle.c b/arch/arm/kernel/cpuidle.c index 093368e0d020..e1684623e1b2 100644 --- a/arch/arm/kernel/cpuidle.c +++ b/arch/arm/kernel/cpuidle.c @@ -11,7 +11,7 @@ extern struct of_cpuidle_method __cpuidle_method_of_table[]; static const struct of_cpuidle_method __cpuidle_method_of_table_sentinel - __used __section(__cpuidle_method_of_table_end); + __used __section("__cpuidle_method_of_table_end"); static struct cpuidle_ops cpuidle_ops[NR_CPUS] __ro_after_init; diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index 39c978698406..7f0745a97e20 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -29,7 +29,7 @@ extern struct of_cpu_method __cpu_method_of_table[]; static const struct of_cpu_method __cpu_method_of_table_sentinel - __used __section(__cpu_method_of_table_end); + __used __section("__cpu_method_of_table_end"); static int __init set_smp_ops_by_method(struct device_node *node) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index a4d1b5f771f6..0ac3e06a2118 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -79,7 +79,7 @@ static inline u32 cache_type_cwg(void) return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK; } -#define __read_mostly __section(.data..read_mostly) +#define __read_mostly __section(".data..read_mostly") static inline int cache_line_size_of_cpu(void) { diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index d0cf596db82c..fa02efb28e88 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -54,7 +54,7 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md) } /* we will fill this structure from the stub, so don't put it in .bss */ -struct screen_info screen_info __section(.data); +struct screen_info screen_info __section(".data"); int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) { diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 5892e79fa429..056772c26098 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -19,7 +19,7 @@ #include extern void secondary_holding_pen(void); -volatile unsigned long __section(.mmuoff.data.read) +volatile unsigned long __section(".mmuoff.data.read") secondary_holding_pen_release = INVALID_HWID; static phys_addr_t cpu_release_addr[NR_CPUS]; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index beff3ad8c7f8..1c0f3e02f731 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -43,7 +43,7 @@ u64 idmap_t0sz = TCR_T0SZ(VA_BITS); u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; -u64 __section(.mmuoff.data.write) vabits_actual; +u64 __section(".mmuoff.data.write") vabits_actual; EXPORT_SYMBOL(vabits_actual); u64 kimage_voffset __ro_after_init; diff --git a/arch/csky/include/asm/tcm.h b/arch/csky/include/asm/tcm.h index 2b135cefb73f..bd1e662ecdfa 100644 --- a/arch/csky/include/asm/tcm.h +++ b/arch/csky/include/asm/tcm.h @@ -10,13 +10,13 @@ #include /* Tag variables with this */ -#define __tcmdata __section(.tcm.data) +#define __tcmdata __section(".tcm.data") /* Tag constants with this */ -#define __tcmconst __section(.tcm.rodata) +#define __tcmconst __section(".tcm.rodata") /* Tag functions inside TCM called from outside TCM with this */ -#define __tcmfunc __section(.tcm.text) noinline +#define __tcmfunc __section(".tcm.text") noinline /* Tag function inside TCM called from inside TCM with this */ -#define __tcmlocalfunc __section(.tcm.text) +#define __tcmlocalfunc __section(".tcm.text") void *tcm_alloc(size_t len); void tcm_free(void *addr, size_t len); diff --git a/arch/ia64/include/asm/cache.h b/arch/ia64/include/asm/cache.h index 4eb6f742d14f..2f1c70647068 100644 --- a/arch/ia64/include/asm/cache.h +++ b/arch/ia64/include/asm/cache.h @@ -25,6 +25,6 @@ # define SMP_CACHE_BYTES (1 << 3) #endif -#define __read_mostly __attribute__((__section__(".data..read_mostly"))) +#define __read_mostly __section(".data..read_mostly") #endif /* _ASM_IA64_CACHE_H */ diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c index 2310daff1f8a..333b09658ca8 100644 --- a/arch/microblaze/kernel/setup.c +++ b/arch/microblaze/kernel/setup.c @@ -46,7 +46,7 @@ DEFINE_PER_CPU(unsigned int, CURRENT_SAVE); /* Saved current pointer */ * ASM code. Default position is BSS section which is cleared * in machine_early_init(). */ -char cmd_line[COMMAND_LINE_SIZE] __attribute__ ((section(".data"))); +char cmd_line[COMMAND_LINE_SIZE] __section(".data"); void __init setup_arch(char **cmdline_p) { diff --git a/arch/mips/include/asm/cache.h b/arch/mips/include/asm/cache.h index 8b14c2706aa5..29187e12b861 100644 --- a/arch/mips/include/asm/cache.h +++ b/arch/mips/include/asm/cache.h @@ -14,6 +14,6 @@ #define L1_CACHE_SHIFT CONFIG_MIPS_L1_CACHE_SHIFT #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) -#define __read_mostly __attribute__((__section__(".data..read_mostly"))) +#define __read_mostly __section(".data..read_mostly") #endif /* _ASM_CACHE_H */ diff --git a/arch/mips/include/asm/machine.h b/arch/mips/include/asm/machine.h index 29ca344a8cab..fc64cce270f0 100644 --- a/arch/mips/include/asm/machine.h +++ b/arch/mips/include/asm/machine.h @@ -23,7 +23,7 @@ extern long __mips_machines_end; #define MIPS_MACHINE(name) \ static const struct mips_machine __mips_mach_##name \ - __used __section(.mips.machines.init) + __used __section(".mips.machines.init") #define for_each_mips_machine(mach) \ for ((mach) = (struct mips_machine *)&__mips_machines_start; \ diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index fccdbe2e7c2b..0d4253208bde 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -44,7 +44,7 @@ #include #ifdef CONFIG_MIPS_ELF_APPENDED_DTB -const char __section(.appended_dtb) __appended_dtb[0x100000]; +const char __section(".appended_dtb") __appended_dtb[0x100000]; #endif /* CONFIG_MIPS_ELF_APPENDED_DTB */ struct cpuinfo_mips cpu_data[NR_CPUS] __read_mostly; diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 6c7bbfe35ba3..07e84a774938 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -569,7 +569,7 @@ unsigned long pgd_current[NR_CPUS]; * size, and waste space. So we place it in its own section and align * it in the linker script. */ -pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir); +pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir"); #ifndef __PAGETABLE_PUD_FOLDED pud_t invalid_pud_table[PTRS_PER_PUD] __page_aligned_bss; #endif diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index e5de3f897633..d53e9e27dba0 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -22,7 +22,7 @@ #define ARCH_DMA_MINALIGN L1_CACHE_BYTES -#define __read_mostly __section(.data..read_mostly) +#define __read_mostly __section(".data..read_mostly") void parisc_cache_init(void); /* initializes cache-flushing */ void disable_sr_hashing_asm(int); /* low level support for above */ diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index e080143e79a3..6d28b5514699 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -52,7 +52,7 @@ }) #ifdef CONFIG_SMP -# define __lock_aligned __section(.data..lock_aligned) +# define __lock_aligned __section(".data..lock_aligned") #endif #endif /* __PARISC_LDCW_H */ diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 4bab21c71055..63e3ecb9da81 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -21,7 +21,7 @@ #include #include -#define __hot __attribute__ ((__section__ (".text.hot"))) +#define __hot __section(".text.hot") #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 4381b65ae1e0..3ec633b11b54 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -42,11 +42,11 @@ extern void parisc_kernel_start(void); /* Kernel entry point in head.S */ * guarantee that global objects will be laid out in memory in the same order * as the order of declaration, so put these in different sections and use * the linker script to order them. */ -pmd_t pmd0[PTRS_PER_PMD] __attribute__ ((__section__ (".data..vm0.pmd"), aligned(PAGE_SIZE))); +pmd_t pmd0[PTRS_PER_PMD] __section(".data..vm0.pmd") __attribute__ ((aligned(PAGE_SIZE))); #endif -pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__ ((__section__ (".data..vm0.pgd"), aligned(PAGE_SIZE))); -pte_t pg0[PT_INITIAL * PTRS_PER_PTE] __attribute__ ((__section__ (".data..vm0.pte"), aligned(PAGE_SIZE))); +pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".data..vm0.pgd") __attribute__ ((aligned(PAGE_SIZE))); +pte_t pg0[PT_INITIAL * PTRS_PER_PTE] __section(".data..vm0.pte") __attribute__ ((aligned(PAGE_SIZE))); static struct resource data_resource = { .name = "Kernel data", diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index 2124b7090db9..ae0a68a838e8 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -97,7 +97,7 @@ static inline u32 l1_icache_bytes(void) #endif -#define __read_mostly __section(.data..read_mostly) +#define __read_mostly __section(".data..read_mostly") #ifdef CONFIG_PPC_BOOK3S_32 extern long _get_L2CR(void); diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 95081078aa8a..475687f24f4a 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -232,7 +232,7 @@ extern void book3e_idle(void); extern struct machdep_calls ppc_md; extern struct machdep_calls *machine_id; -#define __machine_desc __attribute__ ((__section__ (".machine.desc"))) +#define __machine_desc __section(".machine.desc") #define define_machine(name) \ extern struct machdep_calls mach_##name; \ diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index c22a8e0dbc93..803c2a45b22a 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -26,7 +26,7 @@ static void scrollscreen(void); #endif -#define __force_data __section(.data) +#define __force_data __section(".data") static int g_loc_X __force_data; static int g_loc_Y __force_data; diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 5090a5ab54e5..38ae5933d917 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -45,7 +45,7 @@ #include /* All of prom_init bss lives here */ -#define __prombss __section(.bss.prominit) +#define __prombss __section(".bss.prominit") /* * Eventually bump that one up diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 470e7c518a10..083a4e037718 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -32,7 +32,7 @@ #ifdef CONFIG_BUG #define WARN_ON_ONCE_RM(condition) ({ \ - static bool __section(.data.unlikely) __warned; \ + static bool __section(".data.unlikely") __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ diff --git a/arch/riscv/include/asm/soc.h b/arch/riscv/include/asm/soc.h index 136a442ef876..6c8363b1f327 100644 --- a/arch/riscv/include/asm/soc.h +++ b/arch/riscv/include/asm/soc.h @@ -13,7 +13,7 @@ #define SOC_EARLY_INIT_DECLARE(name, compat, fn) \ static const struct of_device_id __soc_early_init__##name \ - __used __section(__soc_early_init_table) \ + __used __section("__soc_early_init_table") \ = { .compatible = compat, .data = fn } void soc_early_init(void); @@ -46,7 +46,7 @@ struct soc_builtin_dtb { } \ \ static const struct soc_builtin_dtb __soc_builtin_dtb__##name \ - __used __section(__soc_builtin_dtb_table) = \ + __used __section("__soc_builtin_dtb_table") = \ { \ .vendor_id = vendor, \ .arch_id = arch, \ diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c index 0ec22354018c..1985884fe829 100644 --- a/arch/riscv/kernel/cpu_ops.c +++ b/arch/riscv/kernel/cpu_ops.c @@ -15,8 +15,8 @@ const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; -void *__cpu_up_stack_pointer[NR_CPUS] __section(.data); -void *__cpu_up_task_pointer[NR_CPUS] __section(.data); +void *__cpu_up_stack_pointer[NR_CPUS] __section(".data"); +void *__cpu_up_task_pointer[NR_CPUS] __section(".data"); extern const struct cpu_operations cpu_ops_sbi; extern const struct cpu_operations cpu_ops_spinwait; diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 4c96ac198e14..c424cc6dd833 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -32,7 +32,7 @@ #include "head.h" #if defined(CONFIG_DUMMY_CONSOLE) || defined(CONFIG_EFI) -struct screen_info screen_info __section(.data) = { +struct screen_info screen_info __section(".data") = { .orig_video_lines = 30, .orig_video_cols = 80, .orig_video_mode = 0, @@ -47,7 +47,7 @@ struct screen_info screen_info __section(.data) = { * This is used before the kernel initializes the BSS so it can't be in the * BSS. */ -atomic_t hart_lottery __section(.sdata); +atomic_t hart_lottery __section(".sdata"); unsigned long boot_cpu_hartid; static DEFINE_PER_CPU(struct cpu, cpu_devices); diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 90842936545b..cc96b04cc0ba 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -46,7 +46,7 @@ struct diag_ops __bootdata_preserved(diag_dma_ops) = { .diag0c = _diag0c_dma, .diag308_reset = _diag308_reset_dma }; -static struct diag210 _diag210_tmp_dma __section(.dma.data); +static struct diag210 _diag210_tmp_dma __section(".dma.data"); struct diag210 *__bootdata_preserved(__diag210_tmp_dma) = &_diag210_tmp_dma; void error(char *x) diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h index d5e22e837416..00128174c025 100644 --- a/arch/s390/include/asm/cache.h +++ b/arch/s390/include/asm/cache.h @@ -14,6 +14,6 @@ #define L1_CACHE_SHIFT 8 #define NET_SKB_PAD 32 -#define __read_mostly __section(.data..read_mostly) +#define __read_mostly __section(".data..read_mostly") #endif diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index 42de04ad9c07..a996d3990a02 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -26,14 +26,14 @@ static inline int arch_is_kernel_initmem_freed(unsigned long addr) * final .boot.data section, which should be identical in the decompressor and * the decompressed kernel (that is checked during the build). */ -#define __bootdata(var) __section(.boot.data.var) var +#define __bootdata(var) __section(".boot.data.var") var /* * .boot.preserved.data is similar to .boot.data, but it is not part of the * .init section and thus will be preserved for later use in the decompressed * kernel. */ -#define __bootdata_preserved(var) __section(.boot.preserved.data.var) var +#define __bootdata_preserved(var) __section(".boot.preserved.data.var") var extern unsigned long __sdma, __edma; extern unsigned long __stext_dma, __etext_dma; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 284939f9661c..77767850d0d0 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -48,7 +48,7 @@ #include #include -pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir); +pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir"); unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c index d91065e81a4e..bffbe69b2236 100644 --- a/arch/sh/boards/of-generic.c +++ b/arch/sh/boards/of-generic.c @@ -49,7 +49,7 @@ static struct plat_smp_ops dummy_smp_ops = { extern const struct of_cpu_method __cpu_method_of_table[]; const struct of_cpu_method __cpu_method_of_table_sentinel - __section(__cpu_method_of_table_end); + __section("__cpu_method_of_table_end"); static void sh_of_smp_probe(void) { diff --git a/arch/sh/include/asm/cache.h b/arch/sh/include/asm/cache.h index a293343456af..32dfa6b82ec6 100644 --- a/arch/sh/include/asm/cache.h +++ b/arch/sh/include/asm/cache.h @@ -14,7 +14,7 @@ #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) -#define __read_mostly __attribute__((__section__(".data..read_mostly"))) +#define __read_mostly __section(".data..read_mostly") #ifndef __ASSEMBLY__ struct cache_info { diff --git a/arch/sh/include/asm/machvec.h b/arch/sh/include/asm/machvec.h index f7d05546beca..2b4b085e8f21 100644 --- a/arch/sh/include/asm/machvec.h +++ b/arch/sh/include/asm/machvec.h @@ -36,6 +36,6 @@ extern struct sh_machine_vector sh_mv; #define get_system_type() sh_mv.mv_name #define __initmv \ - __used __section(.machvec.init) + __used __section(".machvec.init") #endif /* _ASM_SH_MACHVEC_H */ diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h index 100bf241340b..199381f77293 100644 --- a/arch/sh/include/asm/smp.h +++ b/arch/sh/include/asm/smp.h @@ -71,7 +71,7 @@ struct of_cpu_method { #define CPU_METHOD_OF_DECLARE(name, _method, _ops) \ static const struct of_cpu_method __cpu_method_of_table_##name \ - __used __section(__cpu_method_of_table) \ + __used __section("__cpu_method_of_table") \ = { .method = _method, .ops = _ops } #else diff --git a/arch/sparc/include/asm/cache.h b/arch/sparc/include/asm/cache.h index dcfd58118c11..e62fd0e72606 100644 --- a/arch/sparc/include/asm/cache.h +++ b/arch/sparc/include/asm/cache.h @@ -21,6 +21,6 @@ #define SMP_CACHE_BYTES (1 << SMP_CACHE_BYTES_SHIFT) -#define __read_mostly __attribute__((__section__(".data..read_mostly"))) +#define __read_mostly __section(".data..read_mostly") #endif /* !(_SPARC_CACHE_H) */ diff --git a/arch/sparc/kernel/btext.c b/arch/sparc/kernel/btext.c index 5869773f3dc4..e2d3f0d2971f 100644 --- a/arch/sparc/kernel/btext.c +++ b/arch/sparc/kernel/btext.c @@ -24,7 +24,7 @@ static void draw_byte_32(unsigned char *bits, unsigned int *base, int rb); static void draw_byte_16(unsigned char *bits, unsigned int *base, int rb); static void draw_byte_8(unsigned char *bits, unsigned int *base, int rb); -#define __force_data __attribute__((__section__(".data"))) +#define __force_data __section(".data") static int g_loc_X __force_data; static int g_loc_Y __force_data; diff --git a/arch/um/include/shared/init.h b/arch/um/include/shared/init.h index c66de434a983..1a659e2e8cc3 100644 --- a/arch/um/include/shared/init.h +++ b/arch/um/include/shared/init.h @@ -45,15 +45,15 @@ typedef void (*exitcall_t)(void); /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(.init.text) -#define __initdata __section(.init.data) -#define __exitdata __section(.exit.data) -#define __exit_call __used __section(.exitcall.exit) +#define __init __section(".init.text") +#define __initdata __section(".init.data") +#define __exitdata __section(".exit.data") +#define __exit_call __used __section(".exitcall.exit") #ifdef MODULE -#define __exit __section(.exit.text) +#define __exit __section(".exit.text") #else -#define __exit __used __section(.exit.text) +#define __exit __used __section(".exit.text") #endif #endif @@ -102,10 +102,10 @@ extern struct uml_param __uml_setup_start, __uml_setup_end; * Mark functions and data as being only used at initialization * or exit time. */ -#define __uml_init_setup __used __section(.uml.setup.init) -#define __uml_setup_help __used __section(.uml.help.init) -#define __uml_postsetup_call __used __section(.uml.postsetup.init) -#define __uml_exit_call __used __section(.uml.exitcall.exit) +#define __uml_init_setup __used __section(".uml.setup.init") +#define __uml_setup_help __used __section(".uml.help.init") +#define __uml_postsetup_call __used __section(".uml.postsetup.init") +#define __uml_exit_call __used __section(".uml.exitcall.exit") #ifdef __UM_HOST__ @@ -120,7 +120,7 @@ extern struct uml_param __uml_setup_start, __uml_setup_end; #define __exitcall(fn) static exitcall_t __exitcall_##fn __exit_call = fn -#define __init_call __used __section(.initcall.init) +#define __init_call __used __section(".initcall.init") #endif diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c index bfb70c456b30..95c355181dcd 100644 --- a/arch/um/kernel/skas/clone.c +++ b/arch/um/kernel/skas/clone.c @@ -21,7 +21,7 @@ * on some systems. */ -void __attribute__ ((__section__ (".__syscall_stub"))) +void __section(".__syscall_stub") stub_clone_handler(void) { struct stub_data *data = (struct stub_data *) STUB_DATA; diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 00141e70de56..76b37297b7d4 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -52,7 +52,7 @@ struct cpuinfo_um boot_cpu_data = { }; union thread_union cpu0_irqstack - __attribute__((__section__(".data..init_irqstack"))) = + __section(".data..init_irqstack") = { .thread_info = INIT_THREAD_INFO(init_task) }; /* Changed in setup_arch, which is called in early boot */ diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 5def1674d6f1..2a78746f5a4c 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -10,9 +10,9 @@ #ifdef CONFIG_X86_5LEVEL /* __pgtable_l5_enabled needs to be in .data to avoid being cleared along with .bss */ -unsigned int __section(.data) __pgtable_l5_enabled; -unsigned int __section(.data) pgdir_shift = 39; -unsigned int __section(.data) ptrs_per_p4d = 1; +unsigned int __section(".data") __pgtable_l5_enabled; +unsigned int __section(".data") pgdir_shift = 39; +unsigned int __section(".data") ptrs_per_p4d = 1; #endif struct paging_config { @@ -30,7 +30,7 @@ static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; * Avoid putting the pointer into .bss as it will be cleared between * paging_prepare() and extract_kernel(). */ -unsigned long *trampoline_32bit __section(.data); +unsigned long *trampoline_32bit __section(".data"); extern struct boot_params *boot_params; int cmdline_find_option_bool(const char *option); diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c index 1fedabdb95ad..f7eb976b0a4b 100644 --- a/arch/x86/boot/tty.c +++ b/arch/x86/boot/tty.c @@ -25,7 +25,7 @@ int early_serial_base; * error during initialization. */ -static void __attribute__((section(".inittext"))) serial_putchar(int ch) +static void __section(".inittext") serial_putchar(int ch) { unsigned timeout = 0xffff; @@ -35,7 +35,7 @@ static void __attribute__((section(".inittext"))) serial_putchar(int ch) outb(ch, early_serial_base + TXR); } -static void __attribute__((section(".inittext"))) bios_putchar(int ch) +static void __section(".inittext") bios_putchar(int ch) { struct biosregs ireg; @@ -47,7 +47,7 @@ static void __attribute__((section(".inittext"))) bios_putchar(int ch) intcall(0x10, &ireg, NULL); } -void __attribute__((section(".inittext"))) putchar(int ch) +void __section(".inittext") putchar(int ch) { if (ch == '\n') putchar('\r'); /* \n -> \r\n */ @@ -58,7 +58,7 @@ void __attribute__((section(".inittext"))) putchar(int ch) serial_putchar(ch); } -void __attribute__((section(".inittext"))) puts(const char *str) +void __section(".inittext") puts(const char *str) { while (*str) putchar(*str++); diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h index cbf7fed22441..04bde0bb2003 100644 --- a/arch/x86/boot/video.h +++ b/arch/x86/boot/video.h @@ -78,7 +78,7 @@ struct card_info { u16 xmode_n; /* Size of unprobed mode range */ }; -#define __videocard struct card_info __attribute__((used,section(".videocards"))) +#define __videocard struct card_info __section(".videocards") __attribute__((used)) extern struct card_info video_cards[], video_cards_end[]; int mode_defined(u16 mode); /* video.c */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 1c129abb7f09..4e3099d9ae62 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -374,12 +374,12 @@ extern struct apic *apic; #define apic_driver(sym) \ static const struct apic *__apicdrivers_##sym __used \ __aligned(sizeof(struct apic *)) \ - __section(.apicdrivers) = { &sym } + __section(".apicdrivers") = { &sym } #define apic_drivers(sym1, sym2) \ static struct apic *__apicdrivers_##sym1##sym2[2] __used \ __aligned(sizeof(struct apic *)) \ - __section(.apicdrivers) = { &sym1, &sym2 } + __section(".apicdrivers") = { &sym1, &sym2 } extern struct apic *__apicdrivers[], *__apicdrivers_end[]; diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h index abe08690a887..69404eae9983 100644 --- a/arch/x86/include/asm/cache.h +++ b/arch/x86/include/asm/cache.h @@ -8,7 +8,7 @@ #define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) -#define __read_mostly __attribute__((__section__(".data..read_mostly"))) +#define __read_mostly __section(".data..read_mostly") #define INTERNODE_CACHE_SHIFT CONFIG_X86_INTERNODE_CACHE_SHIFT #define INTERNODE_CACHE_BYTES (1 << INTERNODE_CACHE_SHIFT) diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index de58391bdee0..cf0e25f45422 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -43,7 +43,7 @@ struct devs_id { #define sfi_device(i) \ static const struct devs_id *const __intel_mid_sfi_##i##_dev __used \ - __attribute__((__section__(".x86_intel_mid_dev.init"))) = &i + __section(".x86_intel_mid_dev.init") = &i /** * struct mid_sd_board_info - template for SD device creation diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 02a0cf547d7b..2dfc8d380dab 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -9,7 +9,7 @@ #include /* Provide __cpuidle; we can't safely include */ -#define __cpuidle __attribute__((__section__(".cpuidle.text"))) +#define __cpuidle __section(".cpuidle.text") /* * Interrupt control: diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index c9f5df0a1c10..2f62bbdd9d12 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -54,7 +54,7 @@ bool sme_active(void); bool sev_active(void); bool sev_es_active(void); -#define __bss_decrypted __attribute__((__section__(".bss..decrypted"))) +#define __bss_decrypted __section(".bss..decrypted") #else /* !CONFIG_AMD_MEM_ENCRYPT */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 7d7a064af6ff..389d851a02c4 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -119,7 +119,7 @@ void *extend_brk(size_t size, size_t align); * executable.) */ #define RESERVE_BRK(name,sz) \ - static void __section(.discard.text) __used notrace \ + static void __section(".discard.text") __used notrace \ __brk_reservation_fn_##name##__(void) { \ asm volatile ( \ ".pushsection .brk_reservation,\"aw\",@nobits;" \ diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 9d033693519a..67944128876d 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -38,7 +38,7 @@ struct _tlb_table { #define cpu_dev_register(cpu_devX) \ static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \ - __attribute__((__section__(".x86_cpu_dev.init"))) = \ + __section(".x86_cpu_dev.init") = \ &cpu_devX; extern const struct cpu_dev *const __x86_cpu_dev_start[], diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 4199f25c0063..05e117137b45 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -84,7 +84,7 @@ static struct desc_ptr startup_gdt_descr = { .address = 0, }; -#define __head __section(.head.text) +#define __head __section(".head.text") static void __head *fixup_pointer(void *ptr, unsigned long physaddr) { diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index ebb7edc8bc0a..efbb3de472df 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -37,13 +37,13 @@ * reside in the .data section so as not to be zeroed out when the .bss * section is later cleared. */ -u64 sme_me_mask __section(.data) = 0; -u64 sev_status __section(.data) = 0; +u64 sme_me_mask __section(".data") = 0; +u64 sev_status __section(".data") = 0; EXPORT_SYMBOL(sme_me_mask); DEFINE_STATIC_KEY_FALSE(sev_enable_key); EXPORT_SYMBOL_GPL(sev_enable_key); -bool sev_enabled __section(.data); +bool sev_enabled __section(".data"); /* Buffer used for early in-place encryption by BSP, no locking needed */ static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE); diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 68d75379e06a..733b983f3a89 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -81,7 +81,7 @@ struct sme_populate_pgd_data { * section is 2MB aligned to allow for simple pagetable setup using only * PMD entries (see vmlinux.lds.S). */ -static char sme_workarea[2 * PMD_PAGE_SIZE] __section(.init.scratch); +static char sme_workarea[2 * PMD_PAGE_SIZE] __section(".init.scratch"); static char sme_cmdline_arg[] __initdata = "mem_encrypt"; static char sme_cmdline_on[] __initdata = "on"; diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c index c0a502f7e3a7..9ac7457f52a3 100644 --- a/arch/x86/platform/pvh/enlighten.c +++ b/arch/x86/platform/pvh/enlighten.c @@ -19,8 +19,8 @@ * pvh_bootparams and pvh_start_info need to live in the data segment since * they are used after startup_{32|64}, which clear .bss, are invoked. */ -struct boot_params pvh_bootparams __attribute__((section(".data"))); -struct hvm_start_info pvh_start_info __attribute__((section(".data"))); +struct boot_params pvh_bootparams __section(".data"); +struct hvm_start_info pvh_start_info __section(".data"); unsigned int pvh_start_info_sz = sizeof(pvh_start_info); diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 2961234d0795..7b37a412f829 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -14,9 +14,9 @@ #include "../boot/string.h" -u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(.kexec-purgatory); +u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(".kexec-purgatory"); -struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(.kexec-purgatory); +struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(".kexec-purgatory"); static int verify_sha256_digest(void) { diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c index 27361cbb7ca9..fdcd58af707a 100644 --- a/arch/x86/um/stub_segv.c +++ b/arch/x86/um/stub_segv.c @@ -8,7 +8,7 @@ #include #include -void __attribute__ ((__section__ (".__syscall_stub"))) +void __section(".__syscall_stub") stub_segv_handler(int sig, siginfo_t *info, void *p) { ucontext_t *uc = p; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 205b1176084f..aa9f50fccc5d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -71,7 +71,7 @@ EXPORT_SYMBOL_GPL(xen_have_vector_callback); * NB: needs to live in .data because it's used by xen_prepare_pvh which runs * before clearing the bss. */ -uint32_t xen_start_flags __attribute__((section(".data"))) = 0; +uint32_t xen_start_flags __section(".data") = 0; EXPORT_SYMBOL(xen_start_flags); /* diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 80a79db72fcf..0d5e34b9e6f9 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -21,7 +21,7 @@ * The variable xen_pvh needs to live in the data segment since it is used * after startup_{32|64} is invoked, which will clear the .bss segment. */ -bool xen_pvh __attribute__((section(".data"))) = 0; +bool xen_pvh __section(".data") = 0; void __init xen_pvh_init(struct boot_params *boot_params) { diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index be2c78f71695..ed184106e4cf 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -93,7 +93,7 @@ typedef struct tagtable { } tagtable_t; #define __tagtable(tag, fn) static tagtable_t __tagtable_##fn \ - __attribute__((used, section(".taglist"))) = { tag, fn } + __section(".taglist") __attribute__((used)) = { tag, fn } /* parse current tag */ diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 0a9261a099bd..f83dac54ed85 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -4363,7 +4363,7 @@ struct of_clk_provider { extern struct of_device_id __clk_of_table; static const struct of_device_id __clk_of_table_sentinel - __used __section(__clk_of_table_end); + __used __section("__clk_of_table_end"); static LIST_HEAD(of_clk_providers); static DEFINE_MUTEX(of_clk_mutex); diff --git a/drivers/clocksource/timer-probe.c b/drivers/clocksource/timer-probe.c index ee9574da53c0..b7860bc0db4b 100644 --- a/drivers/clocksource/timer-probe.c +++ b/drivers/clocksource/timer-probe.c @@ -11,7 +11,7 @@ extern struct of_device_id __timer_of_table[]; static const struct of_device_id __timer_of_table_sentinel - __used __section(__timer_of_table_end); + __used __section("__timer_of_table_end"); void __init timer_probe(void) { diff --git a/drivers/irqchip/irqchip.c b/drivers/irqchip/irqchip.c index d2341153e181..3570f0a588c4 100644 --- a/drivers/irqchip/irqchip.c +++ b/drivers/irqchip/irqchip.c @@ -22,7 +22,7 @@ * special section. */ static const struct of_device_id -irqchip_of_match_end __used __section(__irqchip_of_table_end); +irqchip_of_match_end __used __section("__irqchip_of_table_end"); extern struct of_device_id __irqchip_of_table[]; diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 46b9371c8a33..bcd154485972 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -162,7 +162,7 @@ static int __init __reserved_mem_alloc_size(unsigned long node, } static const struct of_device_id __rmem_of_table_sentinel - __used __section(__reservedmem_of_table_end); + __used __section("__reservedmem_of_table_end"); /** * __reserved_mem_init_node() - call region specific reserved memory init code diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 764c2de31771..681209db42a8 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -34,7 +34,7 @@ extern struct thermal_governor *__governor_thermal_table_end[]; #define THERMAL_TABLE_ENTRY(table, name) \ static typeof(name) *__thermal_table_entry_##name \ - __used __section(__##table##_thermal_table) = &name + __used __section("__" #table "_thermal_table") = &name #define THERMAL_GOVERNOR_DECLARE(name) THERMAL_TABLE_ENTRY(governor, name) diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h index 4d9bd6bb63ca..3c392b1512ac 100644 --- a/fs/xfs/xfs_message.h +++ b/fs/xfs/xfs_message.h @@ -42,7 +42,7 @@ do { \ #define xfs_printk_once(func, dev, fmt, ...) \ ({ \ - static bool __section(.data.once) __print_once; \ + static bool __section(".data.once") __print_once; \ bool __ret_print_once = !__print_once; \ \ if (!__print_once) { \ diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 18b0f4eee8cb..76a10e0dca9f 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -141,7 +141,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint, #ifndef WARN_ON_ONCE #define WARN_ON_ONCE(condition) ({ \ - static bool __section(.data.once) __warned; \ + static bool __section(".data.once") __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ @@ -153,7 +153,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint, #endif #define WARN_ONCE(condition, format...) ({ \ - static bool __section(.data.once) __warned; \ + static bool __section(".data.once") __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ @@ -164,7 +164,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint, }) #define WARN_TAINT_ONCE(condition, taint, format...) ({ \ - static bool __section(.data.once) __warned; \ + static bool __section(".data.once") __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ diff --git a/include/asm-generic/error-injection.h b/include/asm-generic/error-injection.h index 80ca61058dd2..7ddd9dc10ce9 100644 --- a/include/asm-generic/error-injection.h +++ b/include/asm-generic/error-injection.h @@ -25,7 +25,7 @@ struct pt_regs; */ #define ALLOW_ERROR_INJECTION(fname, _etype) \ static struct error_injection_entry __used \ - __attribute__((__section__("_error_injection_whitelist"))) \ + __section("_error_injection_whitelist") \ _eil_addr_##fname = { \ .addr = (unsigned long)fname, \ .etype = EI_ETYPE_##_etype, \ diff --git a/include/asm-generic/kprobes.h b/include/asm-generic/kprobes.h index 4a982089c95c..060eab094e5a 100644 --- a/include/asm-generic/kprobes.h +++ b/include/asm-generic/kprobes.h @@ -10,11 +10,11 @@ */ # define __NOKPROBE_SYMBOL(fname) \ static unsigned long __used \ - __attribute__((__section__("_kprobe_blacklist"))) \ + __section("_kprobe_blacklist") \ _kbl_addr_##fname = (unsigned long)fname; # define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname) /* Use this to forbid a kprobes attach on very low level functions */ -# define __kprobes __attribute__((__section__(".kprobes.text"))) +# define __kprobes __section(".kprobes.text") # define nokprobe_inline __always_inline #else # define NOKPROBE_SYMBOL(fname) diff --git a/include/kunit/test.h b/include/kunit/test.h index a423fffefea0..9197da792336 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -288,7 +288,7 @@ static inline int kunit_run_all_tests(void) static struct kunit_suite *unique_array[] = { __VA_ARGS__, NULL }; \ kunit_test_suites_for_module(unique_array); \ static struct kunit_suite **unique_suites \ - __used __section(.kunit_test_suites) = unique_array + __used __section(".kunit_test_suites") = unique_array /** * kunit_test_suites() - used to register one or more &struct kunit_suite diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 143c6ffce2db..39263c6b52e1 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1153,7 +1153,7 @@ struct acpi_probe_entry { #define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, \ valid, data, fn) \ static const struct acpi_probe_entry __acpi_probe_##name \ - __used __section(__##table##_acpi_probe_table) = { \ + __used __section("__" #table "_acpi_probe_table") = { \ .id = table_id, \ .type = subtable, \ .subtable_valid = valid, \ @@ -1164,7 +1164,7 @@ struct acpi_probe_entry { #define ACPI_DECLARE_SUBTABLE_PROBE_ENTRY(table, name, table_id, \ subtable, valid, data, fn) \ static const struct acpi_probe_entry __acpi_probe_##name \ - __used __section(__##table##_acpi_probe_table) = { \ + __used __section("__" #table "_acpi_probe_table") = { \ .id = table_id, \ .type = subtable, \ .subtable_valid = valid, \ diff --git a/include/linux/cache.h b/include/linux/cache.h index 1aa8009f6d06..d742c57eaee5 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -34,7 +34,7 @@ * but may get written to during init, so can't live in .rodata (via "const"). */ #ifndef __ro_after_init -#define __ro_after_init __attribute__((__section__(".data..ro_after_init"))) +#define __ro_after_init __section(".data..ro_after_init") #endif #ifndef ____cacheline_aligned diff --git a/include/linux/compiler.h b/include/linux/compiler.h index ac45f6d40d39..e512f5505dad 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -24,7 +24,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, long ______r; \ static struct ftrace_likely_data \ __aligned(4) \ - __section(_ftrace_annotated_branch) \ + __section("_ftrace_annotated_branch") \ ______f = { \ .data.func = __func__, \ .data.file = __FILE__, \ @@ -60,7 +60,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #define __trace_if_value(cond) ({ \ static struct ftrace_branch_data \ __aligned(4) \ - __section(_ftrace_branch) \ + __section("_ftrace_branch") \ __if_trace = { \ .func = __func__, \ .file = __FILE__, \ @@ -118,7 +118,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, ".popsection\n\t" /* Annotate a C jump table to allow objtool to follow the code flow */ -#define __annotate_jump_table __section(.rodata..c_jump_table) +#define __annotate_jump_table __section(".rodata..c_jump_table") #else #define annotate_reachable() @@ -206,7 +206,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, * visible to the compiler. */ #define __ADDRESSABLE(sym) \ - static void * __section(.discard.addressable) __used \ + static void * __section(".discard.addressable") __used \ __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)&sym; /** diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index ea7b756b1c8f..b2a3f4f641a7 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -254,7 +254,7 @@ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-section-variable-attribute * clang: https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate */ -#define __section(S) __attribute__((__section__(#S))) +#define __section(section) __attribute__((__section__(section))) /* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-unused-function-attribute diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 8aa84c052fdf..d6428aaf67e7 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -173,7 +173,7 @@ void cpu_startup_entry(enum cpuhp_state state); void cpu_idle_poll_ctrl(bool enable); /* Attach to any functions which should be considered cpuidle. */ -#define __cpuidle __attribute__((__section__(".cpuidle.text"))) +#define __cpuidle __section(".cpuidle.text") bool cpu_in_idle(unsigned long pc); diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 8aa0c7c2608c..a57ee75342cf 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -84,7 +84,7 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) \ static struct _ddebug __aligned(8) \ - __section(__dyndbg) name = { \ + __section("__dyndbg") name = { \ .modname = KBUILD_MODNAME, \ .function = __func__, \ .filename = __FILE__, \ diff --git a/include/linux/export.h b/include/linux/export.h index 8933ff6ad23a..fceb5e855717 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -130,7 +130,7 @@ struct kernel_symbol { * discarded in the final link stage. */ #define __ksym_marker(sym) \ - static int __ksym_marker_##sym[0] __section(.discard.ksym) __used + static int __ksym_marker_##sym[0] __section(".discard.ksym") __used #define __EXPORT_SYMBOL(sym, sec, ns) \ __ksym_marker(sym); \ diff --git a/include/linux/firmware.h b/include/linux/firmware.h index c15acadc6cf4..84e346ae766e 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -36,7 +36,7 @@ struct builtin_fw { #define DECLARE_BUILTIN_FIRMWARE_SIZE(name, blob, size) \ static const struct builtin_fw __fw_concat(__builtin_fw,__COUNTER__) \ - __used __section(.builtin_fw) = { name, blob, size } + __used __section(".builtin_fw") = { name, blob, size } #if defined(CONFIG_FW_LOADER) || (defined(CONFIG_FW_LOADER_MODULE) && defined(MODULE)) int request_firmware(const struct firmware **fw, const char *name, diff --git a/include/linux/init.h b/include/linux/init.h index 212fc9e2f691..7b53cb3092ee 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -47,11 +47,11 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(.init.text) __cold __latent_entropy __noinitretpoline -#define __initdata __section(.init.data) -#define __initconst __section(.init.rodata) -#define __exitdata __section(.exit.data) -#define __exit_call __used __section(.exitcall.exit) +#define __init __section(".init.text") __cold __latent_entropy __noinitretpoline +#define __initdata __section(".init.data") +#define __initconst __section(".init.rodata") +#define __exitdata __section(".exit.data") +#define __exit_call __used __section(".exitcall.exit") /* * modpost check for section mismatches during the kernel build. @@ -70,9 +70,9 @@ * * The markers follow same syntax rules as __init / __initdata. */ -#define __ref __section(.ref.text) noinline -#define __refdata __section(.ref.data) -#define __refconst __section(.ref.rodata) +#define __ref __section(".ref.text") noinline +#define __refdata __section(".ref.data") +#define __refconst __section(".ref.rodata") #ifdef MODULE #define __exitused @@ -80,16 +80,16 @@ #define __exitused __used #endif -#define __exit __section(.exit.text) __exitused __cold notrace +#define __exit __section(".exit.text") __exitused __cold notrace /* Used for MEMORY_HOTPLUG */ -#define __meminit __section(.meminit.text) __cold notrace \ +#define __meminit __section(".meminit.text") __cold notrace \ __latent_entropy -#define __meminitdata __section(.meminit.data) -#define __meminitconst __section(.meminit.rodata) -#define __memexit __section(.memexit.text) __exitused __cold notrace -#define __memexitdata __section(.memexit.data) -#define __memexitconst __section(.memexit.rodata) +#define __meminitdata __section(".meminit.data") +#define __meminitconst __section(".meminit.rodata") +#define __memexit __section(".memexit.text") __exitused __cold notrace +#define __memexitdata __section(".memexit.data") +#define __memexitconst __section(".memexit.rodata") /* For assembly routines */ #define __HEAD .section ".head.text","ax" @@ -254,7 +254,7 @@ struct obs_kernel_param { static const char __setup_str_##unique_id[] __initconst \ __aligned(1) = str; \ static struct obs_kernel_param __setup_##unique_id \ - __used __section(.init.setup) \ + __used __section(".init.setup") \ __attribute__((aligned((sizeof(long))))) \ = { __setup_str_##unique_id, fn, early } @@ -298,7 +298,7 @@ void __init parse_early_options(char *cmdline); #endif /* Data marked not to be saved by software suspend */ -#define __nosavedata __section(.data..nosave) +#define __nosavedata __section(".data..nosave") #ifdef MODULE #define __exit_p(x) x diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2c620d7ac432..b2412b4d4c20 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -40,12 +40,12 @@ extern struct cred init_cred; /* Attach to the init_task data structure for proper alignment */ #ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK -#define __init_task_data __attribute__((__section__(".data..init_task"))) +#define __init_task_data __section(".data..init_task") #else #define __init_task_data /**/ #endif /* Attach to the thread_info data structure for proper alignment */ -#define __init_thread_info __attribute__((__section__(".data..init_thread_info"))) +#define __init_thread_info __section(".data..init_thread_info") #endif diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index f9aee3538461..ee8299eb1f52 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -792,9 +792,9 @@ extern int arch_early_irq_init(void); * We want to know which function is an entrypoint of a hardirq or a softirq. */ #ifndef __irq_entry -# define __irq_entry __attribute__((__section__(".irqentry.text"))) +# define __irq_entry __section(".irqentry.text") #endif -#define __softirq_entry __attribute__((__section__(".softirqentry.text"))) +#define __softirq_entry __section(".softirqentry.text") #endif diff --git a/include/linux/kernel.h b/include/linux/kernel.h index c629215fdad9..2f05e9128201 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -729,7 +729,7 @@ do { \ #define do_trace_printk(fmt, args...) \ do { \ static const char *trace_printk_fmt __used \ - __attribute__((section("__trace_printk_fmt"))) = \ + __section("__trace_printk_fmt") = \ __builtin_constant_p(fmt) ? fmt : NULL; \ \ __trace_printk_check_format(fmt, ##args); \ @@ -773,7 +773,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...); #define trace_puts(str) ({ \ static const char *trace_printk_fmt __used \ - __attribute__((section("__trace_printk_fmt"))) = \ + __section("__trace_printk_fmt") = \ __builtin_constant_p(str) ? str : NULL; \ \ if (__builtin_constant_p(str)) \ @@ -795,7 +795,7 @@ extern void trace_dump_stack(int skip); do { \ if (__builtin_constant_p(fmt)) { \ static const char *trace_printk_fmt __used \ - __attribute__((section("__trace_printk_fmt"))) = \ + __section("__trace_printk_fmt") = \ __builtin_constant_p(fmt) ? fmt : NULL; \ \ __ftrace_vbprintk(_THIS_IP_, trace_printk_fmt, vargs); \ diff --git a/include/linux/linkage.h b/include/linux/linkage.h index d796ec20d114..5bcfbd972e97 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -36,8 +36,8 @@ __stringify(name)) #endif -#define __page_aligned_data __section(.data..page_aligned) __aligned(PAGE_SIZE) -#define __page_aligned_bss __section(.bss..page_aligned) __aligned(PAGE_SIZE) +#define __page_aligned_data __section(".data..page_aligned") __aligned(PAGE_SIZE) +#define __page_aligned_bss __section(".bss..page_aligned") __aligned(PAGE_SIZE) /* * For assembly routines. diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 8814e3d5952d..c503f7ab8afb 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1611,12 +1611,12 @@ extern struct lsm_info __start_early_lsm_info[], __end_early_lsm_info[]; #define DEFINE_LSM(lsm) \ static struct lsm_info __lsm_##lsm \ - __used __section(.lsm_info.init) \ + __used __section(".lsm_info.init") \ __aligned(sizeof(unsigned long)) #define DEFINE_EARLY_LSM(lsm) \ static struct lsm_info __early_lsm_##lsm \ - __used __section(.early_lsm_info.init) \ + __used __section(".early_lsm_info.init") \ __aligned(sizeof(unsigned long)) #ifdef CONFIG_SECURITY_SELINUX_DISABLE diff --git a/include/linux/module.h b/include/linux/module.h index a29187f7c360..7ccdf87f376f 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -278,7 +278,7 @@ extern typeof(name) __mod_##type##__##name##_device_table \ .version = _version, \ }; \ static const struct module_version_attribute \ - __used __attribute__ ((__section__ ("__modver"))) \ + __used __section("__modver") \ * __moduleparam_const __modver_attr = &___modver_attr #endif diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 47879fc7f75e..6388eb9734a5 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -22,7 +22,7 @@ #define __MODULE_INFO(tag, name, info) \ static const char __UNIQUE_ID(name)[] \ - __used __attribute__((section(".modinfo"), unused, aligned(1))) \ + __used __section(".modinfo") __attribute__((unused, aligned(1))) \ = __MODULE_INFO_PREFIX __stringify(tag) "=" info #define __MODULE_PARM_TYPE(name, _type) \ @@ -289,7 +289,7 @@ struct kparam_array static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used \ - __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \ + __section("__param") __attribute__ ((unused, aligned(sizeof(void *)))) \ = { __param_str_##name, THIS_MODULE, ops, \ VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } } diff --git a/include/linux/mtd/xip.h b/include/linux/mtd/xip.h index a4e352b1dfe6..3cac9360588f 100644 --- a/include/linux/mtd/xip.h +++ b/include/linux/mtd/xip.h @@ -28,7 +28,7 @@ * those functions so they get relocated to ram. */ #ifdef CONFIG_XIP_KERNEL -#define __xipram noinline __attribute__ ((__section__ (".xiptext"))) +#define __xipram noinline __section(".xiptext") #endif /* diff --git a/include/linux/objtool.h b/include/linux/objtool.h index ab82c793c897..577f51436cf9 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -60,7 +60,7 @@ struct unwind_hint { * For more information, see tools/objtool/Documentation/stack-validation.txt. */ #define STACK_FRAME_NON_STANDARD(func) \ - static void __used __section(.discard.func_stack_frame_non_standard) \ + static void __used __section(".discard.func_stack_frame_non_standard") \ *__func_stack_frame_non_standard_##func = func #else /* __ASSEMBLY__ */ diff --git a/include/linux/of.h b/include/linux/of.h index 481ec0467285..5d51891cbf1a 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1299,7 +1299,7 @@ static inline int of_get_available_child_count(const struct device_node *np) #if defined(CONFIG_OF) && !defined(MODULE) #define _OF_DECLARE(table, name, compat, fn, fn_type) \ static const struct of_device_id __of_table_##name \ - __used __section(__##table##_of_table) \ + __used __section("__" #table "_of_table") \ = { .compatible = compat, \ .data = (fn == (fn_type)NULL) ? fn : fn } #else diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 176bfbd52d97..dff7040f629a 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -51,7 +51,7 @@ PER_CPU_ATTRIBUTES #define __PCPU_DUMMY_ATTRS \ - __attribute__((section(".discard"), unused)) + __section(".discard") __attribute__((unused)) /* * s390 and alpha modules require percpu variables to be defined as diff --git a/include/linux/printk.h b/include/linux/printk.h index 78479633ccfc..fe7eb2351610 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -437,7 +437,7 @@ extern int kptr_restrict; #ifdef CONFIG_PRINTK #define printk_once(fmt, ...) \ ({ \ - static bool __section(.data.once) __print_once; \ + static bool __section(".data.once") __print_once; \ bool __ret_print_once = !__print_once; \ \ if (!__print_once) { \ @@ -448,7 +448,7 @@ extern int kptr_restrict; }) #define printk_deferred_once(fmt, ...) \ ({ \ - static bool __section(.data.once) __print_once; \ + static bool __section(".data.once") __print_once; \ bool __ret_print_once = !__print_once; \ \ if (!__print_once) { \ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 7c1ceff02852..6cdd0152c253 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -299,7 +299,7 @@ static inline int rcu_read_lock_any_held(void) */ #define RCU_LOCKDEP_WARN(c, s) \ do { \ - static bool __section(.data.unlikely) __warned; \ + static bool __section(".data.unlikely") __warned; \ if (debug_lockdep_rcu_enabled() && !__warned && (c)) { \ __warned = true; \ lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h index 00c45a0e6abe..ae51f4529fc9 100644 --- a/include/linux/sched/debug.h +++ b/include/linux/sched/debug.h @@ -43,7 +43,7 @@ extern void proc_sched_set_task(struct task_struct *p); #endif /* Attach to any functions which should be ignored in wchan output. */ -#define __sched __attribute__((__section__(".sched.text"))) +#define __sched __section(".sched.text") /* Linker adds these: start and end of __sched functions */ extern char __sched_text_start[], __sched_text_end[]; diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 8a99279a579b..ff63c2963359 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -373,7 +373,7 @@ extern const struct earlycon_id *__earlycon_table_end[]; .compatible = compat, \ .setup = fn }; \ static const struct earlycon_id EARLYCON_USED_OR_UNUSED \ - __section(__earlycon_table) \ + __section("__earlycon_table") \ * const __PASTE(__p, unique_id) = &unique_id #define OF_EARLYCON_DECLARE(_name, compat, fn) \ diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index f2f12d746dbd..79897841a2cc 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -76,7 +76,7 @@ #define LOCK_SECTION_END \ ".previous\n\t" -#define __lockfunc __attribute__((section(".spinlock.text"))) +#define __lockfunc __section(".spinlock.text") /* * Pull the arch_spinlock_t and arch_rwlock_t definitions: diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 2eda7678fe1d..37bea07c12f2 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -144,7 +144,7 @@ extern struct trace_event_functions exit_syscall_print_funcs; .flags = TRACE_EVENT_FL_CAP_ANY, \ }; \ static struct trace_event_call __used \ - __attribute__((section("_ftrace_events"))) \ + __section("_ftrace_events") \ *__event_enter_##sname = &event_enter_##sname; #define SYSCALL_TRACE_EXIT_EVENT(sname) \ @@ -160,7 +160,7 @@ extern struct trace_event_functions exit_syscall_print_funcs; .flags = TRACE_EVENT_FL_CAP_ANY, \ }; \ static struct trace_event_call __used \ - __attribute__((section("_ftrace_events"))) \ + __section("_ftrace_events") \ *__event_exit_##sname = &event_exit_##sname; #define SYSCALL_METADATA(sname, nb, ...) \ @@ -184,7 +184,7 @@ extern struct trace_event_functions exit_syscall_print_funcs; .enter_fields = LIST_HEAD_INIT(__syscall_meta_##sname.enter_fields), \ }; \ static struct syscall_metadata __used \ - __attribute__((section("__syscalls_metadata"))) \ + __section("__syscalls_metadata") \ *__p_syscall_meta_##sname = &__syscall_meta_##sname; static inline int is_syscall_trace_event(struct trace_event_call *tp_event) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 5c6943354049..d321fe5ad1a1 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -709,7 +709,7 @@ do { \ tracing_record_cmdline(current); \ if (__builtin_constant_p(fmt)) { \ static const char *trace_printk_fmt \ - __attribute__((section("__trace_printk_fmt"))) = \ + __section("__trace_printk_fmt") = \ __builtin_constant_p(fmt) ? fmt : NULL; \ \ __trace_bprintk(ip, trace_printk_fmt, ##args); \ diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 81fa0b2f271e..0f21617f1a66 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -119,7 +119,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define __TRACEPOINT_ENTRY(name) \ static tracepoint_ptr_t __tracepoint_ptr_##name __used \ - __section(__tracepoints_ptrs) = &__tracepoint_##name + __section("__tracepoints_ptrs") = &__tracepoint_##name #endif #endif /* _LINUX_TRACEPOINT_H */ @@ -286,11 +286,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) */ #define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args) \ static const char __tpstrtab_##_name[] \ - __section(__tracepoints_strings) = #_name; \ + __section("__tracepoints_strings") = #_name; \ extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name); \ int __traceiter_##_name(void *__data, proto); \ struct tracepoint __tracepoint_##_name __used \ - __section(__tracepoints) = { \ + __section("__tracepoints") = { \ .name = __tpstrtab_##_name, \ .key = STATIC_KEY_INIT_FALSE, \ .static_call_key = &STATIC_CALL_KEY(tp_func_##_name), \ @@ -396,7 +396,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) static const char *___tp_str __tracepoint_string = str; \ ___tp_str; \ }) -#define __tracepoint_string __used __section(__tracepoint_str) +#define __tracepoint_string __used __section("__tracepoint_str") #else /* * tracepoint_string() is used to save the string address for userspace diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index 1ce3be63add1..cd74bffed5c6 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -79,7 +79,7 @@ static union { \ struct bpf_raw_event_map event; \ btf_trace_##call handler; \ } __bpf_trace_tp_map_##call __used \ -__attribute__((section("__bpf_raw_tp_map"))) = { \ +__section("__bpf_raw_tp_map") = { \ .event = { \ .tp = &__tracepoint_##call, \ .bpf_func = __bpf_trace_##template, \ diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 1bc3e7bba9a4..7785961d82ba 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -45,7 +45,7 @@ TRACE_MAKE_SYSTEM_STR(); .eval_value = a \ }; \ static struct trace_eval_map __used \ - __attribute__((section("_ftrace_eval_map"))) \ + __section("_ftrace_eval_map") \ *TRACE_SYSTEM##_##a = &__##TRACE_SYSTEM##_##a #undef TRACE_DEFINE_SIZEOF @@ -58,7 +58,7 @@ TRACE_MAKE_SYSTEM_STR(); .eval_value = sizeof(a) \ }; \ static struct trace_eval_map __used \ - __attribute__((section("_ftrace_eval_map"))) \ + __section("_ftrace_eval_map") \ *TRACE_SYSTEM##_##a = &__##TRACE_SYSTEM##_##a /* @@ -607,7 +607,7 @@ static inline notrace int trace_event_get_offsets_##call( \ * // its only safe to use pointers when doing linker tricks to * // create an array. * static struct trace_event_call __used - * __attribute__((section("_ftrace_events"))) *__event_ = &event_; + * __section("_ftrace_events") *__event_ = &event_; * */ @@ -755,7 +755,7 @@ static struct trace_event_call __used event_##call = { \ .flags = TRACE_EVENT_FL_TRACEPOINT, \ }; \ static struct trace_event_call __used \ -__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call +__section("_ftrace_events") *__event_##call = &event_##call #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, call, proto, args, print) \ @@ -772,6 +772,6 @@ static struct trace_event_call __used event_##call = { \ .flags = TRACE_EVENT_FL_TRACEPOINT, \ }; \ static struct trace_event_call __used \ -__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call +__section("_ftrace_events") *__event_##call = &event_##call #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 4fb15fa96734..fe9de067771c 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -40,10 +40,10 @@ extern const u8 kallsyms_names[] __weak; * has one (eg: FRV). */ extern const unsigned int kallsyms_num_syms -__attribute__((weak, section(".rodata"))); +__section(".rodata") __attribute__((weak)); extern const unsigned long kallsyms_relative_base -__attribute__((weak, section(".rodata"))); +__section(".rodata") __attribute__((weak)); extern const char kallsyms_token_table[] __weak; extern const u16 kallsyms_token_index[] __weak; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 6d93f4518734..f232305dcefe 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -2504,7 +2504,7 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p, } const struct sched_class dl_sched_class - __attribute__((section("__dl_sched_class"))) = { + __section("__dl_sched_class") = { .enqueue_task = enqueue_task_dl, .dequeue_task = dequeue_task_dl, .yield_task = yield_task_dl, diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e17012be4d14..290f9e38378c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -11159,7 +11159,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task * All the scheduling class methods: */ const struct sched_class fair_sched_class - __attribute__((section("__fair_sched_class"))) = { + __section("__fair_sched_class") = { .enqueue_task = enqueue_task_fair, .dequeue_task = dequeue_task_fair, .yield_task = yield_task_fair, diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index f324dc36fc43..24d0ee26377d 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -458,7 +458,7 @@ static void update_curr_idle(struct rq *rq) * Simple, special scheduling class for the per-CPU idle tasks: */ const struct sched_class idle_sched_class - __attribute__((section("__idle_sched_class"))) = { + __section("__idle_sched_class") = { /* no enqueue/yield_task for idle tasks */ /* dequeue is not valid, we print a debug message there: */ diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index f215eea6a966..49ec096a8aa1 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2430,7 +2430,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) } const struct sched_class rt_sched_class - __attribute__((section("__rt_sched_class"))) = { + __section("__rt_sched_class") = { .enqueue_task = enqueue_task_rt, .dequeue_task = dequeue_task_rt, .yield_task = yield_task_rt, diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index 394bc8126a1e..ceb5b6b12561 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c @@ -110,7 +110,7 @@ static void update_curr_stop(struct rq *rq) * Simple, special scheduling class for the per-CPU stop tasks: */ const struct sched_class stop_sched_class - __attribute__((section("__stop_sched_class"))) = { + __section("__stop_sched_class") = { .enqueue_task = enqueue_task_stop, .dequeue_task = dequeue_task_stop, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 34e0c4d5a6e7..f3f5e77123ad 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -99,7 +99,7 @@ enum trace_type { /* Use this for memory failure errors */ #define MEM_FAIL(condition, fmt, ...) ({ \ - static bool __section(.data.once) __warned; \ + static bool __section(".data.once") __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 70d3d0a09053..90f81d33fa3f 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -176,7 +176,7 @@ struct trace_event_call __used event_##call = { \ .flags = TRACE_EVENT_FL_IGNORE_ENABLE, \ }; \ static struct trace_event_call __used \ -__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; +__section("_ftrace_events") *__event_##call = &event_##call; #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, struct_name, etype, tstruct, print) \ diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 69341b36f271..f882ce0d9327 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -2254,7 +2254,7 @@ static void add_header(struct buffer *b, struct module *mod) buf_printf(b, "MODULE_INFO(name, KBUILD_MODNAME);\n"); buf_printf(b, "\n"); buf_printf(b, "__visible struct module __this_module\n"); - buf_printf(b, "__section(.gnu.linkonce.this_module) = {\n"); + buf_printf(b, "__section(\".gnu.linkonce.this_module\") = {\n"); buf_printf(b, "\t.name = KBUILD_MODNAME,\n"); if (mod->has_init) buf_printf(b, "\t.init = init_module,\n"); @@ -2308,7 +2308,7 @@ static int add_versions(struct buffer *b, struct module *mod) buf_printf(b, "\n"); buf_printf(b, "static const struct modversion_info ____versions[]\n"); - buf_printf(b, "__used __section(__versions) = {\n"); + buf_printf(b, "__used __section(\"__versions\") = {\n"); for (s = mod->unres; s; s = s->next) { if (!s->module) diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index ab82c793c897..577f51436cf9 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -60,7 +60,7 @@ struct unwind_hint { * For more information, see tools/objtool/Documentation/stack-validation.txt. */ #define STACK_FRAME_NON_STANDARD(func) \ - static void __used __section(.discard.func_stack_frame_non_standard) \ + static void __used __section(".discard.func_stack_frame_non_standard") \ *__func_stack_frame_non_standard_##func = func #else /* __ASSEMBLY__ */ -- cgit v1.3.1 From e722a295cf493388dae474745d30e91e1a2ec549 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 27 Aug 2020 14:36:27 +0200 Subject: staging: ion: remove from the tree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ION android code has long been marked to be removed, now that we dma-buf support merged into the real part of the kernel. It was thought that we could wait to remove the ion kernel at a later time, but as the out-of-tree Android fork of the ion code has diverged quite a bit, and any Android device using the ion interface uses that forked version and not this in-tree version, the in-tree copy of the code is abandonded and not used by anyone. Combine this abandoned codebase with the need to make changes to it in order to keep the kernel building properly, which then causes merge issues when merging those changes into the out-of-tree Android code, and you end up with two different groups of people (the in-kernel-tree developers, and the Android kernel developers) who are both annoyed at the current situation. Because of this problem, just drop the in-kernel copy of the ion code now, as it's not used, and is only causing problems for everyone involved. Cc: "Arve Hjønnevåg" Cc: "Christian König" Cc: Christian Brauner Cc: Christoph Hellwig Cc: Hridya Valsaraju Cc: Joel Fernandes Cc: John Stultz Cc: Laura Abbott Cc: Martijn Coenen Cc: Shuah Khan Cc: Sumit Semwal Cc: Suren Baghdasaryan Cc: Todd Kjos Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20200827123627.538189-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 10 - drivers/staging/android/Kconfig | 2 - drivers/staging/android/Makefile | 2 - drivers/staging/android/TODO | 5 - drivers/staging/android/ion/Kconfig | 27 - drivers/staging/android/ion/Makefile | 4 - drivers/staging/android/ion/ion.c | 649 --------------------- drivers/staging/android/ion/ion.h | 302 ---------- drivers/staging/android/ion/ion_cma_heap.c | 138 ----- drivers/staging/android/ion/ion_heap.c | 286 --------- drivers/staging/android/ion/ion_page_pool.c | 155 ----- drivers/staging/android/ion/ion_system_heap.c | 377 ------------ drivers/staging/android/uapi/ion.h | 127 ---- tools/testing/selftests/Makefile | 3 +- tools/testing/selftests/android/Makefile | 39 -- tools/testing/selftests/android/config | 5 - tools/testing/selftests/android/ion/.gitignore | 4 - tools/testing/selftests/android/ion/Makefile | 20 - tools/testing/selftests/android/ion/README | 101 ---- tools/testing/selftests/android/ion/ion.h | 134 ----- tools/testing/selftests/android/ion/ion_test.sh | 58 -- .../testing/selftests/android/ion/ionapp_export.c | 127 ---- .../testing/selftests/android/ion/ionapp_import.c | 79 --- tools/testing/selftests/android/ion/ionmap_test.c | 136 ----- tools/testing/selftests/android/ion/ionutils.c | 253 -------- tools/testing/selftests/android/ion/ionutils.h | 55 -- tools/testing/selftests/android/ion/ipcsocket.c | 227 ------- tools/testing/selftests/android/ion/ipcsocket.h | 35 -- tools/testing/selftests/android/run.sh | 3 - 29 files changed, 1 insertion(+), 3362 deletions(-) delete mode 100644 drivers/staging/android/ion/Kconfig delete mode 100644 drivers/staging/android/ion/Makefile delete mode 100644 drivers/staging/android/ion/ion.c delete mode 100644 drivers/staging/android/ion/ion.h delete mode 100644 drivers/staging/android/ion/ion_cma_heap.c delete mode 100644 drivers/staging/android/ion/ion_heap.c delete mode 100644 drivers/staging/android/ion/ion_page_pool.c delete mode 100644 drivers/staging/android/ion/ion_system_heap.c delete mode 100644 drivers/staging/android/uapi/ion.h delete mode 100644 tools/testing/selftests/android/Makefile delete mode 100644 tools/testing/selftests/android/config delete mode 100644 tools/testing/selftests/android/ion/.gitignore delete mode 100644 tools/testing/selftests/android/ion/Makefile delete mode 100644 tools/testing/selftests/android/ion/README delete mode 100644 tools/testing/selftests/android/ion/ion.h delete mode 100755 tools/testing/selftests/android/ion/ion_test.sh delete mode 100644 tools/testing/selftests/android/ion/ionapp_export.c delete mode 100644 tools/testing/selftests/android/ion/ionapp_import.c delete mode 100644 tools/testing/selftests/android/ion/ionmap_test.c delete mode 100644 tools/testing/selftests/android/ion/ionutils.c delete mode 100644 tools/testing/selftests/android/ion/ionutils.h delete mode 100644 tools/testing/selftests/android/ion/ipcsocket.c delete mode 100644 tools/testing/selftests/android/ion/ipcsocket.h delete mode 100755 tools/testing/selftests/android/run.sh (limited to 'tools') diff --git a/MAINTAINERS b/MAINTAINERS index e73636b75f29..b7980e6033f8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1173,16 +1173,6 @@ S: Supported F: Documentation/devicetree/bindings/rtc/google,goldfish-rtc.txt F: drivers/rtc/rtc-goldfish.c -ANDROID ION DRIVER -M: Laura Abbott -M: Sumit Semwal -L: devel@driverdev.osuosl.org -L: dri-devel@lists.freedesktop.org -L: linaro-mm-sig@lists.linaro.org (moderated for non-subscribers) -S: Supported -F: drivers/staging/android/ion -F: drivers/staging/android/uapi/ion.h - AOA (Apple Onboard Audio) ALSA DRIVER M: Johannes Berg L: linuxppc-dev@lists.ozlabs.org diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig index 8d8fd5c29349..70498adb1575 100644 --- a/drivers/staging/android/Kconfig +++ b/drivers/staging/android/Kconfig @@ -14,8 +14,6 @@ config ASHMEM It is, in theory, a good memory allocator for low-memory devices, because it can discard shared memory units when under memory pressure. -source "drivers/staging/android/ion/Kconfig" - endif # if ANDROID endmenu diff --git a/drivers/staging/android/Makefile b/drivers/staging/android/Makefile index 3b66cd0b0ec5..e9a55a5e6529 100644 --- a/drivers/staging/android/Makefile +++ b/drivers/staging/android/Makefile @@ -1,6 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 ccflags-y += -I$(src) # needed for trace events -obj-y += ion/ - obj-$(CONFIG_ASHMEM) += ashmem.o diff --git a/drivers/staging/android/TODO b/drivers/staging/android/TODO index 80eccfaf6db5..f74eb44d8e45 100644 --- a/drivers/staging/android/TODO +++ b/drivers/staging/android/TODO @@ -4,10 +4,5 @@ TODO: - add proper arch dependencies as needed - audit userspace interfaces to make sure they are sane - -ion/ - - Split /dev/ion up into multiple nodes (e.g. /dev/ion/heap0) - - Better test framework (integration with VGEM was suggested) - Please send patches to Greg Kroah-Hartman and Cc: Arve Hjønnevåg and Riley Andrews diff --git a/drivers/staging/android/ion/Kconfig b/drivers/staging/android/ion/Kconfig deleted file mode 100644 index 989fe84a9f9d..000000000000 --- a/drivers/staging/android/ion/Kconfig +++ /dev/null @@ -1,27 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -menuconfig ION - bool "Ion Memory Manager" - depends on HAS_DMA && MMU - select GENERIC_ALLOCATOR - select DMA_SHARED_BUFFER - help - Choose this option to enable the ION Memory Manager, - used by Android to efficiently allocate buffers - from userspace that can be shared between drivers. - If you're not using Android its probably safe to - say N here. - -config ION_SYSTEM_HEAP - bool "Ion system heap" - depends on ION - help - Choose this option to enable the Ion system heap. The system heap - is backed by pages from the buddy allocator. If in doubt, say Y. - -config ION_CMA_HEAP - bool "Ion CMA heap support" - depends on ION && DMA_CMA - help - Choose this option to enable CMA heaps with Ion. This heap is backed - by the Contiguous Memory Allocator (CMA). If your system has these - regions, you should say Y here. diff --git a/drivers/staging/android/ion/Makefile b/drivers/staging/android/ion/Makefile deleted file mode 100644 index 5f4487b1a224..000000000000 --- a/drivers/staging/android/ion/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_ION) += ion.o ion_heap.o -obj-$(CONFIG_ION_SYSTEM_HEAP) += ion_system_heap.o ion_page_pool.o -obj-$(CONFIG_ION_CMA_HEAP) += ion_cma_heap.o diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c deleted file mode 100644 index e1fe03ceb7f1..000000000000 --- a/drivers/staging/android/ion/ion.c +++ /dev/null @@ -1,649 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ION Memory Allocator - * - * Copyright (C) 2011 Google, Inc. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ion.h" - -static struct ion_device *internal_dev; -static int heap_id; - -/* this function should only be called while dev->lock is held */ -static struct ion_buffer *ion_buffer_create(struct ion_heap *heap, - struct ion_device *dev, - unsigned long len, - unsigned long flags) -{ - struct ion_buffer *buffer; - int ret; - - buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); - if (!buffer) - return ERR_PTR(-ENOMEM); - - buffer->heap = heap; - buffer->flags = flags; - buffer->dev = dev; - buffer->size = len; - - ret = heap->ops->allocate(heap, buffer, len, flags); - - if (ret) { - if (!(heap->flags & ION_HEAP_FLAG_DEFER_FREE)) - goto err2; - - ion_heap_freelist_drain(heap, 0); - ret = heap->ops->allocate(heap, buffer, len, flags); - if (ret) - goto err2; - } - - if (!buffer->sg_table) { - WARN_ONCE(1, "This heap needs to set the sgtable"); - ret = -EINVAL; - goto err1; - } - - spin_lock(&heap->stat_lock); - heap->num_of_buffers++; - heap->num_of_alloc_bytes += len; - if (heap->num_of_alloc_bytes > heap->alloc_bytes_wm) - heap->alloc_bytes_wm = heap->num_of_alloc_bytes; - spin_unlock(&heap->stat_lock); - - INIT_LIST_HEAD(&buffer->attachments); - mutex_init(&buffer->lock); - return buffer; - -err1: - heap->ops->free(buffer); -err2: - kfree(buffer); - return ERR_PTR(ret); -} - -void ion_buffer_destroy(struct ion_buffer *buffer) -{ - if (buffer->kmap_cnt > 0) { - pr_warn_once("%s: buffer still mapped in the kernel\n", - __func__); - buffer->heap->ops->unmap_kernel(buffer->heap, buffer); - } - buffer->heap->ops->free(buffer); - spin_lock(&buffer->heap->stat_lock); - buffer->heap->num_of_buffers--; - buffer->heap->num_of_alloc_bytes -= buffer->size; - spin_unlock(&buffer->heap->stat_lock); - - kfree(buffer); -} - -static void _ion_buffer_destroy(struct ion_buffer *buffer) -{ - struct ion_heap *heap = buffer->heap; - - if (heap->flags & ION_HEAP_FLAG_DEFER_FREE) - ion_heap_freelist_add(heap, buffer); - else - ion_buffer_destroy(buffer); -} - -static void *ion_buffer_kmap_get(struct ion_buffer *buffer) -{ - void *vaddr; - - if (buffer->kmap_cnt) { - buffer->kmap_cnt++; - return buffer->vaddr; - } - vaddr = buffer->heap->ops->map_kernel(buffer->heap, buffer); - if (WARN_ONCE(!vaddr, - "heap->ops->map_kernel should return ERR_PTR on error")) - return ERR_PTR(-EINVAL); - if (IS_ERR(vaddr)) - return vaddr; - buffer->vaddr = vaddr; - buffer->kmap_cnt++; - return vaddr; -} - -static void ion_buffer_kmap_put(struct ion_buffer *buffer) -{ - buffer->kmap_cnt--; - if (!buffer->kmap_cnt) { - buffer->heap->ops->unmap_kernel(buffer->heap, buffer); - buffer->vaddr = NULL; - } -} - -static struct sg_table *dup_sg_table(struct sg_table *table) -{ - struct sg_table *new_table; - int ret, i; - struct scatterlist *sg, *new_sg; - - new_table = kzalloc(sizeof(*new_table), GFP_KERNEL); - if (!new_table) - return ERR_PTR(-ENOMEM); - - ret = sg_alloc_table(new_table, table->orig_nents, GFP_KERNEL); - if (ret) { - kfree(new_table); - return ERR_PTR(-ENOMEM); - } - - new_sg = new_table->sgl; - for_each_sgtable_sg(table, sg, i) { - memcpy(new_sg, sg, sizeof(*sg)); - new_sg->dma_address = 0; - new_sg = sg_next(new_sg); - } - - return new_table; -} - -static void free_duped_table(struct sg_table *table) -{ - sg_free_table(table); - kfree(table); -} - -struct ion_dma_buf_attachment { - struct device *dev; - struct sg_table *table; - struct list_head list; -}; - -static int ion_dma_buf_attach(struct dma_buf *dmabuf, - struct dma_buf_attachment *attachment) -{ - struct ion_dma_buf_attachment *a; - struct sg_table *table; - struct ion_buffer *buffer = dmabuf->priv; - - a = kzalloc(sizeof(*a), GFP_KERNEL); - if (!a) - return -ENOMEM; - - table = dup_sg_table(buffer->sg_table); - if (IS_ERR(table)) { - kfree(a); - return -ENOMEM; - } - - a->table = table; - a->dev = attachment->dev; - INIT_LIST_HEAD(&a->list); - - attachment->priv = a; - - mutex_lock(&buffer->lock); - list_add(&a->list, &buffer->attachments); - mutex_unlock(&buffer->lock); - - return 0; -} - -static void ion_dma_buf_detach(struct dma_buf *dmabuf, - struct dma_buf_attachment *attachment) -{ - struct ion_dma_buf_attachment *a = attachment->priv; - struct ion_buffer *buffer = dmabuf->priv; - - mutex_lock(&buffer->lock); - list_del(&a->list); - mutex_unlock(&buffer->lock); - free_duped_table(a->table); - - kfree(a); -} - -static struct sg_table *ion_map_dma_buf(struct dma_buf_attachment *attachment, - enum dma_data_direction direction) -{ - struct ion_dma_buf_attachment *a = attachment->priv; - struct sg_table *table; - int ret; - - table = a->table; - - ret = dma_map_sgtable(attachment->dev, table, direction, 0); - if (ret) - return ERR_PTR(ret); - - return table; -} - -static void ion_unmap_dma_buf(struct dma_buf_attachment *attachment, - struct sg_table *table, - enum dma_data_direction direction) -{ - dma_unmap_sgtable(attachment->dev, table, direction, 0); -} - -static int ion_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) -{ - struct ion_buffer *buffer = dmabuf->priv; - int ret = 0; - - if (!buffer->heap->ops->map_user) { - pr_err("%s: this heap does not define a method for mapping to userspace\n", - __func__); - return -EINVAL; - } - - if (!(buffer->flags & ION_FLAG_CACHED)) - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - - mutex_lock(&buffer->lock); - /* now map it to userspace */ - ret = buffer->heap->ops->map_user(buffer->heap, buffer, vma); - mutex_unlock(&buffer->lock); - - if (ret) - pr_err("%s: failure mapping buffer to userspace\n", - __func__); - - return ret; -} - -static void ion_dma_buf_release(struct dma_buf *dmabuf) -{ - struct ion_buffer *buffer = dmabuf->priv; - - _ion_buffer_destroy(buffer); -} - -static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, - enum dma_data_direction direction) -{ - struct ion_buffer *buffer = dmabuf->priv; - void *vaddr; - struct ion_dma_buf_attachment *a; - int ret = 0; - - /* - * TODO: Move this elsewhere because we don't always need a vaddr - */ - if (buffer->heap->ops->map_kernel) { - mutex_lock(&buffer->lock); - vaddr = ion_buffer_kmap_get(buffer); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - goto unlock; - } - mutex_unlock(&buffer->lock); - } - - mutex_lock(&buffer->lock); - list_for_each_entry(a, &buffer->attachments, list) - dma_sync_sgtable_for_cpu(a->dev, a->table, direction); - -unlock: - mutex_unlock(&buffer->lock); - return ret; -} - -static int ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf, - enum dma_data_direction direction) -{ - struct ion_buffer *buffer = dmabuf->priv; - struct ion_dma_buf_attachment *a; - - if (buffer->heap->ops->map_kernel) { - mutex_lock(&buffer->lock); - ion_buffer_kmap_put(buffer); - mutex_unlock(&buffer->lock); - } - - mutex_lock(&buffer->lock); - list_for_each_entry(a, &buffer->attachments, list) - dma_sync_sgtable_for_device(a->dev, a->table, direction); - mutex_unlock(&buffer->lock); - - return 0; -} - -static const struct dma_buf_ops dma_buf_ops = { - .map_dma_buf = ion_map_dma_buf, - .unmap_dma_buf = ion_unmap_dma_buf, - .mmap = ion_mmap, - .release = ion_dma_buf_release, - .attach = ion_dma_buf_attach, - .detach = ion_dma_buf_detach, - .begin_cpu_access = ion_dma_buf_begin_cpu_access, - .end_cpu_access = ion_dma_buf_end_cpu_access, -}; - -static int ion_alloc(size_t len, unsigned int heap_id_mask, unsigned int flags) -{ - struct ion_device *dev = internal_dev; - struct ion_buffer *buffer = NULL; - struct ion_heap *heap; - DEFINE_DMA_BUF_EXPORT_INFO(exp_info); - int fd; - struct dma_buf *dmabuf; - - pr_debug("%s: len %zu heap_id_mask %u flags %x\n", __func__, - len, heap_id_mask, flags); - /* - * traverse the list of heaps available in this system in priority - * order. If the heap type is supported by the client, and matches the - * request of the caller allocate from it. Repeat until allocate has - * succeeded or all heaps have been tried - */ - len = PAGE_ALIGN(len); - - if (!len) - return -EINVAL; - - down_read(&dev->lock); - plist_for_each_entry(heap, &dev->heaps, node) { - /* if the caller didn't specify this heap id */ - if (!((1 << heap->id) & heap_id_mask)) - continue; - buffer = ion_buffer_create(heap, dev, len, flags); - if (!IS_ERR(buffer)) - break; - } - up_read(&dev->lock); - - if (!buffer) - return -ENODEV; - - if (IS_ERR(buffer)) - return PTR_ERR(buffer); - - exp_info.ops = &dma_buf_ops; - exp_info.size = buffer->size; - exp_info.flags = O_RDWR; - exp_info.priv = buffer; - - dmabuf = dma_buf_export(&exp_info); - if (IS_ERR(dmabuf)) { - _ion_buffer_destroy(buffer); - return PTR_ERR(dmabuf); - } - - fd = dma_buf_fd(dmabuf, O_CLOEXEC); - if (fd < 0) - dma_buf_put(dmabuf); - - return fd; -} - -static int ion_query_heaps(struct ion_heap_query *query) -{ - struct ion_device *dev = internal_dev; - struct ion_heap_data __user *buffer = u64_to_user_ptr(query->heaps); - int ret = -EINVAL, cnt = 0, max_cnt; - struct ion_heap *heap; - struct ion_heap_data hdata; - - memset(&hdata, 0, sizeof(hdata)); - - down_read(&dev->lock); - if (!buffer) { - query->cnt = dev->heap_cnt; - ret = 0; - goto out; - } - - if (query->cnt <= 0) - goto out; - - max_cnt = query->cnt; - - plist_for_each_entry(heap, &dev->heaps, node) { - strncpy(hdata.name, heap->name, MAX_HEAP_NAME); - hdata.name[sizeof(hdata.name) - 1] = '\0'; - hdata.type = heap->type; - hdata.heap_id = heap->id; - - if (copy_to_user(&buffer[cnt], &hdata, sizeof(hdata))) { - ret = -EFAULT; - goto out; - } - - cnt++; - if (cnt >= max_cnt) - break; - } - - query->cnt = cnt; - ret = 0; -out: - up_read(&dev->lock); - return ret; -} - -union ion_ioctl_arg { - struct ion_allocation_data allocation; - struct ion_heap_query query; -}; - -static int validate_ioctl_arg(unsigned int cmd, union ion_ioctl_arg *arg) -{ - switch (cmd) { - case ION_IOC_HEAP_QUERY: - if (arg->query.reserved0 || - arg->query.reserved1 || - arg->query.reserved2) - return -EINVAL; - break; - default: - break; - } - - return 0; -} - -static long ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - int ret = 0; - union ion_ioctl_arg data; - - if (_IOC_SIZE(cmd) > sizeof(data)) - return -EINVAL; - - /* - * The copy_from_user is unconditional here for both read and write - * to do the validate. If there is no write for the ioctl, the - * buffer is cleared - */ - if (copy_from_user(&data, (void __user *)arg, _IOC_SIZE(cmd))) - return -EFAULT; - - ret = validate_ioctl_arg(cmd, &data); - if (ret) { - pr_warn_once("%s: ioctl validate failed\n", __func__); - return ret; - } - - if (!(_IOC_DIR(cmd) & _IOC_WRITE)) - memset(&data, 0, sizeof(data)); - - switch (cmd) { - case ION_IOC_ALLOC: - { - int fd; - - fd = ion_alloc(data.allocation.len, - data.allocation.heap_id_mask, - data.allocation.flags); - if (fd < 0) - return fd; - - data.allocation.fd = fd; - - break; - } - case ION_IOC_HEAP_QUERY: - ret = ion_query_heaps(&data.query); - break; - default: - return -ENOTTY; - } - - if (_IOC_DIR(cmd) & _IOC_READ) { - if (copy_to_user((void __user *)arg, &data, _IOC_SIZE(cmd))) - return -EFAULT; - } - return ret; -} - -static const struct file_operations ion_fops = { - .owner = THIS_MODULE, - .unlocked_ioctl = ion_ioctl, - .compat_ioctl = compat_ptr_ioctl, -}; - -static int debug_shrink_set(void *data, u64 val) -{ - struct ion_heap *heap = data; - struct shrink_control sc; - int objs; - - sc.gfp_mask = GFP_HIGHUSER; - sc.nr_to_scan = val; - - if (!val) { - objs = heap->shrinker.count_objects(&heap->shrinker, &sc); - sc.nr_to_scan = objs; - } - - heap->shrinker.scan_objects(&heap->shrinker, &sc); - return 0; -} - -static int debug_shrink_get(void *data, u64 *val) -{ - struct ion_heap *heap = data; - struct shrink_control sc; - int objs; - - sc.gfp_mask = GFP_HIGHUSER; - sc.nr_to_scan = 0; - - objs = heap->shrinker.count_objects(&heap->shrinker, &sc); - *val = objs; - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(debug_shrink_fops, debug_shrink_get, - debug_shrink_set, "%llu\n"); - -void ion_device_add_heap(struct ion_heap *heap) -{ - struct ion_device *dev = internal_dev; - int ret; - struct dentry *heap_root; - char debug_name[64]; - - if (!heap->ops->allocate || !heap->ops->free) - pr_err("%s: can not add heap with invalid ops struct.\n", - __func__); - - spin_lock_init(&heap->free_lock); - spin_lock_init(&heap->stat_lock); - heap->free_list_size = 0; - - if (heap->flags & ION_HEAP_FLAG_DEFER_FREE) - ion_heap_init_deferred_free(heap); - - if ((heap->flags & ION_HEAP_FLAG_DEFER_FREE) || heap->ops->shrink) { - ret = ion_heap_init_shrinker(heap); - if (ret) - pr_err("%s: Failed to register shrinker\n", __func__); - } - - heap->dev = dev; - heap->num_of_buffers = 0; - heap->num_of_alloc_bytes = 0; - heap->alloc_bytes_wm = 0; - - heap_root = debugfs_create_dir(heap->name, dev->debug_root); - debugfs_create_u64("num_of_buffers", - 0444, heap_root, - &heap->num_of_buffers); - debugfs_create_u64("num_of_alloc_bytes", - 0444, - heap_root, - &heap->num_of_alloc_bytes); - debugfs_create_u64("alloc_bytes_wm", - 0444, - heap_root, - &heap->alloc_bytes_wm); - - if (heap->shrinker.count_objects && - heap->shrinker.scan_objects) { - snprintf(debug_name, 64, "%s_shrink", heap->name); - debugfs_create_file(debug_name, - 0644, - heap_root, - heap, - &debug_shrink_fops); - } - - down_write(&dev->lock); - heap->id = heap_id++; - /* - * use negative heap->id to reverse the priority -- when traversing - * the list later attempt higher id numbers first - */ - plist_node_init(&heap->node, -heap->id); - plist_add(&heap->node, &dev->heaps); - - dev->heap_cnt++; - up_write(&dev->lock); -} -EXPORT_SYMBOL(ion_device_add_heap); - -static int ion_device_create(void) -{ - struct ion_device *idev; - int ret; - - idev = kzalloc(sizeof(*idev), GFP_KERNEL); - if (!idev) - return -ENOMEM; - - idev->dev.minor = MISC_DYNAMIC_MINOR; - idev->dev.name = "ion"; - idev->dev.fops = &ion_fops; - idev->dev.parent = NULL; - ret = misc_register(&idev->dev); - if (ret) { - pr_err("ion: failed to register misc device.\n"); - kfree(idev); - return ret; - } - - idev->debug_root = debugfs_create_dir("ion", NULL); - init_rwsem(&idev->lock); - plist_head_init(&idev->heaps); - internal_dev = idev; - return 0; -} -subsys_initcall(ion_device_create); diff --git a/drivers/staging/android/ion/ion.h b/drivers/staging/android/ion/ion.h deleted file mode 100644 index c199e88afc6c..000000000000 --- a/drivers/staging/android/ion/ion.h +++ /dev/null @@ -1,302 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ION Memory Allocator kernel interface header - * - * Copyright (C) 2011 Google, Inc. - */ - -#ifndef _ION_H -#define _ION_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../uapi/ion.h" - -/** - * struct ion_buffer - metadata for a particular buffer - * @list: element in list of deferred freeable buffers - * @dev: back pointer to the ion_device - * @heap: back pointer to the heap the buffer came from - * @flags: buffer specific flags - * @private_flags: internal buffer specific flags - * @size: size of the buffer - * @priv_virt: private data to the buffer representable as - * a void * - * @lock: protects the buffers cnt fields - * @kmap_cnt: number of times the buffer is mapped to the kernel - * @vaddr: the kernel mapping if kmap_cnt is not zero - * @sg_table: the sg table for the buffer - * @attachments: list of devices attached to this buffer - */ -struct ion_buffer { - struct list_head list; - struct ion_device *dev; - struct ion_heap *heap; - unsigned long flags; - unsigned long private_flags; - size_t size; - void *priv_virt; - struct mutex lock; - int kmap_cnt; - void *vaddr; - struct sg_table *sg_table; - struct list_head attachments; -}; - -void ion_buffer_destroy(struct ion_buffer *buffer); - -/** - * struct ion_device - the metadata of the ion device node - * @dev: the actual misc device - * @lock: rwsem protecting the tree of heaps and clients - */ -struct ion_device { - struct miscdevice dev; - struct rw_semaphore lock; - struct plist_head heaps; - struct dentry *debug_root; - int heap_cnt; -}; - -/** - * struct ion_heap_ops - ops to operate on a given heap - * @allocate: allocate memory - * @free: free memory - * @map_kernel map memory to the kernel - * @unmap_kernel unmap memory to the kernel - * @map_user map memory to userspace - * - * allocate, phys, and map_user return 0 on success, -errno on error. - * map_dma and map_kernel return pointer on success, ERR_PTR on - * error. @free will be called with ION_PRIV_FLAG_SHRINKER_FREE set in - * the buffer's private_flags when called from a shrinker. In that - * case, the pages being free'd must be truly free'd back to the - * system, not put in a page pool or otherwise cached. - */ -struct ion_heap_ops { - int (*allocate)(struct ion_heap *heap, - struct ion_buffer *buffer, unsigned long len, - unsigned long flags); - void (*free)(struct ion_buffer *buffer); - void * (*map_kernel)(struct ion_heap *heap, struct ion_buffer *buffer); - void (*unmap_kernel)(struct ion_heap *heap, struct ion_buffer *buffer); - int (*map_user)(struct ion_heap *mapper, struct ion_buffer *buffer, - struct vm_area_struct *vma); - int (*shrink)(struct ion_heap *heap, gfp_t gfp_mask, int nr_to_scan); -}; - -/** - * heap flags - flags between the heaps and core ion code - */ -#define ION_HEAP_FLAG_DEFER_FREE BIT(0) - -/** - * private flags - flags internal to ion - */ -/* - * Buffer is being freed from a shrinker function. Skip any possible - * heap-specific caching mechanism (e.g. page pools). Guarantees that - * any buffer storage that came from the system allocator will be - * returned to the system allocator. - */ -#define ION_PRIV_FLAG_SHRINKER_FREE BIT(0) - -/** - * struct ion_heap - represents a heap in the system - * @node: rb node to put the heap on the device's tree of heaps - * @dev: back pointer to the ion_device - * @type: type of heap - * @ops: ops struct as above - * @flags: flags - * @id: id of heap, also indicates priority of this heap when - * allocating. These are specified by platform data and - * MUST be unique - * @name: used for debugging - * @shrinker: a shrinker for the heap - * @free_list: free list head if deferred free is used - * @free_list_size size of the deferred free list in bytes - * @lock: protects the free list - * @waitqueue: queue to wait on from deferred free thread - * @task: task struct of deferred free thread - * @num_of_buffers the number of currently allocated buffers - * @num_of_alloc_bytes the number of allocated bytes - * @alloc_bytes_wm the number of allocated bytes watermark - * - * Represents a pool of memory from which buffers can be made. In some - * systems the only heap is regular system memory allocated via vmalloc. - * On others, some blocks might require large physically contiguous buffers - * that are allocated from a specially reserved heap. - */ -struct ion_heap { - struct plist_node node; - struct ion_device *dev; - enum ion_heap_type type; - struct ion_heap_ops *ops; - unsigned long flags; - unsigned int id; - const char *name; - - /* deferred free support */ - struct shrinker shrinker; - struct list_head free_list; - size_t free_list_size; - spinlock_t free_lock; - wait_queue_head_t waitqueue; - struct task_struct *task; - - /* heap statistics */ - u64 num_of_buffers; - u64 num_of_alloc_bytes; - u64 alloc_bytes_wm; - - /* protect heap statistics */ - spinlock_t stat_lock; -}; - -/** - * ion_device_add_heap - adds a heap to the ion device - * @heap: the heap to add - */ -void ion_device_add_heap(struct ion_heap *heap); - -/** - * some helpers for common operations on buffers using the sg_table - * and vaddr fields - */ -void *ion_heap_map_kernel(struct ion_heap *heap, struct ion_buffer *buffer); -void ion_heap_unmap_kernel(struct ion_heap *heap, struct ion_buffer *buffer); -int ion_heap_map_user(struct ion_heap *heap, struct ion_buffer *buffer, - struct vm_area_struct *vma); -int ion_heap_buffer_zero(struct ion_buffer *buffer); - -/** - * ion_heap_init_shrinker - * @heap: the heap - * - * If a heap sets the ION_HEAP_FLAG_DEFER_FREE flag or defines the shrink op - * this function will be called to setup a shrinker to shrink the freelists - * and call the heap's shrink op. - */ -int ion_heap_init_shrinker(struct ion_heap *heap); - -/** - * ion_heap_init_deferred_free -- initialize deferred free functionality - * @heap: the heap - * - * If a heap sets the ION_HEAP_FLAG_DEFER_FREE flag this function will - * be called to setup deferred frees. Calls to free the buffer will - * return immediately and the actual free will occur some time later - */ -int ion_heap_init_deferred_free(struct ion_heap *heap); - -/** - * ion_heap_freelist_add - add a buffer to the deferred free list - * @heap: the heap - * @buffer: the buffer - * - * Adds an item to the deferred freelist. - */ -void ion_heap_freelist_add(struct ion_heap *heap, struct ion_buffer *buffer); - -/** - * ion_heap_freelist_drain - drain the deferred free list - * @heap: the heap - * @size: amount of memory to drain in bytes - * - * Drains the indicated amount of memory from the deferred freelist immediately. - * Returns the total amount freed. The total freed may be higher depending - * on the size of the items in the list, or lower if there is insufficient - * total memory on the freelist. - */ -size_t ion_heap_freelist_drain(struct ion_heap *heap, size_t size); - -/** - * ion_heap_freelist_shrink - drain the deferred free - * list, skipping any heap-specific - * pooling or caching mechanisms - * - * @heap: the heap - * @size: amount of memory to drain in bytes - * - * Drains the indicated amount of memory from the deferred freelist immediately. - * Returns the total amount freed. The total freed may be higher depending - * on the size of the items in the list, or lower if there is insufficient - * total memory on the freelist. - * - * Unlike with @ion_heap_freelist_drain, don't put any pages back into - * page pools or otherwise cache the pages. Everything must be - * genuinely free'd back to the system. If you're free'ing from a - * shrinker you probably want to use this. Note that this relies on - * the heap.ops.free callback honoring the ION_PRIV_FLAG_SHRINKER_FREE - * flag. - */ -size_t ion_heap_freelist_shrink(struct ion_heap *heap, - size_t size); - -/** - * ion_heap_freelist_size - returns the size of the freelist in bytes - * @heap: the heap - */ -size_t ion_heap_freelist_size(struct ion_heap *heap); - -/** - * functions for creating and destroying a heap pool -- allows you - * to keep a pool of pre allocated memory to use from your heap. Keeping - * a pool of memory that is ready for dma, ie any cached mapping have been - * invalidated from the cache, provides a significant performance benefit on - * many systems - */ - -/** - * struct ion_page_pool - pagepool struct - * @high_count: number of highmem items in the pool - * @low_count: number of lowmem items in the pool - * @high_items: list of highmem items - * @low_items: list of lowmem items - * @mutex: lock protecting this struct and especially the count - * item list - * @gfp_mask: gfp_mask to use from alloc - * @order: order of pages in the pool - * @list: plist node for list of pools - * - * Allows you to keep a pool of pre allocated pages to use from your heap. - * Keeping a pool of pages that is ready for dma, ie any cached mapping have - * been invalidated from the cache, provides a significant performance benefit - * on many systems - */ -struct ion_page_pool { - int high_count; - int low_count; - struct list_head high_items; - struct list_head low_items; - struct mutex mutex; - gfp_t gfp_mask; - unsigned int order; - struct plist_node list; -}; - -struct ion_page_pool *ion_page_pool_create(gfp_t gfp_mask, unsigned int order); -void ion_page_pool_destroy(struct ion_page_pool *pool); -struct page *ion_page_pool_alloc(struct ion_page_pool *pool); -void ion_page_pool_free(struct ion_page_pool *pool, struct page *page); - -/** ion_page_pool_shrink - shrinks the size of the memory cached in the pool - * @pool: the pool - * @gfp_mask: the memory type to reclaim - * @nr_to_scan: number of items to shrink in pages - * - * returns the number of items freed in pages - */ -int ion_page_pool_shrink(struct ion_page_pool *pool, gfp_t gfp_mask, - int nr_to_scan); - -#endif /* _ION_H */ diff --git a/drivers/staging/android/ion/ion_cma_heap.c b/drivers/staging/android/ion/ion_cma_heap.c deleted file mode 100644 index bf65e67ef9d8..000000000000 --- a/drivers/staging/android/ion/ion_cma_heap.c +++ /dev/null @@ -1,138 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ION Memory Allocator CMA heap exporter - * - * Copyright (C) Linaro 2012 - * Author: for ST-Ericsson. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "ion.h" - -struct ion_cma_heap { - struct ion_heap heap; - struct cma *cma; -}; - -#define to_cma_heap(x) container_of(x, struct ion_cma_heap, heap) - -/* ION CMA heap operations functions */ -static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer, - unsigned long len, - unsigned long flags) -{ - struct ion_cma_heap *cma_heap = to_cma_heap(heap); - struct sg_table *table; - struct page *pages; - unsigned long size = PAGE_ALIGN(len); - unsigned long nr_pages = size >> PAGE_SHIFT; - unsigned long align = get_order(size); - int ret; - - if (align > CONFIG_CMA_ALIGNMENT) - align = CONFIG_CMA_ALIGNMENT; - - pages = cma_alloc(cma_heap->cma, nr_pages, align, false); - if (!pages) - return -ENOMEM; - - if (PageHighMem(pages)) { - unsigned long nr_clear_pages = nr_pages; - struct page *page = pages; - - while (nr_clear_pages > 0) { - void *vaddr = kmap_atomic(page); - - memset(vaddr, 0, PAGE_SIZE); - kunmap_atomic(vaddr); - page++; - nr_clear_pages--; - } - } else { - memset(page_address(pages), 0, size); - } - - table = kmalloc(sizeof(*table), GFP_KERNEL); - if (!table) - goto err; - - ret = sg_alloc_table(table, 1, GFP_KERNEL); - if (ret) - goto free_mem; - - sg_set_page(table->sgl, pages, size, 0); - - buffer->priv_virt = pages; - buffer->sg_table = table; - return 0; - -free_mem: - kfree(table); -err: - cma_release(cma_heap->cma, pages, nr_pages); - return -ENOMEM; -} - -static void ion_cma_free(struct ion_buffer *buffer) -{ - struct ion_cma_heap *cma_heap = to_cma_heap(buffer->heap); - struct page *pages = buffer->priv_virt; - unsigned long nr_pages = PAGE_ALIGN(buffer->size) >> PAGE_SHIFT; - - /* release memory */ - cma_release(cma_heap->cma, pages, nr_pages); - /* release sg table */ - sg_free_table(buffer->sg_table); - kfree(buffer->sg_table); -} - -static struct ion_heap_ops ion_cma_ops = { - .allocate = ion_cma_allocate, - .free = ion_cma_free, - .map_user = ion_heap_map_user, - .map_kernel = ion_heap_map_kernel, - .unmap_kernel = ion_heap_unmap_kernel, -}; - -static struct ion_heap *__ion_cma_heap_create(struct cma *cma) -{ - struct ion_cma_heap *cma_heap; - - cma_heap = kzalloc(sizeof(*cma_heap), GFP_KERNEL); - - if (!cma_heap) - return ERR_PTR(-ENOMEM); - - cma_heap->heap.ops = &ion_cma_ops; - cma_heap->cma = cma; - cma_heap->heap.type = ION_HEAP_TYPE_DMA; - return &cma_heap->heap; -} - -static int __ion_add_cma_heaps(struct cma *cma, void *data) -{ - struct ion_heap *heap; - - heap = __ion_cma_heap_create(cma); - if (IS_ERR(heap)) - return PTR_ERR(heap); - - heap->name = cma_get_name(cma); - - ion_device_add_heap(heap); - return 0; -} - -static int ion_add_cma_heaps(void) -{ - cma_for_each_area(__ion_add_cma_heaps, NULL); - return 0; -} -device_initcall(ion_add_cma_heaps); diff --git a/drivers/staging/android/ion/ion_heap.c b/drivers/staging/android/ion/ion_heap.c deleted file mode 100644 index ea7e0a244ffc..000000000000 --- a/drivers/staging/android/ion/ion_heap.c +++ /dev/null @@ -1,286 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ION Memory Allocator generic heap helpers - * - * Copyright (C) 2011 Google, Inc. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ion.h" - -void *ion_heap_map_kernel(struct ion_heap *heap, - struct ion_buffer *buffer) -{ - struct sg_page_iter piter; - void *vaddr; - pgprot_t pgprot; - struct sg_table *table = buffer->sg_table; - int npages = PAGE_ALIGN(buffer->size) / PAGE_SIZE; - struct page **pages = vmalloc(array_size(npages, - sizeof(struct page *))); - struct page **tmp = pages; - - if (!pages) - return ERR_PTR(-ENOMEM); - - if (buffer->flags & ION_FLAG_CACHED) - pgprot = PAGE_KERNEL; - else - pgprot = pgprot_writecombine(PAGE_KERNEL); - - for_each_sgtable_page(table, &piter, 0) { - BUG_ON(tmp - pages >= npages); - *tmp++ = sg_page_iter_page(&piter); - } - - vaddr = vmap(pages, npages, VM_MAP, pgprot); - vfree(pages); - - if (!vaddr) - return ERR_PTR(-ENOMEM); - - return vaddr; -} - -void ion_heap_unmap_kernel(struct ion_heap *heap, - struct ion_buffer *buffer) -{ - vunmap(buffer->vaddr); -} - -int ion_heap_map_user(struct ion_heap *heap, struct ion_buffer *buffer, - struct vm_area_struct *vma) -{ - struct sg_page_iter piter; - struct sg_table *table = buffer->sg_table; - unsigned long addr = vma->vm_start; - int ret; - - for_each_sgtable_page(table, &piter, vma->vm_pgoff) { - struct page *page = sg_page_iter_page(&piter); - - ret = remap_pfn_range(vma, addr, page_to_pfn(page), PAGE_SIZE, - vma->vm_page_prot); - if (ret) - return ret; - addr += PAGE_SIZE; - if (addr >= vma->vm_end) - return 0; - } - - return 0; -} - -static int ion_heap_clear_pages(struct page **pages, int num, pgprot_t pgprot) -{ - void *addr = vmap(pages, num, VM_MAP, pgprot); - - if (!addr) - return -ENOMEM; - memset(addr, 0, PAGE_SIZE * num); - vunmap(addr); - - return 0; -} - -static int ion_heap_sglist_zero(struct sg_table *sgt, pgprot_t pgprot) -{ - int p = 0; - int ret = 0; - struct sg_page_iter piter; - struct page *pages[32]; - - for_each_sgtable_page(sgt, &piter, 0) { - pages[p++] = sg_page_iter_page(&piter); - if (p == ARRAY_SIZE(pages)) { - ret = ion_heap_clear_pages(pages, p, pgprot); - if (ret) - return ret; - p = 0; - } - } - if (p) - ret = ion_heap_clear_pages(pages, p, pgprot); - - return ret; -} - -int ion_heap_buffer_zero(struct ion_buffer *buffer) -{ - struct sg_table *table = buffer->sg_table; - pgprot_t pgprot; - - if (buffer->flags & ION_FLAG_CACHED) - pgprot = PAGE_KERNEL; - else - pgprot = pgprot_writecombine(PAGE_KERNEL); - - return ion_heap_sglist_zero(table, pgprot); -} - -void ion_heap_freelist_add(struct ion_heap *heap, struct ion_buffer *buffer) -{ - spin_lock(&heap->free_lock); - list_add(&buffer->list, &heap->free_list); - heap->free_list_size += buffer->size; - spin_unlock(&heap->free_lock); - wake_up(&heap->waitqueue); -} - -size_t ion_heap_freelist_size(struct ion_heap *heap) -{ - size_t size; - - spin_lock(&heap->free_lock); - size = heap->free_list_size; - spin_unlock(&heap->free_lock); - - return size; -} - -static size_t _ion_heap_freelist_drain(struct ion_heap *heap, size_t size, - bool skip_pools) -{ - struct ion_buffer *buffer; - size_t total_drained = 0; - - if (ion_heap_freelist_size(heap) == 0) - return 0; - - spin_lock(&heap->free_lock); - if (size == 0) - size = heap->free_list_size; - - while (!list_empty(&heap->free_list)) { - if (total_drained >= size) - break; - buffer = list_first_entry(&heap->free_list, struct ion_buffer, - list); - list_del(&buffer->list); - heap->free_list_size -= buffer->size; - if (skip_pools) - buffer->private_flags |= ION_PRIV_FLAG_SHRINKER_FREE; - total_drained += buffer->size; - spin_unlock(&heap->free_lock); - ion_buffer_destroy(buffer); - spin_lock(&heap->free_lock); - } - spin_unlock(&heap->free_lock); - - return total_drained; -} - -size_t ion_heap_freelist_drain(struct ion_heap *heap, size_t size) -{ - return _ion_heap_freelist_drain(heap, size, false); -} - -size_t ion_heap_freelist_shrink(struct ion_heap *heap, size_t size) -{ - return _ion_heap_freelist_drain(heap, size, true); -} - -static int ion_heap_deferred_free(void *data) -{ - struct ion_heap *heap = data; - - while (true) { - struct ion_buffer *buffer; - - wait_event_freezable(heap->waitqueue, - ion_heap_freelist_size(heap) > 0); - - spin_lock(&heap->free_lock); - if (list_empty(&heap->free_list)) { - spin_unlock(&heap->free_lock); - continue; - } - buffer = list_first_entry(&heap->free_list, struct ion_buffer, - list); - list_del(&buffer->list); - heap->free_list_size -= buffer->size; - spin_unlock(&heap->free_lock); - ion_buffer_destroy(buffer); - } - - return 0; -} - -int ion_heap_init_deferred_free(struct ion_heap *heap) -{ - INIT_LIST_HEAD(&heap->free_list); - init_waitqueue_head(&heap->waitqueue); - heap->task = kthread_run(ion_heap_deferred_free, heap, - "%s", heap->name); - if (IS_ERR(heap->task)) { - pr_err("%s: creating thread for deferred free failed\n", - __func__); - return PTR_ERR_OR_ZERO(heap->task); - } - sched_set_normal(heap->task, 19); - - return 0; -} - -static unsigned long ion_heap_shrink_count(struct shrinker *shrinker, - struct shrink_control *sc) -{ - struct ion_heap *heap = container_of(shrinker, struct ion_heap, - shrinker); - int total = 0; - - total = ion_heap_freelist_size(heap) / PAGE_SIZE; - - if (heap->ops->shrink) - total += heap->ops->shrink(heap, sc->gfp_mask, 0); - - return total; -} - -static unsigned long ion_heap_shrink_scan(struct shrinker *shrinker, - struct shrink_control *sc) -{ - struct ion_heap *heap = container_of(shrinker, struct ion_heap, - shrinker); - int freed = 0; - int to_scan = sc->nr_to_scan; - - if (to_scan == 0) - return 0; - - /* - * shrink the free list first, no point in zeroing the memory if we're - * just going to reclaim it. Also, skip any possible page pooling. - */ - if (heap->flags & ION_HEAP_FLAG_DEFER_FREE) - freed = ion_heap_freelist_shrink(heap, to_scan * PAGE_SIZE) / - PAGE_SIZE; - - to_scan -= freed; - if (to_scan <= 0) - return freed; - - if (heap->ops->shrink) - freed += heap->ops->shrink(heap, sc->gfp_mask, to_scan); - - return freed; -} - -int ion_heap_init_shrinker(struct ion_heap *heap) -{ - heap->shrinker.count_objects = ion_heap_shrink_count; - heap->shrinker.scan_objects = ion_heap_shrink_scan; - heap->shrinker.seeks = DEFAULT_SEEKS; - heap->shrinker.batch = 0; - - return register_shrinker(&heap->shrinker); -} diff --git a/drivers/staging/android/ion/ion_page_pool.c b/drivers/staging/android/ion/ion_page_pool.c deleted file mode 100644 index 0198b886d906..000000000000 --- a/drivers/staging/android/ion/ion_page_pool.c +++ /dev/null @@ -1,155 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ION Memory Allocator page pool helpers - * - * Copyright (C) 2011 Google, Inc. - */ - -#include -#include -#include -#include - -#include "ion.h" - -static inline struct page *ion_page_pool_alloc_pages(struct ion_page_pool *pool) -{ - if (fatal_signal_pending(current)) - return NULL; - return alloc_pages(pool->gfp_mask, pool->order); -} - -static void ion_page_pool_free_pages(struct ion_page_pool *pool, - struct page *page) -{ - __free_pages(page, pool->order); -} - -static void ion_page_pool_add(struct ion_page_pool *pool, struct page *page) -{ - mutex_lock(&pool->mutex); - if (PageHighMem(page)) { - list_add_tail(&page->lru, &pool->high_items); - pool->high_count++; - } else { - list_add_tail(&page->lru, &pool->low_items); - pool->low_count++; - } - - mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE, - 1 << pool->order); - mutex_unlock(&pool->mutex); -} - -static struct page *ion_page_pool_remove(struct ion_page_pool *pool, bool high) -{ - struct page *page; - - if (high) { - BUG_ON(!pool->high_count); - page = list_first_entry(&pool->high_items, struct page, lru); - pool->high_count--; - } else { - BUG_ON(!pool->low_count); - page = list_first_entry(&pool->low_items, struct page, lru); - pool->low_count--; - } - - list_del(&page->lru); - mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE, - -(1 << pool->order)); - return page; -} - -struct page *ion_page_pool_alloc(struct ion_page_pool *pool) -{ - struct page *page = NULL; - - BUG_ON(!pool); - - mutex_lock(&pool->mutex); - if (pool->high_count) - page = ion_page_pool_remove(pool, true); - else if (pool->low_count) - page = ion_page_pool_remove(pool, false); - mutex_unlock(&pool->mutex); - - if (!page) - page = ion_page_pool_alloc_pages(pool); - - return page; -} - -void ion_page_pool_free(struct ion_page_pool *pool, struct page *page) -{ - BUG_ON(pool->order != compound_order(page)); - - ion_page_pool_add(pool, page); -} - -static int ion_page_pool_total(struct ion_page_pool *pool, bool high) -{ - int count = pool->low_count; - - if (high) - count += pool->high_count; - - return count << pool->order; -} - -int ion_page_pool_shrink(struct ion_page_pool *pool, gfp_t gfp_mask, - int nr_to_scan) -{ - int freed = 0; - bool high; - - if (current_is_kswapd()) - high = true; - else - high = !!(gfp_mask & __GFP_HIGHMEM); - - if (nr_to_scan == 0) - return ion_page_pool_total(pool, high); - - while (freed < nr_to_scan) { - struct page *page; - - mutex_lock(&pool->mutex); - if (pool->low_count) { - page = ion_page_pool_remove(pool, false); - } else if (high && pool->high_count) { - page = ion_page_pool_remove(pool, true); - } else { - mutex_unlock(&pool->mutex); - break; - } - mutex_unlock(&pool->mutex); - ion_page_pool_free_pages(pool, page); - freed += (1 << pool->order); - } - - return freed; -} - -struct ion_page_pool *ion_page_pool_create(gfp_t gfp_mask, unsigned int order) -{ - struct ion_page_pool *pool = kmalloc(sizeof(*pool), GFP_KERNEL); - - if (!pool) - return NULL; - pool->high_count = 0; - pool->low_count = 0; - INIT_LIST_HEAD(&pool->low_items); - INIT_LIST_HEAD(&pool->high_items); - pool->gfp_mask = gfp_mask | __GFP_COMP; - pool->order = order; - mutex_init(&pool->mutex); - plist_node_init(&pool->list, order); - - return pool; -} - -void ion_page_pool_destroy(struct ion_page_pool *pool) -{ - kfree(pool); -} diff --git a/drivers/staging/android/ion/ion_system_heap.c b/drivers/staging/android/ion/ion_system_heap.c deleted file mode 100644 index eac0632ab4e8..000000000000 --- a/drivers/staging/android/ion/ion_system_heap.c +++ /dev/null @@ -1,377 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ION Memory Allocator system heap exporter - * - * Copyright (C) 2011 Google, Inc. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ion.h" - -#define NUM_ORDERS ARRAY_SIZE(orders) - -static gfp_t high_order_gfp_flags = (GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN | - __GFP_NORETRY) & ~__GFP_RECLAIM; -static gfp_t low_order_gfp_flags = GFP_HIGHUSER | __GFP_ZERO; -static const unsigned int orders[] = {8, 4, 0}; - -static int order_to_index(unsigned int order) -{ - int i; - - for (i = 0; i < NUM_ORDERS; i++) - if (order == orders[i]) - return i; - BUG(); - return -1; -} - -static inline unsigned int order_to_size(int order) -{ - return PAGE_SIZE << order; -} - -struct ion_system_heap { - struct ion_heap heap; - struct ion_page_pool *pools[NUM_ORDERS]; -}; - -static struct page *alloc_buffer_page(struct ion_system_heap *heap, - struct ion_buffer *buffer, - unsigned long order) -{ - struct ion_page_pool *pool = heap->pools[order_to_index(order)]; - - return ion_page_pool_alloc(pool); -} - -static void free_buffer_page(struct ion_system_heap *heap, - struct ion_buffer *buffer, struct page *page) -{ - struct ion_page_pool *pool; - unsigned int order = compound_order(page); - - /* go to system */ - if (buffer->private_flags & ION_PRIV_FLAG_SHRINKER_FREE) { - __free_pages(page, order); - return; - } - - pool = heap->pools[order_to_index(order)]; - - ion_page_pool_free(pool, page); -} - -static struct page *alloc_largest_available(struct ion_system_heap *heap, - struct ion_buffer *buffer, - unsigned long size, - unsigned int max_order) -{ - struct page *page; - int i; - - for (i = 0; i < NUM_ORDERS; i++) { - if (size < order_to_size(orders[i])) - continue; - if (max_order < orders[i]) - continue; - - page = alloc_buffer_page(heap, buffer, orders[i]); - if (!page) - continue; - - return page; - } - - return NULL; -} - -static int ion_system_heap_allocate(struct ion_heap *heap, - struct ion_buffer *buffer, - unsigned long size, - unsigned long flags) -{ - struct ion_system_heap *sys_heap = container_of(heap, - struct ion_system_heap, - heap); - struct sg_table *table; - struct scatterlist *sg; - struct list_head pages; - struct page *page, *tmp_page; - int i = 0; - unsigned long size_remaining = PAGE_ALIGN(size); - unsigned int max_order = orders[0]; - - if (size / PAGE_SIZE > totalram_pages() / 2) - return -ENOMEM; - - INIT_LIST_HEAD(&pages); - while (size_remaining > 0) { - page = alloc_largest_available(sys_heap, buffer, size_remaining, - max_order); - if (!page) - goto free_pages; - list_add_tail(&page->lru, &pages); - size_remaining -= page_size(page); - max_order = compound_order(page); - i++; - } - table = kmalloc(sizeof(*table), GFP_KERNEL); - if (!table) - goto free_pages; - - if (sg_alloc_table(table, i, GFP_KERNEL)) - goto free_table; - - sg = table->sgl; - list_for_each_entry_safe(page, tmp_page, &pages, lru) { - sg_set_page(sg, page, page_size(page), 0); - sg = sg_next(sg); - list_del(&page->lru); - } - - buffer->sg_table = table; - return 0; - -free_table: - kfree(table); -free_pages: - list_for_each_entry_safe(page, tmp_page, &pages, lru) - free_buffer_page(sys_heap, buffer, page); - return -ENOMEM; -} - -static void ion_system_heap_free(struct ion_buffer *buffer) -{ - struct ion_system_heap *sys_heap = container_of(buffer->heap, - struct ion_system_heap, - heap); - struct sg_table *table = buffer->sg_table; - struct scatterlist *sg; - int i; - - /* zero the buffer before goto page pool */ - if (!(buffer->private_flags & ION_PRIV_FLAG_SHRINKER_FREE)) - ion_heap_buffer_zero(buffer); - - for_each_sgtable_sg(table, sg, i) - free_buffer_page(sys_heap, buffer, sg_page(sg)); - sg_free_table(table); - kfree(table); -} - -static int ion_system_heap_shrink(struct ion_heap *heap, gfp_t gfp_mask, - int nr_to_scan) -{ - struct ion_page_pool *pool; - struct ion_system_heap *sys_heap; - int nr_total = 0; - int i, nr_freed; - int only_scan = 0; - - sys_heap = container_of(heap, struct ion_system_heap, heap); - - if (!nr_to_scan) - only_scan = 1; - - for (i = 0; i < NUM_ORDERS; i++) { - pool = sys_heap->pools[i]; - - if (only_scan) { - nr_total += ion_page_pool_shrink(pool, - gfp_mask, - nr_to_scan); - - } else { - nr_freed = ion_page_pool_shrink(pool, - gfp_mask, - nr_to_scan); - nr_to_scan -= nr_freed; - nr_total += nr_freed; - if (nr_to_scan <= 0) - break; - } - } - return nr_total; -} - -static struct ion_heap_ops system_heap_ops = { - .allocate = ion_system_heap_allocate, - .free = ion_system_heap_free, - .map_kernel = ion_heap_map_kernel, - .unmap_kernel = ion_heap_unmap_kernel, - .map_user = ion_heap_map_user, - .shrink = ion_system_heap_shrink, -}; - -static void ion_system_heap_destroy_pools(struct ion_page_pool **pools) -{ - int i; - - for (i = 0; i < NUM_ORDERS; i++) - if (pools[i]) - ion_page_pool_destroy(pools[i]); -} - -static int ion_system_heap_create_pools(struct ion_page_pool **pools) -{ - int i; - - for (i = 0; i < NUM_ORDERS; i++) { - struct ion_page_pool *pool; - gfp_t gfp_flags = low_order_gfp_flags; - - if (orders[i] > 4) - gfp_flags = high_order_gfp_flags; - - pool = ion_page_pool_create(gfp_flags, orders[i]); - if (!pool) - goto err_create_pool; - pools[i] = pool; - } - - return 0; - -err_create_pool: - ion_system_heap_destroy_pools(pools); - return -ENOMEM; -} - -static struct ion_heap *__ion_system_heap_create(void) -{ - struct ion_system_heap *heap; - - heap = kzalloc(sizeof(*heap), GFP_KERNEL); - if (!heap) - return ERR_PTR(-ENOMEM); - heap->heap.ops = &system_heap_ops; - heap->heap.type = ION_HEAP_TYPE_SYSTEM; - heap->heap.flags = ION_HEAP_FLAG_DEFER_FREE; - - if (ion_system_heap_create_pools(heap->pools)) - goto free_heap; - - return &heap->heap; - -free_heap: - kfree(heap); - return ERR_PTR(-ENOMEM); -} - -static int ion_system_heap_create(void) -{ - struct ion_heap *heap; - - heap = __ion_system_heap_create(); - if (IS_ERR(heap)) - return PTR_ERR(heap); - heap->name = "ion_system_heap"; - - ion_device_add_heap(heap); - - return 0; -} -device_initcall(ion_system_heap_create); - -static int ion_system_contig_heap_allocate(struct ion_heap *heap, - struct ion_buffer *buffer, - unsigned long len, - unsigned long flags) -{ - int order = get_order(len); - struct page *page; - struct sg_table *table; - unsigned long i; - int ret; - - page = alloc_pages(low_order_gfp_flags | __GFP_NOWARN, order); - if (!page) - return -ENOMEM; - - split_page(page, order); - - len = PAGE_ALIGN(len); - for (i = len >> PAGE_SHIFT; i < (1 << order); i++) - __free_page(page + i); - - table = kmalloc(sizeof(*table), GFP_KERNEL); - if (!table) { - ret = -ENOMEM; - goto free_pages; - } - - ret = sg_alloc_table(table, 1, GFP_KERNEL); - if (ret) - goto free_table; - - sg_set_page(table->sgl, page, len, 0); - - buffer->sg_table = table; - - return 0; - -free_table: - kfree(table); -free_pages: - for (i = 0; i < len >> PAGE_SHIFT; i++) - __free_page(page + i); - - return ret; -} - -static void ion_system_contig_heap_free(struct ion_buffer *buffer) -{ - struct sg_table *table = buffer->sg_table; - struct page *page = sg_page(table->sgl); - unsigned long pages = PAGE_ALIGN(buffer->size) >> PAGE_SHIFT; - unsigned long i; - - for (i = 0; i < pages; i++) - __free_page(page + i); - sg_free_table(table); - kfree(table); -} - -static struct ion_heap_ops kmalloc_ops = { - .allocate = ion_system_contig_heap_allocate, - .free = ion_system_contig_heap_free, - .map_kernel = ion_heap_map_kernel, - .unmap_kernel = ion_heap_unmap_kernel, - .map_user = ion_heap_map_user, -}; - -static struct ion_heap *__ion_system_contig_heap_create(void) -{ - struct ion_heap *heap; - - heap = kzalloc(sizeof(*heap), GFP_KERNEL); - if (!heap) - return ERR_PTR(-ENOMEM); - heap->ops = &kmalloc_ops; - heap->type = ION_HEAP_TYPE_SYSTEM_CONTIG; - heap->name = "ion_system_contig_heap"; - - return heap; -} - -static int ion_system_contig_heap_create(void) -{ - struct ion_heap *heap; - - heap = __ion_system_contig_heap_create(); - if (IS_ERR(heap)) - return PTR_ERR(heap); - - ion_device_add_heap(heap); - - return 0; -} -device_initcall(ion_system_contig_heap_create); diff --git a/drivers/staging/android/uapi/ion.h b/drivers/staging/android/uapi/ion.h deleted file mode 100644 index 46c93fcb46d6..000000000000 --- a/drivers/staging/android/uapi/ion.h +++ /dev/null @@ -1,127 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * drivers/staging/android/uapi/ion.h - * - * Copyright (C) 2011 Google, Inc. - */ - -#ifndef _UAPI_LINUX_ION_H -#define _UAPI_LINUX_ION_H - -#include -#include - -/** - * enum ion_heap_types - list of all possible types of heaps - * @ION_HEAP_TYPE_SYSTEM: memory allocated via vmalloc - * @ION_HEAP_TYPE_SYSTEM_CONTIG: memory allocated via kmalloc - * @ION_HEAP_TYPE_CARVEOUT: memory allocated from a prereserved - * carveout heap, allocations are physically - * contiguous - * @ION_HEAP_TYPE_DMA: memory allocated via DMA API - * @ION_NUM_HEAPS: helper for iterating over heaps, a bit mask - * is used to identify the heaps, so only 32 - * total heap types are supported - */ -enum ion_heap_type { - ION_HEAP_TYPE_SYSTEM, - ION_HEAP_TYPE_SYSTEM_CONTIG, - ION_HEAP_TYPE_CARVEOUT, - ION_HEAP_TYPE_CHUNK, - ION_HEAP_TYPE_DMA, - ION_HEAP_TYPE_CUSTOM, /* - * must be last so device specific heaps always - * are at the end of this enum - */ -}; - -#define ION_NUM_HEAP_IDS (sizeof(unsigned int) * 8) - -/** - * allocation flags - the lower 16 bits are used by core ion, the upper 16 - * bits are reserved for use by the heaps themselves. - */ - -/* - * mappings of this buffer should be cached, ion will do cache maintenance - * when the buffer is mapped for dma - */ -#define ION_FLAG_CACHED 1 - -/** - * DOC: Ion Userspace API - * - * create a client by opening /dev/ion - * most operations handled via following ioctls - * - */ - -/** - * struct ion_allocation_data - metadata passed from userspace for allocations - * @len: size of the allocation - * @heap_id_mask: mask of heap ids to allocate from - * @flags: flags passed to heap - * @handle: pointer that will be populated with a cookie to use to - * refer to this allocation - * - * Provided by userspace as an argument to the ioctl - */ -struct ion_allocation_data { - __u64 len; - __u32 heap_id_mask; - __u32 flags; - __u32 fd; - __u32 unused; -}; - -#define MAX_HEAP_NAME 32 - -/** - * struct ion_heap_data - data about a heap - * @name - first 32 characters of the heap name - * @type - heap type - * @heap_id - heap id for the heap - */ -struct ion_heap_data { - char name[MAX_HEAP_NAME]; - __u32 type; - __u32 heap_id; - __u32 reserved0; - __u32 reserved1; - __u32 reserved2; -}; - -/** - * struct ion_heap_query - collection of data about all heaps - * @cnt - total number of heaps to be copied - * @heaps - buffer to copy heap data - */ -struct ion_heap_query { - __u32 cnt; /* Total number of heaps to be copied */ - __u32 reserved0; /* align to 64bits */ - __u64 heaps; /* buffer to be populated */ - __u32 reserved1; - __u32 reserved2; -}; - -#define ION_IOC_MAGIC 'I' - -/** - * DOC: ION_IOC_ALLOC - allocate memory - * - * Takes an ion_allocation_data struct and returns it with the handle field - * populated with the opaque handle for the allocation. - */ -#define ION_IOC_ALLOC _IOWR(ION_IOC_MAGIC, 0, \ - struct ion_allocation_data) - -/** - * DOC: ION_IOC_HEAP_QUERY - information about available heaps - * - * Takes an ion_heap_query structure and populates information about - * available Ion heaps. - */ -#define ION_IOC_HEAP_QUERY _IOWR(ION_IOC_MAGIC, 8, \ - struct ion_heap_query) - -#endif /* _UAPI_LINUX_ION_H */ diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index d9c283503159..924cbcc22e0d 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -TARGETS = android -TARGETS += arm64 +TARGETS = arm64 TARGETS += bpf TARGETS += breakpoints TARGETS += capabilities diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile deleted file mode 100644 index 9258306cafe9..000000000000 --- a/tools/testing/selftests/android/Makefile +++ /dev/null @@ -1,39 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -SUBDIRS := ion - -TEST_PROGS := run.sh - -.PHONY: all clean - -include ../lib.mk - -all: - @for DIR in $(SUBDIRS); do \ - BUILD_TARGET=$(OUTPUT)/$$DIR; \ - mkdir $$BUILD_TARGET -p; \ - make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ - #SUBDIR test prog name should be in the form: SUBDIR_test.sh \ - TEST=$$DIR"_test.sh"; \ - if [ -e $$DIR/$$TEST ]; then \ - rsync -a $$DIR/$$TEST $$BUILD_TARGET/; \ - fi \ - done - -override define INSTALL_RULE - mkdir -p $(INSTALL_PATH) -install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) - - @for SUBDIR in $(SUBDIRS); do \ - BUILD_TARGET=$(OUTPUT)/$$SUBDIR; \ - mkdir $$BUILD_TARGET -p; \ - $(MAKE) OUTPUT=$$BUILD_TARGET -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \ - done; -endef - -override define CLEAN - @for DIR in $(SUBDIRS); do \ - BUILD_TARGET=$(OUTPUT)/$$DIR; \ - mkdir $$BUILD_TARGET -p; \ - make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ - done -endef diff --git a/tools/testing/selftests/android/config b/tools/testing/selftests/android/config deleted file mode 100644 index b4ad748a9dd9..000000000000 --- a/tools/testing/selftests/android/config +++ /dev/null @@ -1,5 +0,0 @@ -CONFIG_ANDROID=y -CONFIG_STAGING=y -CONFIG_ION=y -CONFIG_ION_SYSTEM_HEAP=y -CONFIG_DRM_VGEM=y diff --git a/tools/testing/selftests/android/ion/.gitignore b/tools/testing/selftests/android/ion/.gitignore deleted file mode 100644 index 78eae9972bb1..000000000000 --- a/tools/testing/selftests/android/ion/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -ionapp_export -ionapp_import -ionmap_test diff --git a/tools/testing/selftests/android/ion/Makefile b/tools/testing/selftests/android/ion/Makefile deleted file mode 100644 index 42b71f005332..000000000000 --- a/tools/testing/selftests/android/ion/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -INCLUDEDIR := -I. -I../../../../../drivers/staging/android/uapi/ -I../../../../../usr/include/ -CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g - -TEST_GEN_FILES := ionapp_export ionapp_import ionmap_test - -all: $(TEST_GEN_FILES) - -$(TEST_GEN_FILES): ipcsocket.c ionutils.c - -TEST_PROGS := ion_test.sh - -KSFT_KHDR_INSTALL := 1 -top_srcdir = ../../../../.. -include ../../lib.mk - -$(OUTPUT)/ionapp_export: ionapp_export.c ipcsocket.c ionutils.c -$(OUTPUT)/ionapp_import: ionapp_import.c ipcsocket.c ionutils.c -$(OUTPUT)/ionmap_test: ionmap_test.c ionutils.c ipcsocket.c diff --git a/tools/testing/selftests/android/ion/README b/tools/testing/selftests/android/ion/README deleted file mode 100644 index 21783e9c451e..000000000000 --- a/tools/testing/selftests/android/ion/README +++ /dev/null @@ -1,101 +0,0 @@ -ION BUFFER SHARING UTILITY -========================== -File: ion_test.sh : Utility to test ION driver buffer sharing mechanism. -Author: Pintu Kumar - -Introduction: -------------- -This is a test utility to verify ION buffer sharing in user space -between 2 independent processes. -It uses unix domain socket (with SCM_RIGHTS) as IPC to transfer an FD to -another process to share the same buffer. -This utility demonstrates how ION buffer sharing can be implemented between -two user space processes, using various heap types. -The following heap types are supported by ION driver. -ION_HEAP_TYPE_SYSTEM (0) -ION_HEAP_TYPE_SYSTEM_CONTIG (1) -ION_HEAP_TYPE_CARVEOUT (2) -ION_HEAP_TYPE_CHUNK (3) -ION_HEAP_TYPE_DMA (4) - -By default only the SYSTEM and SYSTEM_CONTIG heaps are supported. -Each heap is associated with the respective heap id. -This utility is designed in the form of client/server program. -The server part (ionapp_export) is the exporter of the buffer. -It is responsible for creating an ION client, allocating the buffer based on -the heap id, writing some data to this buffer and then exporting the FD -(associated with this buffer) to another process using socket IPC. -This FD is called as buffer FD (which is different than the ION client FD). - -The client part (ionapp_import) is the importer of the buffer. -It retrives the FD from the socket data and installs into its address space. -This new FD internally points to the same kernel buffer. -So first it reads the data that is stored in this buffer and prints it. -Then it writes the different size of data (it could be different data) to the -same buffer. -Finally the buffer FD must be closed by both the exporter and importer. -Thus the same kernel buffer is shared among two user space processes using -ION driver and only one time allocation. - -Prerequisite: -------------- -This utility works only if /dev/ion interface is present. -The following configs needs to be enabled in kernel to include ion driver. -CONFIG_ANDROID=y -CONFIG_STAGING=y -CONFIG_ION=y -CONFIG_ION_SYSTEM_HEAP=y - -This utility requires to be run as root user. - - -Compile and test: ------------------ -This utility is made to be run as part of kselftest framework in kernel. -To compile and run using kselftest you can simply do the following from the -kernel top directory. -linux$ make TARGETS=android kselftest -Or you can also use: -linux$ make -C tools/testing/selftests TARGETS=android run_tests -Using the selftest it can directly execute the ion_test.sh script to test the -buffer sharing using ion system heap. -Currently the heap size is hard coded as just 10 bytes inside this script. -You need to be a root user to run under selftest. - -You can also compile and test manually using the following steps: -ion$ make -These will generate 2 executable: ionapp_export, ionapp_import -Now you can run the export and import manually by specifying the heap type -and the heap size. -You can also directly execute the shell script to run the test automatically. -Simply use the following command to run the test. -ion$ sudo ./ion_test.sh - -Test Results: -------------- -The utility is verified on Ubuntu-32 bit system with Linux Kernel 4.14. -Here is the snapshot of the test result using kselftest. - -linux# make TARGETS=android kselftest -heap_type: 0, heap_size: 10 --------------------------------------- -heap type: 0 - heap id: 1 -heap name: ion_system_heap --------------------------------------- -Fill buffer content: -0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd -Sharing fd: 6, Client fd: 5 -: buffer release successfully.... -Received buffer fd: 4 -Read buffer content: -0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0x0 0x0 0x0 0x0 0x0 0x0 -0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 -Fill buffer content: -0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd -0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd -0xfd 0xfd -: buffer release successfully.... -ion_test.sh: heap_type: 0 - [PASS] - -ion_test.sh: done diff --git a/tools/testing/selftests/android/ion/ion.h b/tools/testing/selftests/android/ion/ion.h deleted file mode 100644 index 33db23018abf..000000000000 --- a/tools/testing/selftests/android/ion/ion.h +++ /dev/null @@ -1,134 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * ion.h - * - * Copyright (C) 2011 Google, Inc. - */ - -/* This file is copied from drivers/staging/android/uapi/ion.h - * This local copy is required for the selftest to pass, when build - * outside the kernel source tree. - * Please keep this file in sync with its original file until the - * ion driver is moved outside the staging tree. - */ - -#ifndef _UAPI_LINUX_ION_H -#define _UAPI_LINUX_ION_H - -#include -#include - -/** - * enum ion_heap_types - list of all possible types of heaps - * @ION_HEAP_TYPE_SYSTEM: memory allocated via vmalloc - * @ION_HEAP_TYPE_SYSTEM_CONTIG: memory allocated via kmalloc - * @ION_HEAP_TYPE_CARVEOUT: memory allocated from a prereserved - * carveout heap, allocations are physically - * contiguous - * @ION_HEAP_TYPE_DMA: memory allocated via DMA API - * @ION_NUM_HEAPS: helper for iterating over heaps, a bit mask - * is used to identify the heaps, so only 32 - * total heap types are supported - */ -enum ion_heap_type { - ION_HEAP_TYPE_SYSTEM, - ION_HEAP_TYPE_SYSTEM_CONTIG, - ION_HEAP_TYPE_CARVEOUT, - ION_HEAP_TYPE_CHUNK, - ION_HEAP_TYPE_DMA, - ION_HEAP_TYPE_CUSTOM, /* - * must be last so device specific heaps always - * are at the end of this enum - */ -}; - -#define ION_NUM_HEAP_IDS (sizeof(unsigned int) * 8) - -/** - * allocation flags - the lower 16 bits are used by core ion, the upper 16 - * bits are reserved for use by the heaps themselves. - */ - -/* - * mappings of this buffer should be cached, ion will do cache maintenance - * when the buffer is mapped for dma - */ -#define ION_FLAG_CACHED 1 - -/** - * DOC: Ion Userspace API - * - * create a client by opening /dev/ion - * most operations handled via following ioctls - * - */ - -/** - * struct ion_allocation_data - metadata passed from userspace for allocations - * @len: size of the allocation - * @heap_id_mask: mask of heap ids to allocate from - * @flags: flags passed to heap - * @handle: pointer that will be populated with a cookie to use to - * refer to this allocation - * - * Provided by userspace as an argument to the ioctl - */ -struct ion_allocation_data { - __u64 len; - __u32 heap_id_mask; - __u32 flags; - __u32 fd; - __u32 unused; -}; - -#define MAX_HEAP_NAME 32 - -/** - * struct ion_heap_data - data about a heap - * @name - first 32 characters of the heap name - * @type - heap type - * @heap_id - heap id for the heap - */ -struct ion_heap_data { - char name[MAX_HEAP_NAME]; - __u32 type; - __u32 heap_id; - __u32 reserved0; - __u32 reserved1; - __u32 reserved2; -}; - -/** - * struct ion_heap_query - collection of data about all heaps - * @cnt - total number of heaps to be copied - * @heaps - buffer to copy heap data - */ -struct ion_heap_query { - __u32 cnt; /* Total number of heaps to be copied */ - __u32 reserved0; /* align to 64bits */ - __u64 heaps; /* buffer to be populated */ - __u32 reserved1; - __u32 reserved2; -}; - -#define ION_IOC_MAGIC 'I' - -/** - * DOC: ION_IOC_ALLOC - allocate memory - * - * Takes an ion_allocation_data struct and returns it with the handle field - * populated with the opaque handle for the allocation. - */ -#define ION_IOC_ALLOC _IOWR(ION_IOC_MAGIC, 0, \ - struct ion_allocation_data) - -/** - * DOC: ION_IOC_HEAP_QUERY - information about available heaps - * - * Takes an ion_heap_query structure and populates information about - * available Ion heaps. - */ -#define ION_IOC_HEAP_QUERY _IOWR(ION_IOC_MAGIC, 8, \ - struct ion_heap_query) - -#endif /* _UAPI_LINUX_ION_H */ diff --git a/tools/testing/selftests/android/ion/ion_test.sh b/tools/testing/selftests/android/ion/ion_test.sh deleted file mode 100755 index 69e676cfc94e..000000000000 --- a/tools/testing/selftests/android/ion/ion_test.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash - -heapsize=4096 -TCID="ion_test.sh" -errcode=0 - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - -run_test() -{ - heaptype=$1 - ./ionapp_export -i $heaptype -s $heapsize & - sleep 1 - ./ionapp_import - if [ $? -ne 0 ]; then - echo "$TCID: heap_type: $heaptype - [FAIL]" - errcode=1 - else - echo "$TCID: heap_type: $heaptype - [PASS]" - fi - sleep 1 - echo "" -} - -check_root() -{ - uid=$(id -u) - if [ $uid -ne 0 ]; then - echo $TCID: must be run as root >&2 - exit $ksft_skip - fi -} - -check_device() -{ - DEVICE=/dev/ion - if [ ! -e $DEVICE ]; then - echo $TCID: No $DEVICE device found >&2 - echo $TCID: May be CONFIG_ION is not set >&2 - exit $ksft_skip - fi -} - -main_function() -{ - check_device - check_root - - # ION_SYSTEM_HEAP TEST - run_test 0 - # ION_SYSTEM_CONTIG_HEAP TEST - run_test 1 -} - -main_function -echo "$TCID: done" -exit $errcode diff --git a/tools/testing/selftests/android/ion/ionapp_export.c b/tools/testing/selftests/android/ion/ionapp_export.c deleted file mode 100644 index 063b7830d1bd..000000000000 --- a/tools/testing/selftests/android/ion/ionapp_export.c +++ /dev/null @@ -1,127 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * ionapp_export.c - * - * It is a user space utility to create and export android - * ion memory buffer fd to another process using unix domain socket as IPC. - * This acts like a server for ionapp_import(client). - * So, this server has to be started first before the client. - * - * Copyright (C) 2017 Pintu Kumar - */ - -#include -#include -#include -#include -#include -#include -#include "ionutils.h" -#include "ipcsocket.h" - - -void print_usage(int argc, char *argv[]) -{ - printf("Usage: %s [-h ] [-i ] [-s ]\n", - argv[0]); -} - -int main(int argc, char *argv[]) -{ - int opt, ret, status, heapid; - int sockfd, client_fd, shared_fd; - unsigned char *map_buf; - unsigned long map_len, heap_type, heap_size, flags; - struct ion_buffer_info info; - struct socket_info skinfo; - - if (argc < 2) { - print_usage(argc, argv); - return -1; - } - - heap_size = 0; - flags = 0; - heap_type = ION_HEAP_TYPE_SYSTEM; - - while ((opt = getopt(argc, argv, "hi:s:")) != -1) { - switch (opt) { - case 'h': - print_usage(argc, argv); - exit(0); - break; - case 'i': - heapid = atoi(optarg); - switch (heapid) { - case 0: - heap_type = ION_HEAP_TYPE_SYSTEM; - break; - case 1: - heap_type = ION_HEAP_TYPE_SYSTEM_CONTIG; - break; - default: - printf("ERROR: heap type not supported\n"); - exit(1); - } - break; - case 's': - heap_size = atoi(optarg); - break; - default: - print_usage(argc, argv); - exit(1); - break; - } - } - - if (heap_size <= 0) { - printf("heap_size cannot be 0\n"); - print_usage(argc, argv); - exit(1); - } - - printf("heap_type: %ld, heap_size: %ld\n", heap_type, heap_size); - info.heap_type = heap_type; - info.heap_size = heap_size; - info.flag_type = flags; - - /* This is server: open the socket connection first */ - /* Here; 1 indicates server or exporter */ - status = opensocket(&sockfd, SOCKET_NAME, 1); - if (status < 0) { - fprintf(stderr, "<%s>: Failed opensocket.\n", __func__); - goto err_socket; - } - skinfo.sockfd = sockfd; - - ret = ion_export_buffer_fd(&info); - if (ret < 0) { - fprintf(stderr, "FAILED: ion_get_buffer_fd\n"); - goto err_export; - } - client_fd = info.ionfd; - shared_fd = info.buffd; - map_buf = info.buffer; - map_len = info.buflen; - write_buffer(map_buf, map_len); - - /* share ion buf fd with other user process */ - printf("Sharing fd: %d, Client fd: %d\n", shared_fd, client_fd); - skinfo.datafd = shared_fd; - skinfo.buflen = map_len; - - ret = socket_send_fd(&skinfo); - if (ret < 0) { - fprintf(stderr, "FAILED: socket_send_fd\n"); - goto err_send; - } - -err_send: -err_export: - ion_close_buffer_fd(&info); - -err_socket: - closesocket(sockfd, SOCKET_NAME); - - return 0; -} diff --git a/tools/testing/selftests/android/ion/ionapp_import.c b/tools/testing/selftests/android/ion/ionapp_import.c deleted file mode 100644 index 54b580cb04f6..000000000000 --- a/tools/testing/selftests/android/ion/ionapp_import.c +++ /dev/null @@ -1,79 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * ionapp_import.c - * - * It is a user space utility to receive android ion memory buffer fd - * over unix domain socket IPC that can be exported by ionapp_export. - * This acts like a client for ionapp_export. - * - * Copyright (C) 2017 Pintu Kumar - */ - -#include -#include -#include -#include -#include "ionutils.h" -#include "ipcsocket.h" - - -int main(void) -{ - int ret, status; - int sockfd, shared_fd; - unsigned char *map_buf; - unsigned long map_len; - struct ion_buffer_info info; - struct socket_info skinfo; - - /* This is the client part. Here 0 means client or importer */ - status = opensocket(&sockfd, SOCKET_NAME, 0); - if (status < 0) { - fprintf(stderr, "No exporter exists...\n"); - ret = status; - goto err_socket; - } - - skinfo.sockfd = sockfd; - - ret = socket_receive_fd(&skinfo); - if (ret < 0) { - fprintf(stderr, "Failed: socket_receive_fd\n"); - goto err_recv; - } - - shared_fd = skinfo.datafd; - printf("Received buffer fd: %d\n", shared_fd); - if (shared_fd <= 0) { - fprintf(stderr, "ERROR: improper buf fd\n"); - ret = -1; - goto err_fd; - } - - memset(&info, 0, sizeof(info)); - info.buffd = shared_fd; - info.buflen = ION_BUFFER_LEN; - - ret = ion_import_buffer_fd(&info); - if (ret < 0) { - fprintf(stderr, "Failed: ion_use_buffer_fd\n"); - goto err_import; - } - - map_buf = info.buffer; - map_len = info.buflen; - read_buffer(map_buf, map_len); - - /* Write probably new data to the same buffer again */ - map_len = ION_BUFFER_LEN; - write_buffer(map_buf, map_len); - -err_import: - ion_close_buffer_fd(&info); -err_fd: -err_recv: -err_socket: - closesocket(sockfd, SOCKET_NAME); - - return ret; -} diff --git a/tools/testing/selftests/android/ion/ionmap_test.c b/tools/testing/selftests/android/ion/ionmap_test.c deleted file mode 100644 index dab36b06b37d..000000000000 --- a/tools/testing/selftests/android/ion/ionmap_test.c +++ /dev/null @@ -1,136 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -#include - -#include "ion.h" -#include "ionutils.h" - -int check_vgem(int fd) -{ - drm_version_t version = { 0 }; - char name[5]; - int ret; - - version.name_len = 4; - version.name = name; - - ret = ioctl(fd, DRM_IOCTL_VERSION, &version); - if (ret) - return 1; - - return strcmp(name, "vgem"); -} - -int open_vgem(void) -{ - int i, fd; - const char *drmstr = "/dev/dri/card"; - - fd = -1; - for (i = 0; i < 16; i++) { - char name[80]; - - sprintf(name, "%s%u", drmstr, i); - - fd = open(name, O_RDWR); - if (fd < 0) - continue; - - if (check_vgem(fd)) { - close(fd); - continue; - } else { - break; - } - - } - return fd; -} - -int import_vgem_fd(int vgem_fd, int dma_buf_fd, uint32_t *handle) -{ - struct drm_prime_handle import_handle = { 0 }; - int ret; - - import_handle.fd = dma_buf_fd; - import_handle.flags = 0; - import_handle.handle = 0; - - ret = ioctl(vgem_fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &import_handle); - if (ret == 0) - *handle = import_handle.handle; - return ret; -} - -void close_handle(int vgem_fd, uint32_t handle) -{ - struct drm_gem_close close = { 0 }; - - close.handle = handle; - ioctl(vgem_fd, DRM_IOCTL_GEM_CLOSE, &close); -} - -int main() -{ - int ret, vgem_fd; - struct ion_buffer_info info; - uint32_t handle = 0; - struct dma_buf_sync sync = { 0 }; - - info.heap_type = ION_HEAP_TYPE_SYSTEM; - info.heap_size = 4096; - info.flag_type = ION_FLAG_CACHED; - - ret = ion_export_buffer_fd(&info); - if (ret < 0) { - printf("ion buffer alloc failed\n"); - return -1; - } - - vgem_fd = open_vgem(); - if (vgem_fd < 0) { - ret = vgem_fd; - printf("Failed to open vgem\n"); - goto out_ion; - } - - ret = import_vgem_fd(vgem_fd, info.buffd, &handle); - - if (ret < 0) { - printf("Failed to import buffer\n"); - goto out_vgem; - } - - sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW; - ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync); - if (ret) - printf("sync start failed %d\n", errno); - - memset(info.buffer, 0xff, 4096); - - sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW; - ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync); - if (ret) - printf("sync end failed %d\n", errno); - - close_handle(vgem_fd, handle); - ret = 0; - -out_vgem: - close(vgem_fd); -out_ion: - ion_close_buffer_fd(&info); - printf("done.\n"); - return ret; -} diff --git a/tools/testing/selftests/android/ion/ionutils.c b/tools/testing/selftests/android/ion/ionutils.c deleted file mode 100644 index 7d1d37c4ef6a..000000000000 --- a/tools/testing/selftests/android/ion/ionutils.c +++ /dev/null @@ -1,253 +0,0 @@ -#include -#include -#include -#include -#include -//#include -#include -#include -#include "ionutils.h" -#include "ipcsocket.h" - - -void write_buffer(void *buffer, unsigned long len) -{ - int i; - unsigned char *ptr = (unsigned char *)buffer; - - if (!ptr) { - fprintf(stderr, "<%s>: Invalid buffer...\n", __func__); - return; - } - - printf("Fill buffer content:\n"); - memset(ptr, 0xfd, len); - for (i = 0; i < len; i++) - printf("0x%x ", ptr[i]); - printf("\n"); -} - -void read_buffer(void *buffer, unsigned long len) -{ - int i; - unsigned char *ptr = (unsigned char *)buffer; - - if (!ptr) { - fprintf(stderr, "<%s>: Invalid buffer...\n", __func__); - return; - } - - printf("Read buffer content:\n"); - for (i = 0; i < len; i++) - printf("0x%x ", ptr[i]); - printf("\n"); -} - -int ion_export_buffer_fd(struct ion_buffer_info *ion_info) -{ - int i, ret, ionfd, buffer_fd; - unsigned int heap_id; - unsigned long maplen; - unsigned char *map_buffer; - struct ion_allocation_data alloc_data; - struct ion_heap_query query; - struct ion_heap_data heap_data[MAX_HEAP_COUNT]; - - if (!ion_info) { - fprintf(stderr, "<%s>: Invalid ion info\n", __func__); - return -1; - } - - /* Create an ION client */ - ionfd = open(ION_DEVICE, O_RDWR); - if (ionfd < 0) { - fprintf(stderr, "<%s>: Failed to open ion client: %s\n", - __func__, strerror(errno)); - return -1; - } - - memset(&query, 0, sizeof(query)); - query.cnt = MAX_HEAP_COUNT; - query.heaps = (unsigned long int)&heap_data[0]; - /* Query ION heap_id_mask from ION heap */ - ret = ioctl(ionfd, ION_IOC_HEAP_QUERY, &query); - if (ret < 0) { - fprintf(stderr, "<%s>: Failed: ION_IOC_HEAP_QUERY: %s\n", - __func__, strerror(errno)); - goto err_query; - } - - heap_id = MAX_HEAP_COUNT + 1; - for (i = 0; i < query.cnt; i++) { - if (heap_data[i].type == ion_info->heap_type) { - heap_id = heap_data[i].heap_id; - break; - } - } - - if (heap_id > MAX_HEAP_COUNT) { - fprintf(stderr, "<%s>: ERROR: heap type does not exists\n", - __func__); - goto err_heap; - } - - alloc_data.len = ion_info->heap_size; - alloc_data.heap_id_mask = 1 << heap_id; - alloc_data.flags = ion_info->flag_type; - - /* Allocate memory for this ION client as per heap_type */ - ret = ioctl(ionfd, ION_IOC_ALLOC, &alloc_data); - if (ret < 0) { - fprintf(stderr, "<%s>: Failed: ION_IOC_ALLOC: %s\n", - __func__, strerror(errno)); - goto err_alloc; - } - - /* This will return a valid buffer fd */ - buffer_fd = alloc_data.fd; - maplen = alloc_data.len; - - if (buffer_fd < 0 || maplen <= 0) { - fprintf(stderr, "<%s>: Invalid map data, fd: %d, len: %ld\n", - __func__, buffer_fd, maplen); - goto err_fd_data; - } - - /* Create memory mapped buffer for the buffer fd */ - map_buffer = (unsigned char *)mmap(NULL, maplen, PROT_READ|PROT_WRITE, - MAP_SHARED, buffer_fd, 0); - if (map_buffer == MAP_FAILED) { - fprintf(stderr, "<%s>: Failed: mmap: %s\n", - __func__, strerror(errno)); - goto err_mmap; - } - - ion_info->ionfd = ionfd; - ion_info->buffd = buffer_fd; - ion_info->buffer = map_buffer; - ion_info->buflen = maplen; - - return 0; - - munmap(map_buffer, maplen); - -err_fd_data: -err_mmap: - /* in case of error: close the buffer fd */ - if (buffer_fd) - close(buffer_fd); - -err_query: -err_heap: -err_alloc: - /* In case of error: close the ion client fd */ - if (ionfd) - close(ionfd); - - return -1; -} - -int ion_import_buffer_fd(struct ion_buffer_info *ion_info) -{ - int buffd; - unsigned char *map_buf; - unsigned long map_len; - - if (!ion_info) { - fprintf(stderr, "<%s>: Invalid ion info\n", __func__); - return -1; - } - - map_len = ion_info->buflen; - buffd = ion_info->buffd; - - if (buffd < 0 || map_len <= 0) { - fprintf(stderr, "<%s>: Invalid map data, fd: %d, len: %ld\n", - __func__, buffd, map_len); - goto err_buffd; - } - - map_buf = (unsigned char *)mmap(NULL, map_len, PROT_READ|PROT_WRITE, - MAP_SHARED, buffd, 0); - if (map_buf == MAP_FAILED) { - printf("<%s>: Failed - mmap: %s\n", - __func__, strerror(errno)); - goto err_mmap; - } - - ion_info->buffer = map_buf; - ion_info->buflen = map_len; - - return 0; - -err_mmap: - if (buffd) - close(buffd); - -err_buffd: - return -1; -} - -void ion_close_buffer_fd(struct ion_buffer_info *ion_info) -{ - if (ion_info) { - /* unmap the buffer properly in the end */ - munmap(ion_info->buffer, ion_info->buflen); - /* close the buffer fd */ - if (ion_info->buffd > 0) - close(ion_info->buffd); - /* Finally, close the client fd */ - if (ion_info->ionfd > 0) - close(ion_info->ionfd); - } -} - -int socket_send_fd(struct socket_info *info) -{ - int status; - int fd, sockfd; - struct socketdata skdata; - - if (!info) { - fprintf(stderr, "<%s>: Invalid socket info\n", __func__); - return -1; - } - - sockfd = info->sockfd; - fd = info->datafd; - memset(&skdata, 0, sizeof(skdata)); - skdata.data = fd; - skdata.len = sizeof(skdata.data); - status = sendtosocket(sockfd, &skdata); - if (status < 0) { - fprintf(stderr, "<%s>: Failed: sendtosocket\n", __func__); - return -1; - } - - return 0; -} - -int socket_receive_fd(struct socket_info *info) -{ - int status; - int fd, sockfd; - struct socketdata skdata; - - if (!info) { - fprintf(stderr, "<%s>: Invalid socket info\n", __func__); - return -1; - } - - sockfd = info->sockfd; - memset(&skdata, 0, sizeof(skdata)); - status = receivefromsocket(sockfd, &skdata); - if (status < 0) { - fprintf(stderr, "<%s>: Failed: receivefromsocket\n", __func__); - return -1; - } - - fd = (int)skdata.data; - info->datafd = fd; - - return status; -} diff --git a/tools/testing/selftests/android/ion/ionutils.h b/tools/testing/selftests/android/ion/ionutils.h deleted file mode 100644 index 9941eb858576..000000000000 --- a/tools/testing/selftests/android/ion/ionutils.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef __ION_UTILS_H -#define __ION_UTILS_H - -#include "ion.h" - -#define SOCKET_NAME "ion_socket" -#define ION_DEVICE "/dev/ion" - -#define ION_BUFFER_LEN 4096 -#define MAX_HEAP_COUNT ION_HEAP_TYPE_CUSTOM - -struct socket_info { - int sockfd; - int datafd; - unsigned long buflen; -}; - -struct ion_buffer_info { - int ionfd; - int buffd; - unsigned int heap_type; - unsigned int flag_type; - unsigned long heap_size; - unsigned long buflen; - unsigned char *buffer; -}; - - -/* This is used to fill the data into the mapped buffer */ -void write_buffer(void *buffer, unsigned long len); - -/* This is used to read the data from the exported buffer */ -void read_buffer(void *buffer, unsigned long len); - -/* This is used to create an ION buffer FD for the kernel buffer - * So you can export this same buffer to others in the form of FD - */ -int ion_export_buffer_fd(struct ion_buffer_info *ion_info); - -/* This is used to import or map an exported FD. - * So we point to same buffer without making a copy. Hence zero-copy. - */ -int ion_import_buffer_fd(struct ion_buffer_info *ion_info); - -/* This is used to close all references for the ION client */ -void ion_close_buffer_fd(struct ion_buffer_info *ion_info); - -/* This is used to send FD to another process using socket IPC */ -int socket_send_fd(struct socket_info *skinfo); - -/* This is used to receive FD from another process using socket IPC */ -int socket_receive_fd(struct socket_info *skinfo); - - -#endif diff --git a/tools/testing/selftests/android/ion/ipcsocket.c b/tools/testing/selftests/android/ion/ipcsocket.c deleted file mode 100644 index 7dc521002095..000000000000 --- a/tools/testing/selftests/android/ion/ipcsocket.c +++ /dev/null @@ -1,227 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ipcsocket.h" - - -int opensocket(int *sockfd, const char *name, int connecttype) -{ - int ret, temp = 1; - - if (!name || strlen(name) > MAX_SOCK_NAME_LEN) { - fprintf(stderr, "<%s>: Invalid socket name.\n", __func__); - return -1; - } - - ret = socket(PF_LOCAL, SOCK_STREAM, 0); - if (ret < 0) { - fprintf(stderr, "<%s>: Failed socket: <%s>\n", - __func__, strerror(errno)); - return ret; - } - - *sockfd = ret; - if (setsockopt(*sockfd, SOL_SOCKET, SO_REUSEADDR, - (char *)&temp, sizeof(int)) < 0) { - fprintf(stderr, "<%s>: Failed setsockopt: <%s>\n", - __func__, strerror(errno)); - goto err; - } - - sprintf(sock_name, "/tmp/%s", name); - - if (connecttype == 1) { - /* This is for Server connection */ - struct sockaddr_un skaddr; - int clientfd; - socklen_t sklen; - - unlink(sock_name); - memset(&skaddr, 0, sizeof(skaddr)); - skaddr.sun_family = AF_LOCAL; - strcpy(skaddr.sun_path, sock_name); - - ret = bind(*sockfd, (struct sockaddr *)&skaddr, - SUN_LEN(&skaddr)); - if (ret < 0) { - fprintf(stderr, "<%s>: Failed bind: <%s>\n", - __func__, strerror(errno)); - goto err; - } - - ret = listen(*sockfd, 5); - if (ret < 0) { - fprintf(stderr, "<%s>: Failed listen: <%s>\n", - __func__, strerror(errno)); - goto err; - } - - memset(&skaddr, 0, sizeof(skaddr)); - sklen = sizeof(skaddr); - - ret = accept(*sockfd, (struct sockaddr *)&skaddr, - (socklen_t *)&sklen); - if (ret < 0) { - fprintf(stderr, "<%s>: Failed accept: <%s>\n", - __func__, strerror(errno)); - goto err; - } - - clientfd = ret; - *sockfd = clientfd; - } else { - /* This is for client connection */ - struct sockaddr_un skaddr; - - memset(&skaddr, 0, sizeof(skaddr)); - skaddr.sun_family = AF_LOCAL; - strcpy(skaddr.sun_path, sock_name); - - ret = connect(*sockfd, (struct sockaddr *)&skaddr, - SUN_LEN(&skaddr)); - if (ret < 0) { - fprintf(stderr, "<%s>: Failed connect: <%s>\n", - __func__, strerror(errno)); - goto err; - } - } - - return 0; - -err: - if (*sockfd) - close(*sockfd); - - return ret; -} - -int sendtosocket(int sockfd, struct socketdata *skdata) -{ - int ret, buffd; - unsigned int len; - char cmsg_b[CMSG_SPACE(sizeof(int))]; - struct cmsghdr *cmsg; - struct msghdr msgh; - struct iovec iov; - struct timeval timeout; - fd_set selFDs; - - if (!skdata) { - fprintf(stderr, "<%s>: socketdata is NULL\n", __func__); - return -1; - } - - FD_ZERO(&selFDs); - FD_SET(0, &selFDs); - FD_SET(sockfd, &selFDs); - timeout.tv_sec = 20; - timeout.tv_usec = 0; - - ret = select(sockfd+1, NULL, &selFDs, NULL, &timeout); - if (ret < 0) { - fprintf(stderr, "<%s>: Failed select: <%s>\n", - __func__, strerror(errno)); - return -1; - } - - if (FD_ISSET(sockfd, &selFDs)) { - buffd = skdata->data; - len = skdata->len; - memset(&msgh, 0, sizeof(msgh)); - msgh.msg_control = &cmsg_b; - msgh.msg_controllen = CMSG_LEN(len); - iov.iov_base = "OK"; - iov.iov_len = 2; - msgh.msg_iov = &iov; - msgh.msg_iovlen = 1; - cmsg = CMSG_FIRSTHDR(&msgh); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - cmsg->cmsg_len = CMSG_LEN(len); - memcpy(CMSG_DATA(cmsg), &buffd, len); - - ret = sendmsg(sockfd, &msgh, MSG_DONTWAIT); - if (ret < 0) { - fprintf(stderr, "<%s>: Failed sendmsg: <%s>\n", - __func__, strerror(errno)); - return -1; - } - } - - return 0; -} - -int receivefromsocket(int sockfd, struct socketdata *skdata) -{ - int ret, buffd; - unsigned int len = 0; - char cmsg_b[CMSG_SPACE(sizeof(int))]; - struct cmsghdr *cmsg; - struct msghdr msgh; - struct iovec iov; - fd_set recvFDs; - char data[32]; - - if (!skdata) { - fprintf(stderr, "<%s>: socketdata is NULL\n", __func__); - return -1; - } - - FD_ZERO(&recvFDs); - FD_SET(0, &recvFDs); - FD_SET(sockfd, &recvFDs); - - ret = select(sockfd+1, &recvFDs, NULL, NULL, NULL); - if (ret < 0) { - fprintf(stderr, "<%s>: Failed select: <%s>\n", - __func__, strerror(errno)); - return -1; - } - - if (FD_ISSET(sockfd, &recvFDs)) { - len = sizeof(buffd); - memset(&msgh, 0, sizeof(msgh)); - msgh.msg_control = &cmsg_b; - msgh.msg_controllen = CMSG_LEN(len); - iov.iov_base = data; - iov.iov_len = sizeof(data)-1; - msgh.msg_iov = &iov; - msgh.msg_iovlen = 1; - cmsg = CMSG_FIRSTHDR(&msgh); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - cmsg->cmsg_len = CMSG_LEN(len); - - ret = recvmsg(sockfd, &msgh, MSG_DONTWAIT); - if (ret < 0) { - fprintf(stderr, "<%s>: Failed recvmsg: <%s>\n", - __func__, strerror(errno)); - return -1; - } - - memcpy(&buffd, CMSG_DATA(cmsg), len); - skdata->data = buffd; - skdata->len = len; - } - return 0; -} - -int closesocket(int sockfd, char *name) -{ - char sockname[MAX_SOCK_NAME_LEN]; - - if (sockfd) - close(sockfd); - sprintf(sockname, "/tmp/%s", name); - unlink(sockname); - shutdown(sockfd, 2); - - return 0; -} diff --git a/tools/testing/selftests/android/ion/ipcsocket.h b/tools/testing/selftests/android/ion/ipcsocket.h deleted file mode 100644 index b3e84498a8a1..000000000000 --- a/tools/testing/selftests/android/ion/ipcsocket.h +++ /dev/null @@ -1,35 +0,0 @@ - -#ifndef _IPCSOCKET_H -#define _IPCSOCKET_H - - -#define MAX_SOCK_NAME_LEN 64 - -char sock_name[MAX_SOCK_NAME_LEN]; - -/* This structure is responsible for holding the IPC data - * data: hold the buffer fd - * len: just the length of 32-bit integer fd - */ -struct socketdata { - int data; - unsigned int len; -}; - -/* This API is used to open the IPC socket connection - * name: implies a unique socket name in the system - * connecttype: implies server(0) or client(1) - */ -int opensocket(int *sockfd, const char *name, int connecttype); - -/* This is the API to send socket data over IPC socket */ -int sendtosocket(int sockfd, struct socketdata *data); - -/* This is the API to receive socket data over IPC socket */ -int receivefromsocket(int sockfd, struct socketdata *data); - -/* This is the API to close the socket connection */ -int closesocket(int sockfd, char *name); - - -#endif diff --git a/tools/testing/selftests/android/run.sh b/tools/testing/selftests/android/run.sh deleted file mode 100755 index dd8edf291454..000000000000 --- a/tools/testing/selftests/android/run.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -(cd ion; ./ion_test.sh) -- cgit v1.3.1 From 472547778de24e2764ab325268dd5b77e6923939 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 22 Oct 2020 13:27:38 -0700 Subject: selftest/bpf: Fix profiler test using CO-RE relocation for enums Instead of hard-coding invalid pids_cgrp_id, use Kconfig to detect the presence of that enum value and CO-RE to capture its actual value in the hosts's kernel. Fixes: 03d4d13fab3f ("selftests/bpf: Add profiler test") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Tested-by: Song Liu Link: https://lore.kernel.org/bpf/20201022202739.3667367-1-andrii@kernel.org --- tools/testing/selftests/bpf/progs/profiler.inc.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index 00578311a423..30982a7e4d0f 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -243,7 +243,10 @@ static ino_t get_inode_from_kernfs(struct kernfs_node* node) } } -int pids_cgrp_id = 1; +extern bool CONFIG_CGROUP_PIDS __kconfig __weak; +enum cgroup_subsys_id___local { + pids_cgrp_id___local = 123, /* value doesn't matter */ +}; static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, struct task_struct* task, @@ -253,7 +256,9 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn); struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); - if (ENABLE_CGROUP_V1_RESOLVER) { + if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) { + int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local, + pids_cgrp_id___local); #ifdef UNROLL #pragma unroll #endif @@ -262,7 +267,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, BPF_CORE_READ(task, cgroups, subsys[i]); if (subsys != NULL) { int subsys_id = BPF_CORE_READ(subsys, ss, id); - if (subsys_id == pids_cgrp_id) { + if (subsys_id == cgrp_id) { proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn); root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn); break; -- cgit v1.3.1 From 3adb776384f2042ef6bda876e91a7a7ac2872c5e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 19 Oct 2020 21:08:03 -0700 Subject: x86, libnvdimm/test: Remove COPY_MC_TEST The COPY_MC_TEST facility has served its purpose for validating the early termination conditions of the copy_mc_fragile() implementation. Remove it and the EXPORT_SYMBOL_GPL of copy_mc_fragile(). Reported-by: Borislav Petkov Signed-off-by: Dan Williams Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/160316688322.3374697.8648308115165836243.stgit@dwillia2-desk3.amr.corp.intel.com --- arch/x86/Kconfig.debug | 3 -- arch/x86/include/asm/copy_mc_test.h | 75 -------------------------- arch/x86/lib/copy_mc.c | 4 -- arch/x86/lib/copy_mc_64.S | 10 ---- tools/testing/nvdimm/test/nfit.c | 103 ------------------------------------ 5 files changed, 195 deletions(-) delete mode 100644 arch/x86/include/asm/copy_mc_test.h (limited to 'tools') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 27b5e2bc6a01..80b57e7f4947 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -62,9 +62,6 @@ config EARLY_PRINTK_USB_XDBC You should normally say N here, unless you want to debug early crashes or need a very simple printk logging facility. -config COPY_MC_TEST - def_bool n - config EFI_PGT_DUMP bool "Dump the EFI pagetable" depends on EFI diff --git a/arch/x86/include/asm/copy_mc_test.h b/arch/x86/include/asm/copy_mc_test.h deleted file mode 100644 index e4991ba96726..000000000000 --- a/arch/x86/include/asm/copy_mc_test.h +++ /dev/null @@ -1,75 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _COPY_MC_TEST_H_ -#define _COPY_MC_TEST_H_ - -#ifndef __ASSEMBLY__ -#ifdef CONFIG_COPY_MC_TEST -extern unsigned long copy_mc_test_src; -extern unsigned long copy_mc_test_dst; - -static inline void copy_mc_inject_src(void *addr) -{ - if (addr) - copy_mc_test_src = (unsigned long) addr; - else - copy_mc_test_src = ~0UL; -} - -static inline void copy_mc_inject_dst(void *addr) -{ - if (addr) - copy_mc_test_dst = (unsigned long) addr; - else - copy_mc_test_dst = ~0UL; -} -#else /* CONFIG_COPY_MC_TEST */ -static inline void copy_mc_inject_src(void *addr) -{ -} - -static inline void copy_mc_inject_dst(void *addr) -{ -} -#endif /* CONFIG_COPY_MC_TEST */ - -#else /* __ASSEMBLY__ */ -#include - -#ifdef CONFIG_COPY_MC_TEST -.macro COPY_MC_TEST_CTL - .pushsection .data - .align 8 - .globl copy_mc_test_src - copy_mc_test_src: - .quad 0 - EXPORT_SYMBOL_GPL(copy_mc_test_src) - .globl copy_mc_test_dst - copy_mc_test_dst: - .quad 0 - EXPORT_SYMBOL_GPL(copy_mc_test_dst) - .popsection -.endm - -.macro COPY_MC_TEST_SRC reg count target - leaq \count(\reg), %r9 - cmp copy_mc_test_src, %r9 - ja \target -.endm - -.macro COPY_MC_TEST_DST reg count target - leaq \count(\reg), %r9 - cmp copy_mc_test_dst, %r9 - ja \target -.endm -#else -.macro COPY_MC_TEST_CTL -.endm - -.macro COPY_MC_TEST_SRC reg count target -.endm - -.macro COPY_MC_TEST_DST reg count target -.endm -#endif /* CONFIG_COPY_MC_TEST */ -#endif /* __ASSEMBLY__ */ -#endif /* _COPY_MC_TEST_H_ */ diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c index c13e8c9ee926..80efd45a7761 100644 --- a/arch/x86/lib/copy_mc.c +++ b/arch/x86/lib/copy_mc.c @@ -10,10 +10,6 @@ #include #ifdef CONFIG_X86_MCE -/* - * See COPY_MC_TEST for self-test of the copy_mc_fragile() - * implementation. - */ static DEFINE_STATIC_KEY_FALSE(copy_mc_fragile_key); void enable_copy_mc_fragile(void) diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S index 892d8915f609..e5f77e293034 100644 --- a/arch/x86/lib/copy_mc_64.S +++ b/arch/x86/lib/copy_mc_64.S @@ -2,14 +2,11 @@ /* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */ #include -#include -#include #include #ifndef CONFIG_UML #ifdef CONFIG_X86_MCE -COPY_MC_TEST_CTL /* * copy_mc_fragile - copy memory with indication if an exception / fault happened @@ -38,8 +35,6 @@ SYM_FUNC_START(copy_mc_fragile) subl %ecx, %edx .L_read_leading_bytes: movb (%rsi), %al - COPY_MC_TEST_SRC %rsi 1 .E_leading_bytes - COPY_MC_TEST_DST %rdi 1 .E_leading_bytes .L_write_leading_bytes: movb %al, (%rdi) incq %rsi @@ -55,8 +50,6 @@ SYM_FUNC_START(copy_mc_fragile) .L_read_words: movq (%rsi), %r8 - COPY_MC_TEST_SRC %rsi 8 .E_read_words - COPY_MC_TEST_DST %rdi 8 .E_write_words .L_write_words: movq %r8, (%rdi) addq $8, %rsi @@ -73,8 +66,6 @@ SYM_FUNC_START(copy_mc_fragile) movl %edx, %ecx .L_read_trailing_bytes: movb (%rsi), %al - COPY_MC_TEST_SRC %rsi 1 .E_trailing_bytes - COPY_MC_TEST_DST %rdi 1 .E_trailing_bytes .L_write_trailing_bytes: movb %al, (%rdi) incq %rsi @@ -88,7 +79,6 @@ SYM_FUNC_START(copy_mc_fragile) .L_done: ret SYM_FUNC_END(copy_mc_fragile) -EXPORT_SYMBOL_GPL(copy_mc_fragile) .section .fixup, "ax" /* diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 2ac0fff6dad8..9b185bf82da8 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -23,7 +23,6 @@ #include "nfit_test.h" #include "../watermark.h" -#include #include /* @@ -3284,107 +3283,6 @@ static struct platform_driver nfit_test_driver = { .id_table = nfit_test_id, }; -static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); - -enum INJECT { - INJECT_NONE, - INJECT_SRC, - INJECT_DST, -}; - -static void copy_mc_test_init(char *dst, char *src, size_t size) -{ - size_t i; - - memset(dst, 0xff, size); - for (i = 0; i < size; i++) - src[i] = (char) i; -} - -static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src, - size_t size, unsigned long rem) -{ - size_t i; - - for (i = 0; i < size - rem; i++) - if (dst[i] != (unsigned char) i) { - pr_info_once("%s:%d: offset: %zd got: %#x expect: %#x\n", - __func__, __LINE__, i, dst[i], - (unsigned char) i); - return false; - } - for (i = size - rem; i < size; i++) - if (dst[i] != 0xffU) { - pr_info_once("%s:%d: offset: %zd got: %#x expect: 0xff\n", - __func__, __LINE__, i, dst[i]); - return false; - } - return true; -} - -void copy_mc_test(void) -{ - char *inject_desc[] = { "none", "source", "destination" }; - enum INJECT inj; - - if (IS_ENABLED(CONFIG_COPY_MC_TEST)) { - pr_info("%s: run...\n", __func__); - } else { - pr_info("%s: disabled, skip.\n", __func__); - return; - } - - for (inj = INJECT_NONE; inj <= INJECT_DST; inj++) { - int i; - - pr_info("%s: inject: %s\n", __func__, inject_desc[inj]); - for (i = 0; i < 512; i++) { - unsigned long expect, rem; - void *src, *dst; - bool valid; - - switch (inj) { - case INJECT_NONE: - copy_mc_inject_src(NULL); - copy_mc_inject_dst(NULL); - dst = ©_mc_buf[2048]; - src = ©_mc_buf[1024 - i]; - expect = 0; - break; - case INJECT_SRC: - copy_mc_inject_src(©_mc_buf[1024]); - copy_mc_inject_dst(NULL); - dst = ©_mc_buf[2048]; - src = ©_mc_buf[1024 - i]; - expect = 512 - i; - break; - case INJECT_DST: - copy_mc_inject_src(NULL); - copy_mc_inject_dst(©_mc_buf[2048]); - dst = ©_mc_buf[2048 - i]; - src = ©_mc_buf[1024]; - expect = 512 - i; - break; - } - - copy_mc_test_init(dst, src, 512); - rem = copy_mc_fragile(dst, src, 512); - valid = copy_mc_test_validate(dst, src, 512, expect); - if (rem == expect && valid) - continue; - pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n", - __func__, - ((unsigned long) dst) & ~PAGE_MASK, - ((unsigned long ) src) & ~PAGE_MASK, - 512, i, rem, valid ? "valid" : "bad", - expect); - } - } - - copy_mc_inject_src(NULL); - copy_mc_inject_dst(NULL); -} - static __init int nfit_test_init(void) { int rc, i; @@ -3393,7 +3291,6 @@ static __init int nfit_test_init(void) libnvdimm_test(); acpi_nfit_test(); device_dax_test(); - copy_mc_test(); dax_pmem_test(); dax_pmem_core_test(); #ifdef CONFIG_DEV_DAX_PMEM_COMPAT -- cgit v1.3.1 From 3023d8ff3fc60e5d32dc1d05f99ad6ffa12b0033 Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 21 Oct 2020 20:04:55 -0700 Subject: kunit: Fix kunit.py --raw_output option Due to the raw_output() function on kunit_parser.py actually being a generator, it only runs if something reads the lines it returns. Since we no-longer do that (parsing doesn't actually happen if raw_output is enabled), it was not printing anything. Fixes: 45ba7a893ad8 ("kunit: kunit_tool: Separate out config/build/exec/parse") Signed-off-by: David Gow Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_parser.py | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 8019e3dd4c32..744ee9cb0073 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -66,7 +66,6 @@ def isolate_kunit_output(kernel_output): def raw_output(kernel_output): for line in kernel_output: print(line) - yield line DIVIDER = '=' * 60 -- cgit v1.3.1 From 3fc48259d5250f7a3ee021ad0492b604c428c564 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 15 Oct 2020 19:28:17 +0300 Subject: kunit: Don't fail test suites if one of them is empty Empty test suite is okay test suite. Don't fail the rest of the test suites if one of them is empty. Fixes: 6ebf5866f2e8 ("kunit: tool: add Python wrappers for running KUnit tests") Signed-off-by: Andy Shevchenko Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 744ee9cb0073..84a1af2581f5 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -241,7 +241,7 @@ def parse_test_suite(lines: List[str], expected_suite_index: int) -> TestSuite: return None test_suite.name = name expected_test_case_num = parse_subtest_plan(lines) - if not expected_test_case_num: + if expected_test_case_num is None: return None while expected_test_case_num > 0: test_case = parse_test_case(lines) -- cgit v1.3.1 From 0d0d245104a42e593adcf11396017a6420c08ba8 Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Wed, 21 Oct 2020 13:39:14 -0700 Subject: kunit: tools: fix kunit_tool tests for parsing test plans Some tests logs for kunit_tool tests are missing their test plans causing their tests to fail; fix this by adding the test plans. Fixes: 45dcbb6f5ef7 ("kunit: test: add test plan to KUnit TAP format") Signed-off-by: Brendan Higgins Reviewed-by: David Gow Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_tool_test.py | 32 +++++++++++++++++----- .../kunit/test_data/test_config_printk_time.log | 3 +- .../test_data/test_interrupted_tap_output.log | 3 +- .../test_data/test_kernel_panic_interrupt.log | 3 +- .../kunit/test_data/test_multiple_prefixes.log | 3 +- .../kunit/test_data/test_pound_no_prefix.log | 3 +- tools/testing/kunit/test_data/test_pound_sign.log | 1 + 7 files changed, 36 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 99c3c5671ea4..0b60855fb819 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -179,7 +179,7 @@ class KUnitParserTest(unittest.TestCase): print_mock = mock.patch('builtins.print').start() result = kunit_parser.parse_run_tests( kunit_parser.isolate_kunit_output(file.readlines())) - print_mock.assert_any_call(StrContains("no kunit output detected")) + print_mock.assert_any_call(StrContains('no tests run!')) print_mock.stop() file.close() @@ -198,39 +198,57 @@ class KUnitParserTest(unittest.TestCase): 'test_data/test_config_printk_time.log') with open(prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual( + kunit_parser.TestStatus.SUCCESS, + result.status) + self.assertEqual('kunit-resource-test', result.suites[0].name) def test_ignores_multiple_prefixes(self): prefix_log = get_absolute_path( 'test_data/test_multiple_prefixes.log') with open(prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual( + kunit_parser.TestStatus.SUCCESS, + result.status) + self.assertEqual('kunit-resource-test', result.suites[0].name) def test_prefix_mixed_kernel_output(self): mixed_prefix_log = get_absolute_path( 'test_data/test_interrupted_tap_output.log') with open(mixed_prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual( + kunit_parser.TestStatus.SUCCESS, + result.status) + self.assertEqual('kunit-resource-test', result.suites[0].name) def test_prefix_poundsign(self): pound_log = get_absolute_path('test_data/test_pound_sign.log') with open(pound_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual( + kunit_parser.TestStatus.SUCCESS, + result.status) + self.assertEqual('kunit-resource-test', result.suites[0].name) def test_kernel_panic_end(self): panic_log = get_absolute_path('test_data/test_kernel_panic_interrupt.log') with open(panic_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual( + kunit_parser.TestStatus.TEST_CRASHED, + result.status) + self.assertEqual('kunit-resource-test', result.suites[0].name) def test_pound_no_prefix(self): pound_log = get_absolute_path('test_data/test_pound_no_prefix.log') with open(pound_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual( + kunit_parser.TestStatus.SUCCESS, + result.status) + self.assertEqual('kunit-resource-test', result.suites[0].name) class KUnitJsonTest(unittest.TestCase): diff --git a/tools/testing/kunit/test_data/test_config_printk_time.log b/tools/testing/kunit/test_data/test_config_printk_time.log index c02ca773946d..6bdb57f76eac 100644 --- a/tools/testing/kunit/test_data/test_config_printk_time.log +++ b/tools/testing/kunit/test_data/test_config_printk_time.log @@ -1,6 +1,7 @@ [ 0.060000] printk: console [mc-1] enabled [ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 [ 0.060000] TAP version 14 +[ 0.060000] 1..3 [ 0.060000] # Subtest: kunit-resource-test [ 0.060000] 1..5 [ 0.060000] ok 1 - kunit_resource_test_init_resources @@ -28,4 +29,4 @@ [ 0.060000] Stack: [ 0.060000] 602086f8 601bc260 705c0000 705c0000 [ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab -[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file +[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 diff --git a/tools/testing/kunit/test_data/test_interrupted_tap_output.log b/tools/testing/kunit/test_data/test_interrupted_tap_output.log index 5c73fb3a1c6f..1fb677728abe 100644 --- a/tools/testing/kunit/test_data/test_interrupted_tap_output.log +++ b/tools/testing/kunit/test_data/test_interrupted_tap_output.log @@ -1,6 +1,7 @@ [ 0.060000] printk: console [mc-1] enabled [ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 [ 0.060000] TAP version 14 +[ 0.060000] 1..3 [ 0.060000] # Subtest: kunit-resource-test [ 0.060000] 1..5 [ 0.060000] ok 1 - kunit_resource_test_init_resources @@ -34,4 +35,4 @@ [ 0.060000] Stack: [ 0.060000] 602086f8 601bc260 705c0000 705c0000 [ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab -[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file +[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 diff --git a/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log b/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log index c045eee75f27..a014ffe9725e 100644 --- a/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log +++ b/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log @@ -1,6 +1,7 @@ [ 0.060000] printk: console [mc-1] enabled [ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 [ 0.060000] TAP version 14 +[ 0.060000] 1..3 [ 0.060000] # Subtest: kunit-resource-test [ 0.060000] 1..5 [ 0.060000] ok 1 - kunit_resource_test_init_resources @@ -22,4 +23,4 @@ [ 0.060000] Stack: [ 0.060000] 602086f8 601bc260 705c0000 705c0000 [ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab -[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file +[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 diff --git a/tools/testing/kunit/test_data/test_multiple_prefixes.log b/tools/testing/kunit/test_data/test_multiple_prefixes.log index bc48407dcc36..0ad78481a0b4 100644 --- a/tools/testing/kunit/test_data/test_multiple_prefixes.log +++ b/tools/testing/kunit/test_data/test_multiple_prefixes.log @@ -1,6 +1,7 @@ [ 0.060000][ T1] printk: console [mc-1] enabled [ 0.060000][ T1] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 [ 0.060000][ T1] TAP version 14 +[ 0.060000][ T1] 1..3 [ 0.060000][ T1] # Subtest: kunit-resource-test [ 0.060000][ T1] 1..5 [ 0.060000][ T1] ok 1 - kunit_resource_test_init_resources @@ -28,4 +29,4 @@ [ 0.060000][ T1] Stack: [ 0.060000][ T1] 602086f8 601bc260 705c0000 705c0000 [ 0.060000][ T1] 602086f8 6005fcec 705c0000 6002c6ab -[ 0.060000][ T1] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file +[ 0.060000][ T1] 6005fcec 601bc260 705c0000 3000000010 diff --git a/tools/testing/kunit/test_data/test_pound_no_prefix.log b/tools/testing/kunit/test_data/test_pound_no_prefix.log index 2ceb360be7d5..dc4cf09a96d0 100644 --- a/tools/testing/kunit/test_data/test_pound_no_prefix.log +++ b/tools/testing/kunit/test_data/test_pound_no_prefix.log @@ -1,6 +1,7 @@ printk: console [mc-1] enabled random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 TAP version 14 + 1..3 # Subtest: kunit-resource-test 1..5 ok 1 - kunit_resource_test_init_resources @@ -30,4 +31,4 @@ Stack: 602086f8 601bc260 705c0000 705c0000 602086f8 6005fcec 705c0000 6002c6ab - 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file + 6005fcec 601bc260 705c0000 3000000010 diff --git a/tools/testing/kunit/test_data/test_pound_sign.log b/tools/testing/kunit/test_data/test_pound_sign.log index 28ffa5ba03bf..3f358e3a7ba0 100644 --- a/tools/testing/kunit/test_data/test_pound_sign.log +++ b/tools/testing/kunit/test_data/test_pound_sign.log @@ -1,6 +1,7 @@ [ 0.060000] printk: console [mc-1] enabled [ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 [ 0.060000] TAP version 14 +[ 0.060000] 1..3 [ 0.060000] # Subtest: kunit-resource-test [ 0.060000] 1..5 [ 0.060000] ok 1 - kunit_resource_test_init_resources -- cgit v1.3.1 From 748f0d70087c56226bf1df1f91a00b7ab4c8f883 Mon Sep 17 00:00:00 2001 From: Brahadambal Srinivasan Date: Fri, 23 Oct 2020 20:55:27 +0530 Subject: cpupower: Provide online and offline CPU information When a user tries to modify cpuidle or cpufreq properties on offline CPUs, the tool returns success (exit status 0) but also does not provide any warning message regarding offline cpus that may have been specified but left unchanged. In case of all or a few CPUs being offline, it can be difficult to keep track of which CPUs didn't get the new frequency or idle state set. Silent failures are difficult to keep track of when there are a huge number of CPUs on which the action is performed. This patch adds helper functions to find both online and offline CPUs and print them out accordingly. We use these helper functions in cpuidle-set and cpufreq-set to print an additional message if the user attempts to modify offline cpus. Reported-by: Pavithra R. Prakash Signed-off-by: Brahadambal Srinivasan Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/cpufreq-set.c | 3 ++ tools/power/cpupower/utils/cpuidle-set.c | 4 ++ tools/power/cpupower/utils/cpupower.c | 8 ++++ tools/power/cpupower/utils/helpers/helpers.h | 12 +++++ tools/power/cpupower/utils/helpers/misc.c | 66 +++++++++++++++++++++++++++- 5 files changed, 92 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c index 7b2164e07057..c5e60a39cfa6 100644 --- a/tools/power/cpupower/utils/cpufreq-set.c +++ b/tools/power/cpupower/utils/cpufreq-set.c @@ -315,6 +315,7 @@ int cmd_freq_set(int argc, char **argv) } } + get_cpustate(); /* loop over CPUs */ for (cpu = bitmask_first(cpus_chosen); @@ -332,5 +333,7 @@ int cmd_freq_set(int argc, char **argv) } } + print_offline_cpus(); + return 0; } diff --git a/tools/power/cpupower/utils/cpuidle-set.c b/tools/power/cpupower/utils/cpuidle-set.c index 569f268f4c7f..46158928f9ad 100644 --- a/tools/power/cpupower/utils/cpuidle-set.c +++ b/tools/power/cpupower/utils/cpuidle-set.c @@ -95,6 +95,8 @@ int cmd_idle_set(int argc, char **argv) exit(EXIT_FAILURE); } + get_cpustate(); + /* Default is: set all CPUs */ if (bitmask_isallclear(cpus_chosen)) bitmask_setall(cpus_chosen); @@ -181,5 +183,7 @@ int cmd_idle_set(int argc, char **argv) break; } } + + print_offline_cpus(); return EXIT_SUCCESS; } diff --git a/tools/power/cpupower/utils/cpupower.c b/tools/power/cpupower/utils/cpupower.c index 8e3d08042825..8ac3304a9957 100644 --- a/tools/power/cpupower/utils/cpupower.c +++ b/tools/power/cpupower/utils/cpupower.c @@ -34,6 +34,8 @@ int run_as_root; int base_cpu; /* Affected cpus chosen by -c/--cpu param */ struct bitmask *cpus_chosen; +struct bitmask *online_cpus; +struct bitmask *offline_cpus; #ifdef DEBUG int be_verbose; @@ -178,6 +180,8 @@ int main(int argc, const char *argv[]) char pathname[32]; cpus_chosen = bitmask_alloc(sysconf(_SC_NPROCESSORS_CONF)); + online_cpus = bitmask_alloc(sysconf(_SC_NPROCESSORS_CONF)); + offline_cpus = bitmask_alloc(sysconf(_SC_NPROCESSORS_CONF)); argc--; argv += 1; @@ -230,6 +234,10 @@ int main(int argc, const char *argv[]) ret = p->main(argc, argv); if (cpus_chosen) bitmask_free(cpus_chosen); + if (online_cpus) + bitmask_free(online_cpus); + if (offline_cpus) + bitmask_free(offline_cpus); return ret; } print_help(); diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index c258eeccd05f..d5799aa71e1f 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -94,6 +94,8 @@ struct cpupower_cpu_info { */ extern int get_cpu_info(struct cpupower_cpu_info *cpu_info); extern struct cpupower_cpu_info cpupower_cpu_info; + + /* cpuid and cpuinfo helpers **************************/ /* X86 ONLY ****************************************/ @@ -171,4 +173,14 @@ static inline unsigned int cpuid_ecx(unsigned int op) { return 0; }; static inline unsigned int cpuid_edx(unsigned int op) { return 0; }; #endif /* defined(__i386__) || defined(__x86_64__) */ +/* + * CPU State related functions + */ +extern struct bitmask *online_cpus; +extern struct bitmask *offline_cpus; + +void get_cpustate(void); +void print_online_cpus(void); +void print_offline_cpus(void); + #endif /* __CPUPOWERUTILS_HELPERS__ */ diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index f406adc40bad..2ead98169cf5 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c @@ -1,8 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 -#if defined(__i386__) || defined(__x86_64__) + +#include +#include #include "helpers/helpers.h" +#if defined(__i386__) || defined(__x86_64__) + #define MSR_AMD_HWCR 0xc0010015 int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, @@ -41,3 +45,63 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, return 0; } #endif /* #if defined(__i386__) || defined(__x86_64__) */ + +/* get_cpustate + * + * Gather the information of all online CPUs into bitmask struct + */ +void get_cpustate(void) +{ + unsigned int cpu = 0; + + bitmask_clearall(online_cpus); + bitmask_clearall(offline_cpus); + + for (cpu = bitmask_first(cpus_chosen); + cpu <= bitmask_last(cpus_chosen); cpu++) { + + if (cpupower_is_cpu_online(cpu) == 1) + bitmask_setbit(online_cpus, cpu); + else + bitmask_setbit(offline_cpus, cpu); + + continue; + } +} + +/* print_online_cpus + * + * Print the CPU numbers of all CPUs that are online currently + */ +void print_online_cpus(void) +{ + int str_len = 0; + char *online_cpus_str = NULL; + + str_len = online_cpus->size * 5; + online_cpus_str = (void *)malloc(sizeof(char) * str_len); + + if (!bitmask_isallclear(online_cpus)) { + bitmask_displaylist(online_cpus_str, str_len, online_cpus); + printf(_("Following CPUs are online:\n%s\n"), online_cpus_str); + } +} + +/* print_offline_cpus + * + * Print the CPU numbers of all CPUs that are offline currently + */ +void print_offline_cpus(void) +{ + int str_len = 0; + char *offline_cpus_str = NULL; + + str_len = offline_cpus->size * 5; + offline_cpus_str = (void *)malloc(sizeof(char) * str_len); + + if (!bitmask_isallclear(offline_cpus)) { + bitmask_displaylist(offline_cpus_str, str_len, offline_cpus); + printf(_("Following CPUs are offline:\n%s\n"), offline_cpus_str); + printf(_("cpupower set operation was not performed on them\n")); + } +} -- cgit v1.3.1 From 9270e1a744f8ed953009b0e94b26ed0912d9ec1c Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 3 Oct 2020 21:40:22 -0400 Subject: tools: memory-model: Document that the LKMM can easily miss control dependencies Add a small section to the litmus-tests.txt documentation file for the Linux Kernel Memory Model explaining that the memory model often fails to recognize certain control dependencies. Suggested-by: Akira Yokosawa Signed-off-by: Alan Stern Reviewed-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- tools/memory-model/Documentation/litmus-tests.txt | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'tools') diff --git a/tools/memory-model/Documentation/litmus-tests.txt b/tools/memory-model/Documentation/litmus-tests.txt index 2f840dcd15cf..8a9d5d2787f9 100644 --- a/tools/memory-model/Documentation/litmus-tests.txt +++ b/tools/memory-model/Documentation/litmus-tests.txt @@ -946,6 +946,23 @@ Limitations of the Linux-kernel memory model (LKMM) include: carrying a dependency, then the compiler can break that dependency by substituting a constant of that value. + Conversely, LKMM sometimes doesn't recognize that a particular + optimization is not allowed, and as a result, thinks that a + dependency is not present (because the optimization would break it). + The memory model misses some pretty obvious control dependencies + because of this limitation. A simple example is: + + r1 = READ_ONCE(x); + if (r1 == 0) + smp_mb(); + WRITE_ONCE(y, 1); + + There is a control dependency from the READ_ONCE to the WRITE_ONCE, + even when r1 is nonzero, but LKMM doesn't realize this and thinks + that the write may execute before the read if r1 != 0. (Yes, that + doesn't make sense if you think about it, but the memory model's + intelligence is limited.) + 2. Multiple access sizes for a single variable are not supported, and neither are misaligned or partially overlapping accesses. -- cgit v1.3.1 From ab8bcad67bee82e4be290b32f0faaf582d7c3edc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 4 Aug 2020 14:00:17 -0700 Subject: tools/memory-model: Move Documentation description to Documentation/README This commit moves the descriptions of the files residing in tools/memory-model/Documentation to a README file in that directory, leaving behind the description of tools/memory-model/Documentation/README itself. After this change, tools/memory-model/Documentation/README provides a guide to the files in the tools/memory-model/Documentation directory, guiding people with different skills and needs to the most appropriate starting point. Signed-off-by: Paul E. McKenney --- tools/memory-model/Documentation/README | 59 +++++++++++++++++++++++++++++++++ tools/memory-model/README | 22 ++---------- 2 files changed, 61 insertions(+), 20 deletions(-) create mode 100644 tools/memory-model/Documentation/README (limited to 'tools') diff --git a/tools/memory-model/Documentation/README b/tools/memory-model/Documentation/README new file mode 100644 index 000000000000..2d9539f19912 --- /dev/null +++ b/tools/memory-model/Documentation/README @@ -0,0 +1,59 @@ +It has been said that successful communication requires first identifying +what your audience knows and then building a bridge from their current +knowledge to what they need to know. Unfortunately, the expected +Linux-kernel memory model (LKMM) audience might be anywhere from novice +to expert both in kernel hacking and in understanding LKMM. + +This document therefore points out a number of places to start reading, +depending on what you know and what you would like to learn. Please note +that the documents later in this list assume that the reader understands +the material provided by documents earlier in this list. + +o You are new to Linux-kernel concurrency: simple.txt + +o You are familiar with the Linux-kernel concurrency primitives + that you need, and just want to get started with LKMM litmus + tests: litmus-tests.txt + +o You are familiar with Linux-kernel concurrency, and would + like a detailed intuitive understanding of LKMM, including + situations involving more than two threads: recipes.txt + +o You are familiar with Linux-kernel concurrency and the use of + LKMM, and would like a quick reference: cheatsheet.txt + +o You are familiar with Linux-kernel concurrency and the use + of LKMM, and would like to learn about LKMM's requirements, + rationale, and implementation: explanation.txt + +o You are interested in the publications related to LKMM, including + hardware manuals, academic literature, standards-committee + working papers, and LWN articles: references.txt + + +==================== +DESCRIPTION OF FILES +==================== + +README + This file. + +cheatsheet.txt + Quick-reference guide to the Linux-kernel memory model. + +explanation.txt + Detailed description of the memory model. + +litmus-tests.txt + The format, features, capabilities, and limitations of the litmus + tests that LKMM can evaluate. + +recipes.txt + Common memory-ordering patterns. + +references.txt + Background information. + +simple.txt + Starting point for someone new to Linux-kernel concurrency. + And also a reminder of the simpler approaches to concurrency! diff --git a/tools/memory-model/README b/tools/memory-model/README index c8144d4aafa0..39d08d1f0443 100644 --- a/tools/memory-model/README +++ b/tools/memory-model/README @@ -161,26 +161,8 @@ running LKMM litmus tests. DESCRIPTION OF FILES ==================== -Documentation/cheatsheet.txt - Quick-reference guide to the Linux-kernel memory model. - -Documentation/explanation.txt - Describes the memory model in detail. - -Documentation/litmus-tests.txt - Describes the format, features, capabilities, and limitations - of the litmus tests that LKMM can evaluate. - -Documentation/recipes.txt - Lists common memory-ordering patterns. - -Documentation/references.txt - Provides background reading. - -Documentation/simple.txt - Starting point for someone new to Linux-kernel concurrency. - And also for those needing a reminder of the simpler approaches - to concurrency! +Documentation/README + Guide to the other documents in the Documentation/ directory. linux-kernel.bell Categorizes the relevant instructions, including memory -- cgit v1.3.1 From dc6bf4da825aa0301a46f55fec7c0bb706af2aad Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 26 Oct 2020 16:20:32 -0400 Subject: selftests/ftrace: Use $FUNCTION_FORK to reference kernel fork function Commit cad6967ac108 ("fork: introduce kernel_clone()") replaced "_do_fork()" with "kernel_clone()". The ftrace selftests reference the fork function in several of the tests. The rename will make the tests break, but if those names are changed in the tests, they would then break on older kernels. The same set of tests should pass older kernels if they have previously passed. Obviously, a new test may not work on older kernels if the test was added due to a bug or a new feature. The setup of ftracetest will now create a $FUNCTION_FORK bash variable that will contain "_do_fork" for older kernels and "kernel_clone" for newer ones. It figures out the proper name by examining /proc/kallsyms. Note, available_filter_functions could also be used, but because some tests should be able to pass without function tracing enabled, it could not be used. Fixes: eea11285dab3 ("tracing: switch to kernel_clone()") Signed-off-by: Steven Rostedt (VMware) Acked-by: Masami Hiramatsu Signed-off-by: Shuah Khan --- .../selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc | 2 +- .../ftrace/test.d/dynevent/clear_select_events.tc | 2 +- .../ftrace/test.d/dynevent/generic_clear_event.tc | 2 +- .../ftrace/test.d/ftrace/func-filter-notrace-pid.tc | 2 +- .../selftests/ftrace/test.d/ftrace/func-filter-pid.tc | 2 +- .../ftrace/test.d/ftrace/func-filter-stacktrace.tc | 4 ++-- tools/testing/selftests/ftrace/test.d/functions | 7 +++++++ .../selftests/ftrace/test.d/kprobe/add_and_remove.tc | 2 +- tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc | 2 +- .../testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc | 4 ++-- .../selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc | 2 +- .../selftests/ftrace/test.d/kprobe/kprobe_args_string.tc | 4 ++-- .../selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc | 10 +++++----- .../selftests/ftrace/test.d/kprobe/kprobe_args_type.tc | 2 +- .../selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc | 14 +++++++------- .../selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc | 2 +- .../selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc | 12 ++++++------ .../selftests/ftrace/test.d/kprobe/kretprobe_args.tc | 4 ++-- tools/testing/selftests/ftrace/test.d/kprobe/profile.tc | 2 +- 19 files changed, 44 insertions(+), 37 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc index 3bcd4c3624ee..b4da41d126d5 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=kernel_clone +PLACE=$FUNCTION_FORK echo "p:myevent1 $PLACE" >> dynamic_events echo "r:myevent2 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc index 438961971b7e..3a0e2885fff5 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=kernel_clone +PLACE=$FUNCTION_FORK setup_events() { echo "p:myevent1 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc index a8603bd23e0d..d3e138e8377f 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=kernel_clone +PLACE=$FUNCTION_FORK setup_events() { echo "p:myevent1 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc index acb17ce543d2..80541964b927 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc @@ -39,7 +39,7 @@ do_test() { disable_tracing echo do_execve* > set_ftrace_filter - echo *do_fork >> set_ftrace_filter + echo $FUNCTION_FORK >> set_ftrace_filter echo $PID > set_ftrace_notrace_pid echo function > current_tracer diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc index 9f0a9687c773..2f7211254529 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc @@ -39,7 +39,7 @@ do_test() { disable_tracing echo do_execve* > set_ftrace_filter - echo *do_fork >> set_ftrace_filter + echo $FUNCTION_FORK >> set_ftrace_filter echo $PID > set_ftrace_pid echo function > current_tracer diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc index 98305d76bd04..191d116b7883 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc @@ -4,9 +4,9 @@ # requires: set_ftrace_filter # flags: instance -echo kernel_clone:stacktrace >> set_ftrace_filter +echo $FUNCTION_FORK:stacktrace >> set_ftrace_filter -grep -q "kernel_clone:stacktrace:unlimited" set_ftrace_filter +grep -q "$FUNCTION_FORK:stacktrace:unlimited" set_ftrace_filter (echo "forked"; sleep 1) diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index c5dec55b7d95..a6fac927ee82 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -133,6 +133,13 @@ yield() { ping $LOCALHOST -c 1 || sleep .001 || usleep 1 || sleep 1 } +# The fork function in the kernel was renamed from "_do_fork" to +# "kernel_fork". As older tests should still work with older kernels +# as well as newer kernels, check which version of fork is used on this +# kernel so that the tests can use the fork function for the running kernel. +FUNCTION_FORK=`(if grep '\bkernel_clone\b' /proc/kallsyms > /dev/null; then + echo kernel_clone; else echo '_do_fork'; fi)` + # Since probe event command may include backslash, explicitly use printf "%s" # to NOT interpret it. ftrace_errlog_check() { # err-prefix command-with-error-pos-by-^ command-file diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc index 9737cd0578a7..2428a3ed78c9 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc @@ -3,7 +3,7 @@ # description: Kprobe dynamic event - adding and removing # requires: kprobe_events -echo p:myevent kernel_clone > kprobe_events +echo p:myevent $FUNCTION_FORK > kprobe_events grep myevent kprobe_events test -d events/kprobes/myevent echo > kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc index f9a40af76888..010a8b1d6c1d 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc @@ -3,7 +3,7 @@ # description: Kprobe dynamic event - busy event check # requires: kprobe_events -echo p:myevent kernel_clone > kprobe_events +echo p:myevent $FUNCTION_FORK > kprobe_events test -d events/kprobes/myevent echo 1 > events/kprobes/myevent/enable echo > kprobe_events && exit_fail # this must fail diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc index eb543d3cfe5f..a96a1dc7014f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc @@ -3,13 +3,13 @@ # description: Kprobe dynamic event with arguments # requires: kprobe_events -echo 'p:testprobe kernel_clone $stack $stack0 +0($stack)' > kprobe_events +echo "p:testprobe $FUNCTION_FORK \$stack \$stack0 +0(\$stack)" > kprobe_events grep testprobe kprobe_events | grep -q 'arg1=\$stack arg2=\$stack0 arg3=+0(\$stack)' test -d events/kprobes/testprobe echo 1 > events/kprobes/testprobe/enable ( echo "forked") -grep testprobe trace | grep 'kernel_clone' | \ +grep testprobe trace | grep "$FUNCTION_FORK" | \ grep -q 'arg1=0x[[:xdigit:]]* arg2=0x[[:xdigit:]]* arg3=0x[[:xdigit:]]*$' echo 0 > events/kprobes/testprobe/enable diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc index 4e5b63be51c9..a053ee2e7d77 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc @@ -5,7 +5,7 @@ grep -A1 "fetcharg:" README | grep -q "\$comm" || exit_unsupported # this is too old -echo 'p:testprobe kernel_clone comm=$comm ' > kprobe_events +echo "p:testprobe $FUNCTION_FORK comm=\$comm " > kprobe_events grep testprobe kprobe_events | grep -q 'comm=$comm' test -d events/kprobes/testprobe diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc index a1d70588ab21..84285a6f60b0 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -30,13 +30,13 @@ esac : "Test get argument (1)" echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events echo 1 > events/kprobes/testprobe/enable -echo "p:test kernel_clone" >> kprobe_events +echo "p:test $FUNCTION_FORK" >> kprobe_events grep -qe "testprobe.* arg1=\"test\"" trace echo 0 > events/kprobes/testprobe/enable : "Test get argument (2)" echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events echo 1 > events/kprobes/testprobe/enable -echo "p:test kernel_clone" >> kprobe_events +echo "p:test $FUNCTION_FORK" >> kprobe_events grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc index bd25dd0ba0d0..717130ed4feb 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc @@ -14,12 +14,12 @@ elif ! grep "$SYMBOL\$" /proc/kallsyms; then fi : "Test get basic types symbol argument" -echo "p:testprobe_u kernel_clone arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events -echo "p:testprobe_s kernel_clone arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events +echo "p:testprobe_u $FUNCTION_FORK arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events +echo "p:testprobe_s $FUNCTION_FORK arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events if grep -q "x8/16/32/64" README; then - echo "p:testprobe_x kernel_clone arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events + echo "p:testprobe_x $FUNCTION_FORK arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events fi -echo "p:testprobe_bf kernel_clone arg1=@linux_proc_banner:b8@4/32" >> kprobe_events +echo "p:testprobe_bf $FUNCTION_FORK arg1=@linux_proc_banner:b8@4/32" >> kprobe_events echo 1 > events/kprobes/enable (echo "forked") echo 0 > events/kprobes/enable @@ -27,7 +27,7 @@ grep "testprobe_[usx]:.* arg1=.* arg2=.* arg3=.* arg4=.*" trace grep "testprobe_bf:.* arg1=.*" trace : "Test get string symbol argument" -echo "p:testprobe_str kernel_clone arg1=@linux_proc_banner:string" > kprobe_events +echo "p:testprobe_str $FUNCTION_FORK arg1=@linux_proc_banner:string" > kprobe_events echo 1 > events/kprobes/enable (echo "forked") echo 0 > events/kprobes/enable diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc index 91fcce1c241c..25b7708eb559 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc @@ -4,7 +4,7 @@ # requires: kprobe_events "x8/16/32/64":README gen_event() { # Bitsize - echo "p:testprobe kernel_clone \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1" + echo "p:testprobe $FUNCTION_FORK \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1" } check_types() { # s-type u-type x-type bf-type width diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc index 0d179094191f..5556292601a4 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc @@ -5,29 +5,29 @@ # prepare echo nop > current_tracer -echo kernel_clone > set_ftrace_filter -echo 'p:testprobe kernel_clone' > kprobe_events +echo $FUNCTION_FORK > set_ftrace_filter +echo "p:testprobe $FUNCTION_FORK" > kprobe_events # kprobe on / ftrace off echo 1 > events/kprobes/testprobe/enable echo > trace ( echo "forked") grep testprobe trace -! grep 'kernel_clone <-' trace +! grep "$FUNCTION_FORK <-" trace # kprobe on / ftrace on echo function > current_tracer echo > trace ( echo "forked") grep testprobe trace -grep 'kernel_clone <-' trace +grep "$FUNCTION_FORK <-" trace # kprobe off / ftrace on echo 0 > events/kprobes/testprobe/enable echo > trace ( echo "forked") ! grep testprobe trace -grep 'kernel_clone <-' trace +grep "$FUNCTION_FORK <-" trace # kprobe on / ftrace on echo 1 > events/kprobes/testprobe/enable @@ -35,11 +35,11 @@ echo function > current_tracer echo > trace ( echo "forked") grep testprobe trace -grep 'kernel_clone <-' trace +grep "$FUNCTION_FORK <-" trace # kprobe on / ftrace off echo nop > current_tracer echo > trace ( echo "forked") grep testprobe trace -! grep 'kernel_clone <-' trace +! grep "$FUNCTION_FORK <-" trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc index 45d90b6c763d..f0d5b7777ed7 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc @@ -4,7 +4,7 @@ # requires: kprobe_events "Create/append/":README # Choose 2 symbols for target -SYM1=kernel_clone +SYM1=$FUNCTION_FORK SYM2=do_exit EVENT_NAME=kprobes/testevent diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index 1b5550ef8a9b..fa928b431555 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -86,15 +86,15 @@ esac # multiprobe errors if grep -q "Create/append/" README && grep -q "imm-value" README; then -echo 'p:kprobes/testevent kernel_clone' > kprobe_events +echo "p:kprobes/testevent $FUNCTION_FORK" > kprobe_events check_error '^r:kprobes/testevent do_exit' # DIFF_PROBE_TYPE # Explicitly use printf "%s" to not interpret \1 -printf "%s" 'p:kprobes/testevent kernel_clone abcd=\1' > kprobe_events -check_error 'p:kprobes/testevent kernel_clone ^bcd=\1' # DIFF_ARG_TYPE -check_error 'p:kprobes/testevent kernel_clone ^abcd=\1:u8' # DIFF_ARG_TYPE -check_error 'p:kprobes/testevent kernel_clone ^abcd=\"foo"' # DIFF_ARG_TYPE -check_error '^p:kprobes/testevent kernel_clone abcd=\1' # SAME_PROBE +printf "%s" "p:kprobes/testevent $FUNCTION_FORK abcd=\\1" > kprobe_events +check_error "p:kprobes/testevent $FUNCTION_FORK ^bcd=\\1" # DIFF_ARG_TYPE +check_error "p:kprobes/testevent $FUNCTION_FORK ^abcd=\\1:u8" # DIFF_ARG_TYPE +check_error "p:kprobes/testevent $FUNCTION_FORK ^abcd=\\\"foo\"" # DIFF_ARG_TYPE +check_error "^p:kprobes/testevent $FUNCTION_FORK abcd=\\1" # SAME_PROBE fi # %return suffix errors diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc index 7ae492c204a4..197cc2afd404 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc @@ -4,14 +4,14 @@ # requires: kprobe_events # Add new kretprobe event -echo 'r:testprobe2 kernel_clone $retval' > kprobe_events +echo "r:testprobe2 $FUNCTION_FORK \$retval" > kprobe_events grep testprobe2 kprobe_events | grep -q 'arg1=\$retval' test -d events/kprobes/testprobe2 echo 1 > events/kprobes/testprobe2/enable ( echo "forked") -cat trace | grep testprobe2 | grep -q '<- kernel_clone' +cat trace | grep testprobe2 | grep -q "<- $FUNCTION_FORK" echo 0 > events/kprobes/testprobe2/enable echo '-:testprobe2' >> kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc index c4093fc1a773..98166fa3eb91 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc @@ -4,7 +4,7 @@ # requires: kprobe_events ! grep -q 'myevent' kprobe_profile -echo p:myevent kernel_clone > kprobe_events +echo "p:myevent $FUNCTION_FORK" > kprobe_events grep -q 'myevent[[:space:]]*0[[:space:]]*0$' kprobe_profile echo 1 > events/kprobes/myevent/enable ( echo "forked" ) -- cgit v1.3.1 From e3e40312567087fbe6880f316cb2b0e1f3d8a82c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 2 Oct 2020 14:25:01 +0100 Subject: selftests/ftrace: check for do_sys_openat2 in user-memory test More recent libc implementations are now using openat/openat2 system calls so also add do_sys_openat2 to the tracing so that the test passes on these systems because do_sys_open may not be called. Thanks to Masami Hiramatsu for the help on getting this fix to work correctly. Signed-off-by: Colin Ian King Acked-by: Masami Hiramatsu Acked-by: Steven Rostedt (VMware) Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc index a30a9c07290d..d25d01a19778 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc @@ -9,12 +9,16 @@ grep -A10 "fetcharg:" README | grep -q '\[u\]' || exit_unsupported :;: "user-memory access syntax and ustring working on user memory";: echo 'p:myevent do_sys_open path=+0($arg2):ustring path2=+u0($arg2):string' \ > kprobe_events +echo 'p:myevent2 do_sys_openat2 path=+0($arg2):ustring path2=+u0($arg2):string' \ + >> kprobe_events grep myevent kprobe_events | \ grep -q 'path=+0($arg2):ustring path2=+u0($arg2):string' echo 1 > events/kprobes/myevent/enable +echo 1 > events/kprobes/myevent2/enable echo > /dev/null echo 0 > events/kprobes/myevent/enable +echo 0 > events/kprobes/myevent2/enable grep myevent trace | grep -q 'path="/dev/null" path2="/dev/null"' -- cgit v1.3.1 From f825d3f7ed9305e7dd0a3e0a74673a4257d0cc53 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:21 +0300 Subject: selftests: filter kselftest headers from command in lib.mk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 1056d3d2c97e ("selftests: enforce local header dependency in lib.mk") added header dependency to the rule, but as the rule uses $^, the headers are added to the compiler command line. This can cause unexpected precompiled header files being generated when compilation fails: $ echo { >> openat2_test.c $ make gcc -Wall -O2 -g -fsanitize=address -fsanitize=undefined openat2_test.c tools/testing/selftests/kselftest_harness.h tools/testing/selftests/kselftest.h helpers.c -o tools/testing/selftests/openat2/openat2_test openat2_test.c:313:1: error: expected identifier or ‘(’ before ‘{’ token 313 | { | ^ make: *** [../lib.mk:140: tools/testing/selftests/openat2/openat2_test] Error 1 $ file openat2_test* openat2_test: GCC precompiled header (version 014) for C openat2_test.c: C source, ASCII text Fix it by filtering out the headers, so that we'll only pass the actual *.c files in the compiler command line. Fixes: 1056d3d2c97e ("selftests: enforce local header dependency in lib.mk") Signed-off-by: Tommi Rantala Acked-by: Kees Cook Reviewed-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/lib.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 30848ca36555..a5ce26d548e4 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -136,7 +136,7 @@ endif ifeq ($(OVERRIDE_TARGETS),) LOCAL_HDRS := $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h $(OUTPUT)/%:%.c $(LOCAL_HDRS) - $(LINK.c) $^ $(LDLIBS) -o $@ + $(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@ $(OUTPUT)/%.o:%.S $(COMPILE.S) $^ -o $@ -- cgit v1.3.1 From 1948172fdba5ad643529ddcd00a601c0caa913ed Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:22 +0300 Subject: selftests: pidfd: fix compilation errors due to wait.h Drop unneeded header inclusion to fix pidfd compilation errors seen in Fedora 32: In file included from pidfd_open_test.c:9: ../../../../usr/include/linux/wait.h:17:16: error: expected identifier before numeric constant 17 | #define P_ALL 0 | ^ Signed-off-by: Tommi Rantala Reviewed-by: Kees Cook Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/pidfd/pidfd_open_test.c | 1 - tools/testing/selftests/pidfd/pidfd_poll_test.c | 1 - 2 files changed, 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c index b9fe75fc3e51..8a59438ccc78 100644 --- a/tools/testing/selftests/pidfd/pidfd_open_test.c +++ b/tools/testing/selftests/pidfd/pidfd_open_test.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include diff --git a/tools/testing/selftests/pidfd/pidfd_poll_test.c b/tools/testing/selftests/pidfd/pidfd_poll_test.c index 4b115444dfe9..610811275357 100644 --- a/tools/testing/selftests/pidfd/pidfd_poll_test.c +++ b/tools/testing/selftests/pidfd/pidfd_poll_test.c @@ -3,7 +3,6 @@ #define _GNU_SOURCE #include #include -#include #include #include #include -- cgit v1.3.1 From ef7086347c82c53a6c5238bd2cf31379f6acadde Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:24 +0300 Subject: selftests/harness: prettify SKIP message whitespace again Commit 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP") replaced XFAIL with SKIP in the output. Add one more space to make the output aligned and pretty again. Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP") Signed-off-by: Tommi Rantala Acked-by: Kees Cook Reviewed-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/kselftest_harness.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index f19804df244c..d2cba10acc6d 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -126,7 +126,7 @@ snprintf(_metadata->results->reason, \ sizeof(_metadata->results->reason), fmt, ##__VA_ARGS__); \ if (TH_LOG_ENABLED) { \ - fprintf(TH_LOG_STREAM, "# SKIP %s\n", \ + fprintf(TH_LOG_STREAM, "# SKIP %s\n", \ _metadata->results->reason); \ } \ _metadata->passed = 1; \ -- cgit v1.3.1 From 0b18fed98bf96ba5ac14ab7c43c8a3364cb0daf8 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:25 +0300 Subject: selftests: pidfd: use ksft_test_result_skip() when skipping test There's planned tests != run tests in pidfd_test when some test is skipped: $ ./pidfd_test TAP version 13 1..8 [...] # pidfd_send_signal signal recycled pid test: Skipping test # Planned tests != run tests (8 != 7) # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0 Fix by using ksft_test_result_skip(): $ ./pidfd_test TAP version 13 1..8 [...] ok 8 # SKIP pidfd_send_signal signal recycled pid test: Unsharing pid namespace not permitted # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:1 error:0 Signed-off-by: Tommi Rantala Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/pidfd/pidfd_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index c585aaa2acd8..529eb700ac26 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -330,7 +330,7 @@ static int test_pidfd_send_signal_recycled_pid_fail(void) ksft_exit_fail_msg("%s test: Failed to recycle pid %d\n", test_name, PID_RECYCLE); case PIDFD_SKIP: - ksft_print_msg("%s test: Skipping test\n", test_name); + ksft_test_result_skip("%s test: Skipping test\n", test_name); ret = 0; break; case PIDFD_XFAIL: -- cgit v1.3.1 From b5ec9fe5be5e02e7db9e79aaa9a1ea7a3419d0b5 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:26 +0300 Subject: selftests: pidfd: skip test on kcmp() ENOSYS Skip test if kcmp() is not available, for example if kernel is compiled without CONFIG_CHECKPOINT_RESTORE=y. Signed-off-by: Tommi Rantala Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/pidfd/pidfd_getfd_test.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c index 7758c98be015..0930e2411dfb 100644 --- a/tools/testing/selftests/pidfd/pidfd_getfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c @@ -204,7 +204,10 @@ TEST_F(child, fetch_fd) fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0); ASSERT_GE(fd, 0); - EXPECT_EQ(0, sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd)); + ret = sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd); + if (ret < 0 && errno == ENOSYS) + SKIP(return, "kcmp() syscall not supported"); + EXPECT_EQ(ret, 0); ret = fcntl(fd, F_GETFD); ASSERT_GE(ret, 0); -- cgit v1.3.1 From 90da74af349e8a476e1d357da735b8f35b56d4e6 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:27 +0300 Subject: selftests: pidfd: add CONFIG_CHECKPOINT_RESTORE=y to config kcmp syscall is used in pidfd_getfd_test.c, so add CONFIG_CHECKPOINT_RESTORE=y to config to ensure kcmp is available. Signed-off-by: Tommi Rantala Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/pidfd/config | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config index bb11de90c0c9..f6f2965e17af 100644 --- a/tools/testing/selftests/pidfd/config +++ b/tools/testing/selftests/pidfd/config @@ -4,3 +4,4 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_NET_NS=y CONFIG_CGROUPS=y +CONFIG_CHECKPOINT_RESTORE=y -- cgit v1.3.1 From 7b9621d4593199aa0268e56081fe730b71c053e6 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:28 +0300 Subject: selftests: pidfd: drop needless linux/kcmp.h inclusion in pidfd_setns_test.c kcmp is not used in pidfd_setns_test.c, so do not include Signed-off-by: Tommi Rantala Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/pidfd/pidfd_setns_test.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c index 1f085b922c6e..6e2f2cd400ca 100644 --- a/tools/testing/selftests/pidfd/pidfd_setns_test.c +++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c @@ -16,7 +16,6 @@ #include #include #include -#include #include "pidfd.h" #include "../clone3/clone3_selftests.h" -- cgit v1.3.1 From 40723419f4079d0c7de98d0f3149db915557b55a Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 26 Oct 2020 11:49:41 +0000 Subject: kselftest: Enable vDSO test on non x86 platforms Currently the vDSO tests are built only on x86 platforms and cannot be cross compiled. Enable vDSO TARGET for all the platforms. Future patches will extend the tests. Cc: Shuah Khan Signed-off-by: Vincenzo Frascino Acked-by: Thomas Gleixner Signed-off-by: Shuah Khan --- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/vDSO/Makefile | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index d9c283503159..2dd60070d775 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -65,6 +65,7 @@ endif TARGETS += tmpfs TARGETS += tpm2 TARGETS += user +TARGETS += vDSO TARGETS += vm TARGETS += x86 TARGETS += zram diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 0069f2f83f86..47031f3c5f3a 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -9,7 +9,6 @@ ifeq ($(ARCH),x86) TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86 endif -ifndef CROSS_COMPILE CFLAGS := -std=gnu99 CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector ifeq ($(CONFIG_X86_32),y) @@ -24,4 +23,3 @@ $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c vdso_standalone_test_x86.c parse_vdso.c \ -o $@ -endif -- cgit v1.3.1 From 693f5ca08ca0767b407b7ca634dbf1b783676ec3 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 26 Oct 2020 11:49:42 +0000 Subject: kselftest: Extend vDSO selftest The current version of the multiarch vDSO selftest verifies only gettimeofday. Extend the vDSO selftest to the other library functions: - time - clock_getres - clock_gettime The extension has been used to verify the unified vdso library on the supported architectures. Cc: Shuah Khan Signed-off-by: Vincenzo Frascino Acked-by: Thomas Gleixner Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/Makefile | 2 + tools/testing/selftests/vDSO/vdso_config.h | 90 ++++++++++ tools/testing/selftests/vDSO/vdso_test_abi.c | 244 +++++++++++++++++++++++++++ 3 files changed, 336 insertions(+) create mode 100644 tools/testing/selftests/vDSO/vdso_config.h create mode 100644 tools/testing/selftests/vDSO/vdso_test_abi.c (limited to 'tools') diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 47031f3c5f3a..2ac7448bd414 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -5,6 +5,7 @@ uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu +TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi ifeq ($(ARCH),x86) TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86 endif @@ -18,6 +19,7 @@ endif all: $(TEST_GEN_PROGS) $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c +$(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \ vdso_standalone_test_x86.c parse_vdso.c \ diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h new file mode 100644 index 000000000000..eeb725df6045 --- /dev/null +++ b/tools/testing/selftests/vDSO/vdso_config.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * vdso_config.h: Configuration options for vDSO tests. + * Copyright (c) 2019 Arm Ltd. + */ +#ifndef __VDSO_CONFIG_H__ +#define __VDSO_CONFIG_H__ + +/* + * Each architecture exports its vDSO implementation with different names + * and a different version from the others, so we need to handle it as a + * special case. + */ +#if defined(__arm__) +#define VDSO_VERSION 0 +#define VDSO_NAMES 1 +#define VDSO_32BIT 1 +#elif defined(__aarch64__) +#define VDSO_VERSION 3 +#define VDSO_NAMES 0 +#elif defined(__powerpc__) +#define VDSO_VERSION 1 +#define VDSO_NAMES 0 +#define VDSO_32BIT 1 +#elif defined(__powerpc64__) +#define VDSO_VERSION 1 +#define VDSO_NAMES 0 +#elif defined (__s390__) +#define VDSO_VERSION 2 +#define VDSO_NAMES 0 +#define VDSO_32BIT 1 +#elif defined (__s390X__) +#define VDSO_VERSION 2 +#define VDSO_NAMES 0 +#elif defined(__mips__) +#define VDSO_VERSION 0 +#define VDSO_NAMES 1 +#define VDSO_32BIT 1 +#elif defined(__sparc__) +#define VDSO_VERSION 0 +#define VDSO_NAMES 1 +#define VDSO_32BIT 1 +#elif defined(__i386__) +#define VDSO_VERSION 0 +#define VDSO_NAMES 1 +#define VDSO_32BIT 1 +#elif defined(__x86_64__) +#define VDSO_VERSION 0 +#define VDSO_NAMES 1 +#elif defined(__riscv__) +#define VDSO_VERSION 5 +#define VDSO_NAMES 1 +#define VDSO_32BIT 1 +#else /* nds32 */ +#define VDSO_VERSION 4 +#define VDSO_NAMES 1 +#define VDSO_32BIT 1 +#endif + +static const char *versions[6] = { + "LINUX_2.6", + "LINUX_2.6.15", + "LINUX_2.6.29", + "LINUX_2.6.39", + "LINUX_4", + "LINUX_4.15", +}; + +static const char *names[2][5] = { + { + "__kernel_gettimeofday", + "__kernel_clock_gettime", + "__kernel_time", + "__kernel_clock_getres", +#if defined(VDSO_32BIT) + "__kernel_clock_gettime64", +#endif + }, + { + "__vdso_gettimeofday", + "__vdso_clock_gettime", + "__vdso_time", + "__vdso_clock_getres", +#if defined(VDSO_32BIT) + "__vdso_clock_gettime64", +#endif + }, +}; + +#endif /* __VDSO_CONFIG_H__ */ diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c new file mode 100644 index 000000000000..3d603f1394af --- /dev/null +++ b/tools/testing/selftests/vDSO/vdso_test_abi.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * vdso_full_test.c: Sample code to test all the timers. + * Copyright (c) 2019 Arm Ltd. + * + * Compile with: + * gcc -std=gnu99 vdso_full_test.c parse_vdso.c + * + */ + +#include +#include +#include +#include +#include +#include +#define _GNU_SOURCE +#include +#include + +#include "../kselftest.h" +#include "vdso_config.h" + +extern void *vdso_sym(const char *version, const char *name); +extern void vdso_init_from_sysinfo_ehdr(uintptr_t base); +extern void vdso_init_from_auxv(void *auxv); + +static const char *version; +static const char **name; + +typedef long (*vdso_gettimeofday_t)(struct timeval *tv, struct timezone *tz); +typedef long (*vdso_clock_gettime_t)(clockid_t clk_id, struct timespec *ts); +typedef long (*vdso_clock_getres_t)(clockid_t clk_id, struct timespec *ts); +typedef time_t (*vdso_time_t)(time_t *t); + +static int vdso_test_gettimeofday(void) +{ + /* Find gettimeofday. */ + vdso_gettimeofday_t vdso_gettimeofday = + (vdso_gettimeofday_t)vdso_sym(version, name[0]); + + if (!vdso_gettimeofday) { + printf("Could not find %s\n", name[0]); + return KSFT_SKIP; + } + + struct timeval tv; + long ret = vdso_gettimeofday(&tv, 0); + + if (ret == 0) { + printf("The time is %lld.%06lld\n", + (long long)tv.tv_sec, (long long)tv.tv_usec); + } else { + printf("%s failed\n", name[0]); + return KSFT_FAIL; + } + + return KSFT_PASS; +} + +static int vdso_test_clock_gettime(clockid_t clk_id) +{ + /* Find clock_gettime. */ + vdso_clock_gettime_t vdso_clock_gettime = + (vdso_clock_gettime_t)vdso_sym(version, name[1]); + + if (!vdso_clock_gettime) { + printf("Could not find %s\n", name[1]); + return KSFT_SKIP; + } + + struct timespec ts; + long ret = vdso_clock_gettime(clk_id, &ts); + + if (ret == 0) { + printf("The time is %lld.%06lld\n", + (long long)ts.tv_sec, (long long)ts.tv_nsec); + } else { + printf("%s failed\n", name[1]); + return KSFT_FAIL; + } + + return KSFT_PASS; +} + +static int vdso_test_time(void) +{ + /* Find time. */ + vdso_time_t vdso_time = + (vdso_time_t)vdso_sym(version, name[2]); + + if (!vdso_time) { + printf("Could not find %s\n", name[2]); + return KSFT_SKIP; + } + + long ret = vdso_time(NULL); + + if (ret > 0) { + printf("The time in hours since January 1, 1970 is %lld\n", + (long long)(ret / 3600)); + } else { + printf("%s failed\n", name[2]); + return KSFT_FAIL; + } + + return KSFT_PASS; +} + +static int vdso_test_clock_getres(clockid_t clk_id) +{ + /* Find clock_getres. */ + vdso_clock_getres_t vdso_clock_getres = + (vdso_clock_getres_t)vdso_sym(version, name[3]); + + if (!vdso_clock_getres) { + printf("Could not find %s\n", name[3]); + return KSFT_SKIP; + } + + struct timespec ts, sys_ts; + long ret = vdso_clock_getres(clk_id, &ts); + + if (ret == 0) { + printf("The resolution is %lld %lld\n", + (long long)ts.tv_sec, (long long)ts.tv_nsec); + } else { + printf("%s failed\n", name[3]); + return KSFT_FAIL; + } + + ret = syscall(SYS_clock_getres, clk_id, &sys_ts); + + if ((sys_ts.tv_sec != ts.tv_sec) || (sys_ts.tv_nsec != ts.tv_nsec)) { + printf("%s failed\n", name[3]); + return KSFT_FAIL; + } + + return KSFT_PASS; +} + +const char *vdso_clock_name[12] = { + "CLOCK_REALTIME", + "CLOCK_MONOTONIC", + "CLOCK_PROCESS_CPUTIME_ID", + "CLOCK_THREAD_CPUTIME_ID", + "CLOCK_MONOTONIC_RAW", + "CLOCK_REALTIME_COARSE", + "CLOCK_MONOTONIC_COARSE", + "CLOCK_BOOTTIME", + "CLOCK_REALTIME_ALARM", + "CLOCK_BOOTTIME_ALARM", + "CLOCK_SGI_CYCLE", + "CLOCK_TAI", +}; + +/* + * This function calls vdso_test_clock_gettime and vdso_test_clock_getres + * with different values for clock_id. + */ +static inline int vdso_test_clock(clockid_t clock_id) +{ + int ret0, ret1; + + ret0 = vdso_test_clock_gettime(clock_id); + /* A skipped test is considered passed */ + if (ret0 == KSFT_SKIP) + ret0 = KSFT_PASS; + + ret1 = vdso_test_clock_getres(clock_id); + /* A skipped test is considered passed */ + if (ret1 == KSFT_SKIP) + ret1 = KSFT_PASS; + + ret0 += ret1; + + printf("clock_id: %s", vdso_clock_name[clock_id]); + + if (ret0 > 0) + printf(" [FAIL]\n"); + else + printf(" [PASS]\n"); + + return ret0; +} + +int main(int argc, char **argv) +{ + unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); + int ret; + + if (!sysinfo_ehdr) { + printf("AT_SYSINFO_EHDR is not present!\n"); + return KSFT_SKIP; + } + + version = versions[VDSO_VERSION]; + name = (const char **)&names[VDSO_NAMES]; + + printf("[vDSO kselftest] VDSO_VERSION: %s\n", version); + + vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR)); + + ret = vdso_test_gettimeofday(); + +#if _POSIX_TIMERS > 0 + +#ifdef CLOCK_REALTIME + ret += vdso_test_clock(CLOCK_REALTIME); +#endif + +#ifdef CLOCK_BOOTTIME + ret += vdso_test_clock(CLOCK_BOOTTIME); +#endif + +#ifdef CLOCK_TAI + ret += vdso_test_clock(CLOCK_TAI); +#endif + +#ifdef CLOCK_REALTIME_COARSE + ret += vdso_test_clock(CLOCK_REALTIME_COARSE); +#endif + +#ifdef CLOCK_MONOTONIC + ret += vdso_test_clock(CLOCK_MONOTONIC); +#endif + +#ifdef CLOCK_MONOTONIC_RAW + ret += vdso_test_clock(CLOCK_MONOTONIC_RAW); +#endif + +#ifdef CLOCK_MONOTONIC_COARSE + ret += vdso_test_clock(CLOCK_MONOTONIC_COARSE); +#endif + +#endif + + ret += vdso_test_time(); + + if (ret > 0) + return KSFT_FAIL; + + return KSFT_PASS; +} -- cgit v1.3.1 From 03f55c7952c92d8577d6e9bc695f3fd20032cfd9 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 26 Oct 2020 11:49:43 +0000 Subject: kselftest: Extend vDSO selftest to clock_getres The current version of the multiarch vDSO selftest verifies only gettimeofday. Extend the vDSO selftest to clock_getres, to verify that the syscall and the vDSO library function return the same information. The extension has been used to verify the hrtimer_resoltion fix. Cc: Shuah Khan Signed-off-by: Vincenzo Frascino Acked-by: Thomas Gleixner Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/Makefile | 2 + .../selftests/vDSO/vdso_test_clock_getres.c | 124 +++++++++++++++++++++ 2 files changed, 126 insertions(+) create mode 100644 tools/testing/selftests/vDSO/vdso_test_clock_getres.c (limited to 'tools') diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 2ac7448bd414..375d80c2bff5 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -6,6 +6,7 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi +TEST_GEN_PROGS += $(OUTPUT)/vdso_test_clock_getres ifeq ($(ARCH),x86) TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86 endif @@ -20,6 +21,7 @@ all: $(TEST_GEN_PROGS) $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c $(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c +$(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \ vdso_standalone_test_x86.c parse_vdso.c \ diff --git a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c new file mode 100644 index 000000000000..15dcee16ff72 --- /dev/null +++ b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * vdso_clock_getres.c: Sample code to test clock_getres. + * Copyright (c) 2019 Arm Ltd. + * + * Compile with: + * gcc -std=gnu99 vdso_clock_getres.c + * + * Tested on ARM, ARM64, MIPS32, x86 (32-bit and 64-bit), + * Power (32-bit and 64-bit), S390x (32-bit and 64-bit). + * Might work on other architectures. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../kselftest.h" + +static long syscall_clock_getres(clockid_t _clkid, struct timespec *_ts) +{ + long ret; + + ret = syscall(SYS_clock_getres, _clkid, _ts); + + return ret; +} + +const char *vdso_clock_name[12] = { + "CLOCK_REALTIME", + "CLOCK_MONOTONIC", + "CLOCK_PROCESS_CPUTIME_ID", + "CLOCK_THREAD_CPUTIME_ID", + "CLOCK_MONOTONIC_RAW", + "CLOCK_REALTIME_COARSE", + "CLOCK_MONOTONIC_COARSE", + "CLOCK_BOOTTIME", + "CLOCK_REALTIME_ALARM", + "CLOCK_BOOTTIME_ALARM", + "CLOCK_SGI_CYCLE", + "CLOCK_TAI", +}; + +/* + * This function calls clock_getres in vdso and by system call + * with different values for clock_id. + * + * Example of output: + * + * clock_id: CLOCK_REALTIME [PASS] + * clock_id: CLOCK_BOOTTIME [PASS] + * clock_id: CLOCK_TAI [PASS] + * clock_id: CLOCK_REALTIME_COARSE [PASS] + * clock_id: CLOCK_MONOTONIC [PASS] + * clock_id: CLOCK_MONOTONIC_RAW [PASS] + * clock_id: CLOCK_MONOTONIC_COARSE [PASS] + */ +static inline int vdso_test_clock(unsigned int clock_id) +{ + struct timespec x, y; + + printf("clock_id: %s", vdso_clock_name[clock_id]); + clock_getres(clock_id, &x); + syscall_clock_getres(clock_id, &y); + + if ((x.tv_sec != y.tv_sec) || (x.tv_nsec != y.tv_nsec)) { + printf(" [FAIL]\n"); + return KSFT_FAIL; + } + + printf(" [PASS]\n"); + return KSFT_PASS; +} + +int main(int argc, char **argv) +{ + int ret; + +#if _POSIX_TIMERS > 0 + +#ifdef CLOCK_REALTIME + ret = vdso_test_clock(CLOCK_REALTIME); +#endif + +#ifdef CLOCK_BOOTTIME + ret += vdso_test_clock(CLOCK_BOOTTIME); +#endif + +#ifdef CLOCK_TAI + ret += vdso_test_clock(CLOCK_TAI); +#endif + +#ifdef CLOCK_REALTIME_COARSE + ret += vdso_test_clock(CLOCK_REALTIME_COARSE); +#endif + +#ifdef CLOCK_MONOTONIC + ret += vdso_test_clock(CLOCK_MONOTONIC); +#endif + +#ifdef CLOCK_MONOTONIC_RAW + ret += vdso_test_clock(CLOCK_MONOTONIC_RAW); +#endif + +#ifdef CLOCK_MONOTONIC_COARSE + ret += vdso_test_clock(CLOCK_MONOTONIC_COARSE); +#endif + +#endif + if (ret > 0) + return KSFT_FAIL; + + return KSFT_PASS; +} -- cgit v1.3.1 From c7e5789b24d36dd5dddd36ea2b99280a606cac42 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 26 Oct 2020 11:49:44 +0000 Subject: kselftest: Move test_vdso to the vDSO test suite Move test_vdso from x86 to the vDSO test suite. Suggested-by: Andy Lutomirski Cc: Shuah Khan Signed-off-by: Vincenzo Frascino Acked-by: Thomas Gleixner Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/Makefile | 10 +- .../testing/selftests/vDSO/vdso_test_correctness.c | 342 +++++++++++++++++++++ tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/test_vdso.c | 342 --------------------- 4 files changed, 351 insertions(+), 345 deletions(-) create mode 100644 tools/testing/selftests/vDSO/vdso_test_correctness.c delete mode 100644 tools/testing/selftests/x86/test_vdso.c (limited to 'tools') diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 375d80c2bff5..d53a4d8008f9 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -7,12 +7,14 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi TEST_GEN_PROGS += $(OUTPUT)/vdso_test_clock_getres -ifeq ($(ARCH),x86) +ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86 endif +TEST_GEN_PROGS += $(OUTPUT)/vdso_test_correctness CFLAGS := -std=gnu99 CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector +LDFLAGS_vdso_test_correctness := -ldl ifeq ($(CONFIG_X86_32),y) LDLIBS += -lgcc_s endif @@ -26,4 +28,8 @@ $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \ vdso_standalone_test_x86.c parse_vdso.c \ -o $@ - +$(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c + $(CC) $(CFLAGS) \ + vdso_test_correctness.c \ + -o $@ \ + $(LDFLAGS_vdso_test_correctness) diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c new file mode 100644 index 000000000000..42052db0f870 --- /dev/null +++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c @@ -0,0 +1,342 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ldt_gdt.c - Test cases for LDT and GDT access + * Copyright (c) 2011-2015 Andrew Lutomirski + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef SYS_getcpu +# ifdef __x86_64__ +# define SYS_getcpu 309 +# else +# define SYS_getcpu 318 +# endif +#endif + +/* max length of lines in /proc/self/maps - anything longer is skipped here */ +#define MAPS_LINE_LEN 128 + +int nerrs = 0; + +typedef int (*vgettime_t)(clockid_t, struct timespec *); + +vgettime_t vdso_clock_gettime; + +typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz); + +vgtod_t vdso_gettimeofday; + +typedef long (*getcpu_t)(unsigned *, unsigned *, void *); + +getcpu_t vgetcpu; +getcpu_t vdso_getcpu; + +static void *vsyscall_getcpu(void) +{ +#ifdef __x86_64__ + FILE *maps; + char line[MAPS_LINE_LEN]; + bool found = false; + + maps = fopen("/proc/self/maps", "r"); + if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */ + return NULL; + + while (fgets(line, MAPS_LINE_LEN, maps)) { + char r, x; + void *start, *end; + char name[MAPS_LINE_LEN]; + + /* sscanf() is safe here as strlen(name) >= strlen(line) */ + if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", + &start, &end, &r, &x, name) != 5) + continue; + + if (strcmp(name, "[vsyscall]")) + continue; + + /* assume entries are OK, as we test vDSO here not vsyscall */ + found = true; + break; + } + + fclose(maps); + + if (!found) { + printf("Warning: failed to find vsyscall getcpu\n"); + return NULL; + } + return (void *) (0xffffffffff600800); +#else + return NULL; +#endif +} + + +static void fill_function_pointers() +{ + void *vdso = dlopen("linux-vdso.so.1", + RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) + vdso = dlopen("linux-gate.so.1", + RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) { + printf("[WARN]\tfailed to find vDSO\n"); + return; + } + + vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu"); + if (!vdso_getcpu) + printf("Warning: failed to find getcpu in vDSO\n"); + + vgetcpu = (getcpu_t) vsyscall_getcpu(); + + vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); + if (!vdso_clock_gettime) + printf("Warning: failed to find clock_gettime in vDSO\n"); + + vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday"); + if (!vdso_gettimeofday) + printf("Warning: failed to find gettimeofday in vDSO\n"); + +} + +static long sys_getcpu(unsigned * cpu, unsigned * node, + void* cache) +{ + return syscall(__NR_getcpu, cpu, node, cache); +} + +static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) +{ + return syscall(__NR_clock_gettime, id, ts); +} + +static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + return syscall(__NR_gettimeofday, tv, tz); +} + +static void test_getcpu(void) +{ + printf("[RUN]\tTesting getcpu...\n"); + + for (int cpu = 0; ; cpu++) { + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + return; + + unsigned cpu_sys, cpu_vdso, cpu_vsys, + node_sys, node_vdso, node_vsys; + long ret_sys, ret_vdso = 1, ret_vsys = 1; + unsigned node; + + ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0); + if (vdso_getcpu) + ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0); + if (vgetcpu) + ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0); + + if (!ret_sys) + node = node_sys; + else if (!ret_vdso) + node = node_vdso; + else if (!ret_vsys) + node = node_vsys; + + bool ok = true; + if (!ret_sys && (cpu_sys != cpu || node_sys != node)) + ok = false; + if (!ret_vdso && (cpu_vdso != cpu || node_vdso != node)) + ok = false; + if (!ret_vsys && (cpu_vsys != cpu || node_vsys != node)) + ok = false; + + printf("[%s]\tCPU %u:", ok ? "OK" : "FAIL", cpu); + if (!ret_sys) + printf(" syscall: cpu %u, node %u", cpu_sys, node_sys); + if (!ret_vdso) + printf(" vdso: cpu %u, node %u", cpu_vdso, node_vdso); + if (!ret_vsys) + printf(" vsyscall: cpu %u, node %u", cpu_vsys, + node_vsys); + printf("\n"); + + if (!ok) + nerrs++; + } +} + +static bool ts_leq(const struct timespec *a, const struct timespec *b) +{ + if (a->tv_sec != b->tv_sec) + return a->tv_sec < b->tv_sec; + else + return a->tv_nsec <= b->tv_nsec; +} + +static bool tv_leq(const struct timeval *a, const struct timeval *b) +{ + if (a->tv_sec != b->tv_sec) + return a->tv_sec < b->tv_sec; + else + return a->tv_usec <= b->tv_usec; +} + +static char const * const clocknames[] = { + [0] = "CLOCK_REALTIME", + [1] = "CLOCK_MONOTONIC", + [2] = "CLOCK_PROCESS_CPUTIME_ID", + [3] = "CLOCK_THREAD_CPUTIME_ID", + [4] = "CLOCK_MONOTONIC_RAW", + [5] = "CLOCK_REALTIME_COARSE", + [6] = "CLOCK_MONOTONIC_COARSE", + [7] = "CLOCK_BOOTTIME", + [8] = "CLOCK_REALTIME_ALARM", + [9] = "CLOCK_BOOTTIME_ALARM", + [10] = "CLOCK_SGI_CYCLE", + [11] = "CLOCK_TAI", +}; + +static void test_one_clock_gettime(int clock, const char *name) +{ + struct timespec start, vdso, end; + int vdso_ret, end_ret; + + printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock); + + if (sys_clock_gettime(clock, &start) < 0) { + if (errno == EINVAL) { + vdso_ret = vdso_clock_gettime(clock, &vdso); + if (vdso_ret == -EINVAL) { + printf("[OK]\tNo such clock.\n"); + } else { + printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret); + nerrs++; + } + } else { + printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno); + } + return; + } + + vdso_ret = vdso_clock_gettime(clock, &vdso); + end_ret = sys_clock_gettime(clock, &end); + + if (vdso_ret != 0 || end_ret != 0) { + printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n", + vdso_ret, errno); + nerrs++; + return; + } + + printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", + (unsigned long long)start.tv_sec, start.tv_nsec, + (unsigned long long)vdso.tv_sec, vdso.tv_nsec, + (unsigned long long)end.tv_sec, end.tv_nsec); + + if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) { + printf("[FAIL]\tTimes are out of sequence\n"); + nerrs++; + } +} + +static void test_clock_gettime(void) +{ + if (!vdso_clock_gettime) { + printf("[SKIP]\tNo vDSO, so skipping clock_gettime() tests\n"); + return; + } + + for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]); + clock++) { + test_one_clock_gettime(clock, clocknames[clock]); + } + + /* Also test some invalid clock ids */ + test_one_clock_gettime(-1, "invalid"); + test_one_clock_gettime(INT_MIN, "invalid"); + test_one_clock_gettime(INT_MAX, "invalid"); +} + +static void test_gettimeofday(void) +{ + struct timeval start, vdso, end; + struct timezone sys_tz, vdso_tz; + int vdso_ret, end_ret; + + if (!vdso_gettimeofday) + return; + + printf("[RUN]\tTesting gettimeofday...\n"); + + if (sys_gettimeofday(&start, &sys_tz) < 0) { + printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno); + nerrs++; + return; + } + + vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz); + end_ret = sys_gettimeofday(&end, NULL); + + if (vdso_ret != 0 || end_ret != 0) { + printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n", + vdso_ret, errno); + nerrs++; + return; + } + + printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n", + (unsigned long long)start.tv_sec, start.tv_usec, + (unsigned long long)vdso.tv_sec, vdso.tv_usec, + (unsigned long long)end.tv_sec, end.tv_usec); + + if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) { + printf("[FAIL]\tTimes are out of sequence\n"); + nerrs++; + } + + if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest && + sys_tz.tz_dsttime == vdso_tz.tz_dsttime) { + printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n", + sys_tz.tz_minuteswest, sys_tz.tz_dsttime); + } else { + printf("[FAIL]\ttimezones do not match\n"); + nerrs++; + } + + /* And make sure that passing NULL for tz doesn't crash. */ + vdso_gettimeofday(&vdso, NULL); +} + +int main(int argc, char **argv) +{ + fill_function_pointers(); + + test_clock_gettime(); + test_gettimeofday(); + + /* + * Test getcpu() last so that, if something goes wrong setting affinity, + * we still run the other tests. + */ + test_getcpu(); + + return nerrs ? 1 : 0; +} diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 6703c7906b71..333980375bc7 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -12,7 +12,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl ioperm \ - test_vdso test_vsyscall mov_ss_trap \ + test_vsyscall mov_ss_trap \ syscall_arg_fault fsgsbase_restore TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c deleted file mode 100644 index 42052db0f870..000000000000 --- a/tools/testing/selftests/x86/test_vdso.c +++ /dev/null @@ -1,342 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ldt_gdt.c - Test cases for LDT and GDT access - * Copyright (c) 2011-2015 Andrew Lutomirski - */ - -#define _GNU_SOURCE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifndef SYS_getcpu -# ifdef __x86_64__ -# define SYS_getcpu 309 -# else -# define SYS_getcpu 318 -# endif -#endif - -/* max length of lines in /proc/self/maps - anything longer is skipped here */ -#define MAPS_LINE_LEN 128 - -int nerrs = 0; - -typedef int (*vgettime_t)(clockid_t, struct timespec *); - -vgettime_t vdso_clock_gettime; - -typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz); - -vgtod_t vdso_gettimeofday; - -typedef long (*getcpu_t)(unsigned *, unsigned *, void *); - -getcpu_t vgetcpu; -getcpu_t vdso_getcpu; - -static void *vsyscall_getcpu(void) -{ -#ifdef __x86_64__ - FILE *maps; - char line[MAPS_LINE_LEN]; - bool found = false; - - maps = fopen("/proc/self/maps", "r"); - if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */ - return NULL; - - while (fgets(line, MAPS_LINE_LEN, maps)) { - char r, x; - void *start, *end; - char name[MAPS_LINE_LEN]; - - /* sscanf() is safe here as strlen(name) >= strlen(line) */ - if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", - &start, &end, &r, &x, name) != 5) - continue; - - if (strcmp(name, "[vsyscall]")) - continue; - - /* assume entries are OK, as we test vDSO here not vsyscall */ - found = true; - break; - } - - fclose(maps); - - if (!found) { - printf("Warning: failed to find vsyscall getcpu\n"); - return NULL; - } - return (void *) (0xffffffffff600800); -#else - return NULL; -#endif -} - - -static void fill_function_pointers() -{ - void *vdso = dlopen("linux-vdso.so.1", - RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); - if (!vdso) - vdso = dlopen("linux-gate.so.1", - RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); - if (!vdso) { - printf("[WARN]\tfailed to find vDSO\n"); - return; - } - - vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu"); - if (!vdso_getcpu) - printf("Warning: failed to find getcpu in vDSO\n"); - - vgetcpu = (getcpu_t) vsyscall_getcpu(); - - vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); - if (!vdso_clock_gettime) - printf("Warning: failed to find clock_gettime in vDSO\n"); - - vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday"); - if (!vdso_gettimeofday) - printf("Warning: failed to find gettimeofday in vDSO\n"); - -} - -static long sys_getcpu(unsigned * cpu, unsigned * node, - void* cache) -{ - return syscall(__NR_getcpu, cpu, node, cache); -} - -static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) -{ - return syscall(__NR_clock_gettime, id, ts); -} - -static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz) -{ - return syscall(__NR_gettimeofday, tv, tz); -} - -static void test_getcpu(void) -{ - printf("[RUN]\tTesting getcpu...\n"); - - for (int cpu = 0; ; cpu++) { - cpu_set_t cpuset; - CPU_ZERO(&cpuset); - CPU_SET(cpu, &cpuset); - if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) - return; - - unsigned cpu_sys, cpu_vdso, cpu_vsys, - node_sys, node_vdso, node_vsys; - long ret_sys, ret_vdso = 1, ret_vsys = 1; - unsigned node; - - ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0); - if (vdso_getcpu) - ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0); - if (vgetcpu) - ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0); - - if (!ret_sys) - node = node_sys; - else if (!ret_vdso) - node = node_vdso; - else if (!ret_vsys) - node = node_vsys; - - bool ok = true; - if (!ret_sys && (cpu_sys != cpu || node_sys != node)) - ok = false; - if (!ret_vdso && (cpu_vdso != cpu || node_vdso != node)) - ok = false; - if (!ret_vsys && (cpu_vsys != cpu || node_vsys != node)) - ok = false; - - printf("[%s]\tCPU %u:", ok ? "OK" : "FAIL", cpu); - if (!ret_sys) - printf(" syscall: cpu %u, node %u", cpu_sys, node_sys); - if (!ret_vdso) - printf(" vdso: cpu %u, node %u", cpu_vdso, node_vdso); - if (!ret_vsys) - printf(" vsyscall: cpu %u, node %u", cpu_vsys, - node_vsys); - printf("\n"); - - if (!ok) - nerrs++; - } -} - -static bool ts_leq(const struct timespec *a, const struct timespec *b) -{ - if (a->tv_sec != b->tv_sec) - return a->tv_sec < b->tv_sec; - else - return a->tv_nsec <= b->tv_nsec; -} - -static bool tv_leq(const struct timeval *a, const struct timeval *b) -{ - if (a->tv_sec != b->tv_sec) - return a->tv_sec < b->tv_sec; - else - return a->tv_usec <= b->tv_usec; -} - -static char const * const clocknames[] = { - [0] = "CLOCK_REALTIME", - [1] = "CLOCK_MONOTONIC", - [2] = "CLOCK_PROCESS_CPUTIME_ID", - [3] = "CLOCK_THREAD_CPUTIME_ID", - [4] = "CLOCK_MONOTONIC_RAW", - [5] = "CLOCK_REALTIME_COARSE", - [6] = "CLOCK_MONOTONIC_COARSE", - [7] = "CLOCK_BOOTTIME", - [8] = "CLOCK_REALTIME_ALARM", - [9] = "CLOCK_BOOTTIME_ALARM", - [10] = "CLOCK_SGI_CYCLE", - [11] = "CLOCK_TAI", -}; - -static void test_one_clock_gettime(int clock, const char *name) -{ - struct timespec start, vdso, end; - int vdso_ret, end_ret; - - printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock); - - if (sys_clock_gettime(clock, &start) < 0) { - if (errno == EINVAL) { - vdso_ret = vdso_clock_gettime(clock, &vdso); - if (vdso_ret == -EINVAL) { - printf("[OK]\tNo such clock.\n"); - } else { - printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret); - nerrs++; - } - } else { - printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno); - } - return; - } - - vdso_ret = vdso_clock_gettime(clock, &vdso); - end_ret = sys_clock_gettime(clock, &end); - - if (vdso_ret != 0 || end_ret != 0) { - printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n", - vdso_ret, errno); - nerrs++; - return; - } - - printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", - (unsigned long long)start.tv_sec, start.tv_nsec, - (unsigned long long)vdso.tv_sec, vdso.tv_nsec, - (unsigned long long)end.tv_sec, end.tv_nsec); - - if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) { - printf("[FAIL]\tTimes are out of sequence\n"); - nerrs++; - } -} - -static void test_clock_gettime(void) -{ - if (!vdso_clock_gettime) { - printf("[SKIP]\tNo vDSO, so skipping clock_gettime() tests\n"); - return; - } - - for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]); - clock++) { - test_one_clock_gettime(clock, clocknames[clock]); - } - - /* Also test some invalid clock ids */ - test_one_clock_gettime(-1, "invalid"); - test_one_clock_gettime(INT_MIN, "invalid"); - test_one_clock_gettime(INT_MAX, "invalid"); -} - -static void test_gettimeofday(void) -{ - struct timeval start, vdso, end; - struct timezone sys_tz, vdso_tz; - int vdso_ret, end_ret; - - if (!vdso_gettimeofday) - return; - - printf("[RUN]\tTesting gettimeofday...\n"); - - if (sys_gettimeofday(&start, &sys_tz) < 0) { - printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno); - nerrs++; - return; - } - - vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz); - end_ret = sys_gettimeofday(&end, NULL); - - if (vdso_ret != 0 || end_ret != 0) { - printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n", - vdso_ret, errno); - nerrs++; - return; - } - - printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n", - (unsigned long long)start.tv_sec, start.tv_usec, - (unsigned long long)vdso.tv_sec, vdso.tv_usec, - (unsigned long long)end.tv_sec, end.tv_usec); - - if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) { - printf("[FAIL]\tTimes are out of sequence\n"); - nerrs++; - } - - if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest && - sys_tz.tz_dsttime == vdso_tz.tz_dsttime) { - printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n", - sys_tz.tz_minuteswest, sys_tz.tz_dsttime); - } else { - printf("[FAIL]\ttimezones do not match\n"); - nerrs++; - } - - /* And make sure that passing NULL for tz doesn't crash. */ - vdso_gettimeofday(&vdso, NULL); -} - -int main(int argc, char **argv) -{ - fill_function_pointers(); - - test_clock_gettime(); - test_gettimeofday(); - - /* - * Test getcpu() last so that, if something goes wrong setting affinity, - * we still run the other tests. - */ - test_getcpu(); - - return nerrs ? 1 : 0; -} -- cgit v1.3.1 From b2f1c3db28870d88d1a19aa86a8374e7725d62c5 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 26 Oct 2020 11:49:45 +0000 Subject: kselftest: Extend vdso correctness test to clock_gettime64 With the release of Linux 5.1 has been added a new syscall, clock_gettime64, that provided a 64 bit time value for a specified clock_ID to make the kernel Y2038 safe on 32 bit architectures. Extend the vdso correctness test to cover the newly exposed vdso function. Cc: Shuah Khan Signed-off-by: Vincenzo Frascino Acked-by: Thomas Gleixner Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/vdso_config.h | 4 +- .../testing/selftests/vDSO/vdso_test_correctness.c | 115 ++++++++++++++++++++- 2 files changed, 115 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h index eeb725df6045..6a6fe8d4ff55 100644 --- a/tools/testing/selftests/vDSO/vdso_config.h +++ b/tools/testing/selftests/vDSO/vdso_config.h @@ -66,12 +66,13 @@ static const char *versions[6] = { "LINUX_4.15", }; -static const char *names[2][5] = { +static const char *names[2][6] = { { "__kernel_gettimeofday", "__kernel_clock_gettime", "__kernel_time", "__kernel_clock_getres", + "__kernel_getcpu", #if defined(VDSO_32BIT) "__kernel_clock_gettime64", #endif @@ -81,6 +82,7 @@ static const char *names[2][5] = { "__vdso_clock_gettime", "__vdso_time", "__vdso_clock_getres", + "__vdso_getcpu", #if defined(VDSO_32BIT) "__vdso_clock_gettime64", #endif diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c index 42052db0f870..5029ef9b228c 100644 --- a/tools/testing/selftests/vDSO/vdso_test_correctness.c +++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c @@ -19,6 +19,10 @@ #include #include +#include "vdso_config.h" + +static const char **name; + #ifndef SYS_getcpu # ifdef __x86_64__ # define SYS_getcpu 309 @@ -27,6 +31,17 @@ # endif #endif +#ifndef __NR_clock_gettime64 +#define __NR_clock_gettime64 403 +#endif + +#ifndef __kernel_timespec +struct __kernel_timespec { + long long tv_sec; + long long tv_nsec; +}; +#endif + /* max length of lines in /proc/self/maps - anything longer is skipped here */ #define MAPS_LINE_LEN 128 @@ -36,6 +51,10 @@ typedef int (*vgettime_t)(clockid_t, struct timespec *); vgettime_t vdso_clock_gettime; +typedef int (*vgettime64_t)(clockid_t, struct __kernel_timespec *); + +vgettime64_t vdso_clock_gettime64; + typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz); vgtod_t vdso_gettimeofday; @@ -99,17 +118,23 @@ static void fill_function_pointers() return; } - vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu"); + vdso_getcpu = (getcpu_t)dlsym(vdso, name[4]); if (!vdso_getcpu) printf("Warning: failed to find getcpu in vDSO\n"); vgetcpu = (getcpu_t) vsyscall_getcpu(); - vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); + vdso_clock_gettime = (vgettime_t)dlsym(vdso, name[1]); if (!vdso_clock_gettime) printf("Warning: failed to find clock_gettime in vDSO\n"); - vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday"); +#if defined(VDSO_32BIT) + vdso_clock_gettime64 = (vgettime64_t)dlsym(vdso, name[5]); + if (!vdso_clock_gettime64) + printf("Warning: failed to find clock_gettime64 in vDSO\n"); +#endif + + vdso_gettimeofday = (vgtod_t)dlsym(vdso, name[0]); if (!vdso_gettimeofday) printf("Warning: failed to find gettimeofday in vDSO\n"); @@ -126,6 +151,11 @@ static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) return syscall(__NR_clock_gettime, id, ts); } +static inline int sys_clock_gettime64(clockid_t id, struct __kernel_timespec *ts) +{ + return syscall(__NR_clock_gettime64, id, ts); +} + static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz) { return syscall(__NR_gettimeofday, tv, tz); @@ -191,6 +221,15 @@ static bool ts_leq(const struct timespec *a, const struct timespec *b) return a->tv_nsec <= b->tv_nsec; } +static bool ts64_leq(const struct __kernel_timespec *a, + const struct __kernel_timespec *b) +{ + if (a->tv_sec != b->tv_sec) + return a->tv_sec < b->tv_sec; + else + return a->tv_nsec <= b->tv_nsec; +} + static bool tv_leq(const struct timeval *a, const struct timeval *b) { if (a->tv_sec != b->tv_sec) @@ -254,7 +293,10 @@ static void test_one_clock_gettime(int clock, const char *name) if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) { printf("[FAIL]\tTimes are out of sequence\n"); nerrs++; + return; } + + printf("[OK]\tTest Passed.\n"); } static void test_clock_gettime(void) @@ -275,6 +317,70 @@ static void test_clock_gettime(void) test_one_clock_gettime(INT_MAX, "invalid"); } +static void test_one_clock_gettime64(int clock, const char *name) +{ + struct __kernel_timespec start, vdso, end; + int vdso_ret, end_ret; + + printf("[RUN]\tTesting clock_gettime64 for clock %s (%d)...\n", name, clock); + + if (sys_clock_gettime64(clock, &start) < 0) { + if (errno == EINVAL) { + vdso_ret = vdso_clock_gettime64(clock, &vdso); + if (vdso_ret == -EINVAL) { + printf("[OK]\tNo such clock.\n"); + } else { + printf("[FAIL]\tNo such clock, but __vdso_clock_gettime64 returned %d\n", vdso_ret); + nerrs++; + } + } else { + printf("[WARN]\t clock_gettime64(%d) syscall returned error %d\n", clock, errno); + } + return; + } + + vdso_ret = vdso_clock_gettime64(clock, &vdso); + end_ret = sys_clock_gettime64(clock, &end); + + if (vdso_ret != 0 || end_ret != 0) { + printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n", + vdso_ret, errno); + nerrs++; + return; + } + + printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", + (unsigned long long)start.tv_sec, start.tv_nsec, + (unsigned long long)vdso.tv_sec, vdso.tv_nsec, + (unsigned long long)end.tv_sec, end.tv_nsec); + + if (!ts64_leq(&start, &vdso) || !ts64_leq(&vdso, &end)) { + printf("[FAIL]\tTimes are out of sequence\n"); + nerrs++; + return; + } + + printf("[OK]\tTest Passed.\n"); +} + +static void test_clock_gettime64(void) +{ + if (!vdso_clock_gettime64) { + printf("[SKIP]\tNo vDSO, so skipping clock_gettime64() tests\n"); + return; + } + + for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]); + clock++) { + test_one_clock_gettime64(clock, clocknames[clock]); + } + + /* Also test some invalid clock ids */ + test_one_clock_gettime64(-1, "invalid"); + test_one_clock_gettime64(INT_MIN, "invalid"); + test_one_clock_gettime64(INT_MAX, "invalid"); +} + static void test_gettimeofday(void) { struct timeval start, vdso, end; @@ -327,9 +433,12 @@ static void test_gettimeofday(void) int main(int argc, char **argv) { + name = (const char **)&names[VDSO_NAMES]; + fill_function_pointers(); test_clock_gettime(); + test_clock_gettime64(); test_gettimeofday(); /* -- cgit v1.3.1 From 5bc7c1156f3f16d7e244abaf8c80566af32cb092 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 26 Oct 2020 12:12:43 +0000 Subject: kselftest/arm64: Fix check_buffer_fill test The check_buffer_fill test reports the error below because the test plan is not declared correctly: # Planned tests != run tests (0 != 20) Fix the test adding the correct test plan declaration. Fixes: e9b60476bea0 ("kselftest/arm64: Add utilities and a test to validate mte memory") Signed-off-by: Vincenzo Frascino Cc: Shuah Khan Cc: Catalin Marinas Cc: Will Deacon Cc: Gabor Kertesz Cc: Amit Daniel Kachhap Link: https://lore.kernel.org/r/20201026121248.2340-2-vincenzo.frascino@arm.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/mte/check_buffer_fill.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c index 242635d79035..c9fa141ebdcc 100644 --- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c +++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c @@ -417,6 +417,9 @@ int main(int argc, char *argv[]) /* Register SIGSEGV handler */ mte_register_signal(SIGSEGV, mte_default_handler); + /* Set test plan */ + ksft_set_plan(20); + /* Buffer by byte tests */ evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_SYNC_ERR), "Check buffer correctness by byte with sync err mode and mmap memory\n"); -- cgit v1.3.1 From 041fa41f5422ee275a8f60a182e1b32d196a1a2a Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 26 Oct 2020 12:12:44 +0000 Subject: kselftest/arm64: Fix check_tags_inclusion test The check_tags_inclusion test reports the error below because the test plan is not declared correctly: # Planned tests != run tests (0 != 4) Fix the test adding the correct test plan declaration. Fixes: f3b2a26ca78d ("kselftest/arm64: Verify mte tag inclusion via prctl") Signed-off-by: Vincenzo Frascino Cc: Shuah Khan Cc: Catalin Marinas Cc: Will Deacon Cc: Gabor Kertesz Cc: Amit Daniel Kachhap Link: https://lore.kernel.org/r/20201026121248.2340-3-vincenzo.frascino@arm.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/mte/check_tags_inclusion.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c index 94d245a0ed56..deaef1f61076 100644 --- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c +++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c @@ -170,6 +170,9 @@ int main(int argc, char *argv[]) /* Register SIGSEGV handler */ mte_register_signal(SIGSEGV, mte_default_handler); + /* Set test plan */ + ksft_set_plan(4); + evaluate_test(check_single_included_tags(USE_MMAP, MTE_SYNC_ERR), "Check an included tag value with sync mode\n"); evaluate_test(check_multiple_included_tags(USE_MMAP, MTE_SYNC_ERR), -- cgit v1.3.1 From 386cf789fa6dc4e0f35d25cba9469d62321e7180 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 26 Oct 2020 12:12:45 +0000 Subject: kselftest/arm64: Fix check_child_memory test The check_child_memory test reports the error below because the test plan is not declared correctly: # Planned tests != run tests (0 != 12) Fix the test adding the correct test plan declaration. Fixes: dfe537cf4718 ("kselftest/arm64: Check forked child mte memory accessibility") Signed-off-by: Vincenzo Frascino Cc: Shuah Khan Cc: Catalin Marinas Cc: Will Deacon Cc: Gabor Kertesz Cc: Amit Daniel Kachhap Link: https://lore.kernel.org/r/20201026121248.2340-4-vincenzo.frascino@arm.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/mte/check_child_memory.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c index 97bebdecd29e..43bd94f853ba 100644 --- a/tools/testing/selftests/arm64/mte/check_child_memory.c +++ b/tools/testing/selftests/arm64/mte/check_child_memory.c @@ -163,6 +163,9 @@ int main(int argc, char *argv[]) mte_register_signal(SIGSEGV, mte_default_handler); mte_register_signal(SIGBUS, mte_default_handler); + /* Set test plan */ + ksft_set_plan(12); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), "Check child anonymous memory with private mapping, precise mode and mmap memory\n"); evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), -- cgit v1.3.1 From 7419390a466e20d4a00cecdb4a68af47d0b21ca5 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 26 Oct 2020 12:12:46 +0000 Subject: kselftest/arm64: Fix check_mmap_options test The check_mmap_options test reports the error below because the test plan is not declared correctly: # Planned tests != run tests (0 != 22) Fix the test adding the correct test plan declaration. Fixes: 53ec81d23213 ("kselftest/arm64: Verify all different mmap MTE options") Signed-off-by: Vincenzo Frascino Cc: Shuah Khan Cc: Catalin Marinas Cc: Will Deacon Cc: Gabor Kertesz Cc: Amit Daniel Kachhap Link: https://lore.kernel.org/r/20201026121248.2340-5-vincenzo.frascino@arm.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/mte/check_mmap_options.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c index 33b13b86199b..a04b12c21ac9 100644 --- a/tools/testing/selftests/arm64/mte/check_mmap_options.c +++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c @@ -205,7 +205,11 @@ int main(int argc, char *argv[]) mte_register_signal(SIGBUS, mte_default_handler); mte_register_signal(SIGSEGV, mte_default_handler); + /* Set test plan */ + ksft_set_plan(22); + mte_enable_pstate_tco(); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n"); evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), -- cgit v1.3.1 From cbb268af05de9beedc6994f4d90628f451d996d7 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 26 Oct 2020 12:12:47 +0000 Subject: kselftest/arm64: Fix check_ksm_options test The check_ksm_options test reports the error below because the test plan is not declared correctly: # Planned tests != run tests (0 != 4) Fix the test adding the correct test plan declaration. Fixes: f981d8fa2646 ("kselftest/arm64: Verify KSM page merge for MTE pages") Signed-off-by: Vincenzo Frascino Cc: Shuah Khan Cc: Catalin Marinas Cc: Will Deacon Cc: Gabor Kertesz Cc: Amit Daniel Kachhap Link: https://lore.kernel.org/r/20201026121248.2340-6-vincenzo.frascino@arm.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/mte/check_ksm_options.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c index bc41ae630c86..3b23c4d61d38 100644 --- a/tools/testing/selftests/arm64/mte/check_ksm_options.c +++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c @@ -140,6 +140,10 @@ int main(int argc, char *argv[]) /* Register signal handlers */ mte_register_signal(SIGBUS, mte_default_handler); mte_register_signal(SIGSEGV, mte_default_handler); + + /* Set test plan */ + ksft_set_plan(4); + /* Enable KSM */ mte_ksm_setup(); -- cgit v1.3.1 From 493b35db0548956bd598a54aea0b2bb1c6227e2f Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 26 Oct 2020 12:12:48 +0000 Subject: kselftest/arm64: Fix check_user_mem test The check_user_mem test reports the error below because the test plan is not declared correctly: # Planned tests != run tests (0 != 4) Fix the test adding the correct test plan declaration. Fixes: 4dafc08d0ba4 ("kselftest/arm64: Check mte tagged user address in kernel") Signed-off-by: Vincenzo Frascino Cc: Shuah Khan Cc: Catalin Marinas Cc: Will Deacon Cc: Gabor Kertesz Cc: Amit Daniel Kachhap Link: https://lore.kernel.org/r/20201026121248.2340-7-vincenzo.frascino@arm.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/mte/check_user_mem.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c index 594e98e76880..4bfa80f2a8c3 100644 --- a/tools/testing/selftests/arm64/mte/check_user_mem.c +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -92,9 +92,13 @@ int main(int argc, char *argv[]) err = mte_default_setup(); if (err) return err; + /* Register signal handlers */ mte_register_signal(SIGSEGV, mte_default_handler); + /* Set test plan */ + ksft_set_plan(4); + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), "Check memory access from kernel in sync mode, private mapping and mmap memory\n"); evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), -- cgit v1.3.1 From d2692abd6fa9866fda3052efa5cbd116b9fec56b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:33 +0100 Subject: selftests: kselftest_harness.h: fix kernel-doc markups The kernel-doc markups there is violating the expected syntax, causing it to not parse the name of the markup identifier properly, preventing it to check if the kernel-doc matches the #define below each markup. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/697640045663f1366beb15e76e78b420dac5f5a2.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- tools/testing/selftests/kselftest_harness.h | 44 ++++++++++++++--------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index f19804df244c..d747d6b1da1a 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -432,7 +432,7 @@ */ /** - * ASSERT_EQ(expected, seen) + * ASSERT_EQ() * * @expected: expected value * @seen: measured value @@ -443,7 +443,7 @@ __EXPECT(expected, #expected, seen, #seen, ==, 1) /** - * ASSERT_NE(expected, seen) + * ASSERT_NE() * * @expected: expected value * @seen: measured value @@ -454,7 +454,7 @@ __EXPECT(expected, #expected, seen, #seen, !=, 1) /** - * ASSERT_LT(expected, seen) + * ASSERT_LT() * * @expected: expected value * @seen: measured value @@ -465,7 +465,7 @@ __EXPECT(expected, #expected, seen, #seen, <, 1) /** - * ASSERT_LE(expected, seen) + * ASSERT_LE() * * @expected: expected value * @seen: measured value @@ -476,7 +476,7 @@ __EXPECT(expected, #expected, seen, #seen, <=, 1) /** - * ASSERT_GT(expected, seen) + * ASSERT_GT() * * @expected: expected value * @seen: measured value @@ -487,7 +487,7 @@ __EXPECT(expected, #expected, seen, #seen, >, 1) /** - * ASSERT_GE(expected, seen) + * ASSERT_GE() * * @expected: expected value * @seen: measured value @@ -498,7 +498,7 @@ __EXPECT(expected, #expected, seen, #seen, >=, 1) /** - * ASSERT_NULL(seen) + * ASSERT_NULL() * * @seen: measured value * @@ -508,7 +508,7 @@ __EXPECT(NULL, "NULL", seen, #seen, ==, 1) /** - * ASSERT_TRUE(seen) + * ASSERT_TRUE() * * @seen: measured value * @@ -518,7 +518,7 @@ __EXPECT(0, "0", seen, #seen, !=, 1) /** - * ASSERT_FALSE(seen) + * ASSERT_FALSE() * * @seen: measured value * @@ -528,7 +528,7 @@ __EXPECT(0, "0", seen, #seen, ==, 1) /** - * ASSERT_STREQ(expected, seen) + * ASSERT_STREQ() * * @expected: expected value * @seen: measured value @@ -539,7 +539,7 @@ __EXPECT_STR(expected, seen, ==, 1) /** - * ASSERT_STRNE(expected, seen) + * ASSERT_STRNE() * * @expected: expected value * @seen: measured value @@ -550,7 +550,7 @@ __EXPECT_STR(expected, seen, !=, 1) /** - * EXPECT_EQ(expected, seen) + * EXPECT_EQ() * * @expected: expected value * @seen: measured value @@ -561,7 +561,7 @@ __EXPECT(expected, #expected, seen, #seen, ==, 0) /** - * EXPECT_NE(expected, seen) + * EXPECT_NE() * * @expected: expected value * @seen: measured value @@ -572,7 +572,7 @@ __EXPECT(expected, #expected, seen, #seen, !=, 0) /** - * EXPECT_LT(expected, seen) + * EXPECT_LT() * * @expected: expected value * @seen: measured value @@ -583,7 +583,7 @@ __EXPECT(expected, #expected, seen, #seen, <, 0) /** - * EXPECT_LE(expected, seen) + * EXPECT_LE() * * @expected: expected value * @seen: measured value @@ -594,7 +594,7 @@ __EXPECT(expected, #expected, seen, #seen, <=, 0) /** - * EXPECT_GT(expected, seen) + * EXPECT_GT() * * @expected: expected value * @seen: measured value @@ -605,7 +605,7 @@ __EXPECT(expected, #expected, seen, #seen, >, 0) /** - * EXPECT_GE(expected, seen) + * EXPECT_GE() * * @expected: expected value * @seen: measured value @@ -616,7 +616,7 @@ __EXPECT(expected, #expected, seen, #seen, >=, 0) /** - * EXPECT_NULL(seen) + * EXPECT_NULL() * * @seen: measured value * @@ -626,7 +626,7 @@ __EXPECT(NULL, "NULL", seen, #seen, ==, 0) /** - * EXPECT_TRUE(seen) + * EXPECT_TRUE() * * @seen: measured value * @@ -636,7 +636,7 @@ __EXPECT(0, "0", seen, #seen, !=, 0) /** - * EXPECT_FALSE(seen) + * EXPECT_FALSE() * * @seen: measured value * @@ -646,7 +646,7 @@ __EXPECT(0, "0", seen, #seen, ==, 0) /** - * EXPECT_STREQ(expected, seen) + * EXPECT_STREQ() * * @expected: expected value * @seen: measured value @@ -657,7 +657,7 @@ __EXPECT_STR(expected, seen, ==, 0) /** - * EXPECT_STRNE(expected, seen) + * EXPECT_STRNE() * * @expected: expected value * @seen: measured value -- cgit v1.3.1 From 1e6f5dcc1b9ec9068f5d38331cec38b35498edf5 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 Oct 2020 16:36:45 -0700 Subject: tools, bpftool: Avoid array index warnings. The bpf_caps array is shorter without CAP_BPF, avoid out of bounds reads if this isn't defined. Working around this avoids -Wno-array-bounds with clang. Signed-off-by: Ian Rogers Signed-off-by: Daniel Borkmann Reviewed-by: Tobias Klauser Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201027233646.3434896-1-irogers@google.com --- tools/bpf/bpftool/feature.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index a43a6f10b564..359960a8f1de 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -843,9 +843,14 @@ static int handle_perms(void) else p_err("missing %s%s%s%s%s%s%s%srequired for full feature probing; run as root or use 'unprivileged'", capability_msg(bpf_caps, 0), +#ifdef CAP_BPF capability_msg(bpf_caps, 1), capability_msg(bpf_caps, 2), - capability_msg(bpf_caps, 3)); + capability_msg(bpf_caps, 3) +#else + "", "", "", "", "", "" +#endif /* CAP_BPF */ + ); goto exit_free; } -- cgit v1.3.1 From 0698ac66e01019528f0db4191ae3aaf9978e67da Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 Oct 2020 16:36:46 -0700 Subject: tools, bpftool: Remove two unused variables. Avoid an unused variable warning. Signed-off-by: Ian Rogers Signed-off-by: Daniel Borkmann Reviewed-by: Tobias Klauser Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201027233646.3434896-2-irogers@google.com --- tools/bpf/bpftool/skeleton/profiler.bpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c index 4e3512f700c0..ce5b65e07ab1 100644 --- a/tools/bpf/bpftool/skeleton/profiler.bpf.c +++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c @@ -70,7 +70,7 @@ int BPF_PROG(fentry_XXX) static inline void fexit_update_maps(u32 id, struct bpf_perf_event_value *after) { - struct bpf_perf_event_value *before, diff, *accum; + struct bpf_perf_event_value *before, diff; before = bpf_map_lookup_elem(&fentry_readings, &id); /* only account samples with a valid fentry_reading */ @@ -95,7 +95,7 @@ int BPF_PROG(fexit_XXX) { struct bpf_perf_event_value readings[MAX_NUM_MATRICS]; u32 cpu = bpf_get_smp_processor_id(); - u32 i, one = 1, zero = 0; + u32 i, zero = 0; int err; u64 *count; -- cgit v1.3.1 From af8afcf1fdd5f365f70e2386c2d8c7a1abd853d7 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 29 Oct 2020 03:56:05 +0100 Subject: wireguard: selftests: check that route_me_harder packets use the right sk If netfilter changes the packet mark, the packet is rerouted. The ip_route_me_harder family of functions fails to use the right sk, opting to instead use skb->sk, resulting in a routing loop when used with tunnels. With the next change fixing this issue in netfilter, test for the relevant condition inside our test suite, since wireguard was where the bug was discovered. Reported-by: Chen Minqiang Signed-off-by: Jason A. Donenfeld Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/wireguard/netns.sh | 8 ++++++++ tools/testing/selftests/wireguard/qemu/kernel.config | 2 ++ 2 files changed, 10 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index d77f4829f1e0..74c69b75f6f5 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -316,6 +316,14 @@ pp sleep 3 n2 ping -W 1 -c 1 192.168.241.1 n1 wg set wg0 peer "$pub2" persistent-keepalive 0 +# Test that sk_bound_dev_if works +n1 ping -I wg0 -c 1 -W 1 192.168.241.2 +# What about when the mark changes and the packet must be rerouted? +n1 iptables -t mangle -I OUTPUT -j MARK --set-xmark 1 +n1 ping -c 1 -W 1 192.168.241.2 # First the boring case +n1 ping -I wg0 -c 1 -W 1 192.168.241.2 # Then the sk_bound_dev_if case +n1 iptables -t mangle -D OUTPUT -j MARK --set-xmark 1 + # Test that onion routing works, even when it loops n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5 ip1 addr add 192.168.242.1/24 dev wg0 diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index d531de13c95b..4eecb432a66c 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -18,10 +18,12 @@ CONFIG_NF_NAT=y CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MARK=y CONFIG_NF_CONNTRACK_IPV4=y CONFIG_NF_NAT_IPV4=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_MANGLE=y CONFIG_IP_NF_NAT=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y -- cgit v1.3.1 From 7afc9d8f82906754e16fce560fb4e9733bb9b75e Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 27 Oct 2020 20:59:19 +0200 Subject: selftests: net: bridge: rename current igmp tests to igmpv2 To prepare the bridge_igmp.sh for IGMPv3 we need to rename the current test to IGMPv2. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/bridge_igmp.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 88d2472ba151..481198300b72 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="reportleave_test" +ALL_TESTS="v2reportleave_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" @@ -110,7 +110,7 @@ mcast_packet_test() return $seen } -reportleave_test() +v2reportleave_test() { RET=0 ip address add dev $h2 $TEST_GROUP/32 autojoin @@ -118,12 +118,12 @@ reportleave_test() sleep 5 bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null - check_err $? "Report didn't create mdb entry for $TEST_GROUP" + check_err $? "IGMPv2 report didn't create mdb entry for $TEST_GROUP" mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2 check_fail $? "Traffic to $TEST_GROUP wasn't forwarded" - log_test "IGMP report $TEST_GROUP" + log_test "IGMPv2 report $TEST_GROUP" RET=0 bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null @@ -139,7 +139,7 @@ reportleave_test() mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2 check_err $? "Traffic to $TEST_GROUP was forwarded without mdb entry" - log_test "IGMP leave $TEST_GROUP" + log_test "IGMPv2 leave $TEST_GROUP" } trap cleanup EXIT -- cgit v1.3.1 From 79ae3e256aa1cfaa801e23a13b7f9e1a49cacb20 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 27 Oct 2020 20:59:20 +0200 Subject: selftests: net: bridge: igmp: add support for packet source address Add support for one more argument which specifies the source address to use. It will be later used for IGMPv3 S,G entry testing. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/bridge_igmp.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 481198300b72..1c19459dbc58 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -83,9 +83,10 @@ cleanup() mcast_packet_test() { local mac=$1 - local ip=$2 - local host1_if=$3 - local host2_if=$4 + local src_ip=$2 + local ip=$3 + local host1_if=$4 + local host2_if=$5 local seen=0 # Add an ACL on `host2_if` which will tell us whether the packet @@ -94,7 +95,7 @@ mcast_packet_test() tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \ flower dst_mac $mac action drop - $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t udp "dp=4096,sp=2048" -q + $MZ $host1_if -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q sleep 1 tc -j -s filter show dev $host2_if ingress \ @@ -120,7 +121,7 @@ v2reportleave_test() bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null check_err $? "IGMPv2 report didn't create mdb entry for $TEST_GROUP" - mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2 + mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2 check_fail $? "Traffic to $TEST_GROUP wasn't forwarded" log_test "IGMPv2 report $TEST_GROUP" @@ -136,7 +137,7 @@ v2reportleave_test() bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null check_fail $? "Leave didn't delete mdb entry for $TEST_GROUP" - mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2 + mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2 check_err $? "Traffic to $TEST_GROUP was forwarded without mdb entry" log_test "IGMPv2 leave $TEST_GROUP" -- cgit v1.3.1 From f0e260db4c9e0576b2092f30bddd6816f9d37383 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 27 Oct 2020 20:59:21 +0200 Subject: selftests: net: bridge: igmp: check for specific udp ip protocol We have to specifically check for udp protocol in addition to the mac address because in IGMPv3 tests group-specific queries will use the same mac address. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/bridge_igmp.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 1c19459dbc58..5562aef14c0a 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -93,7 +93,7 @@ mcast_packet_test() # was received by it or not. tc qdisc add dev $host2_if ingress tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \ - flower dst_mac $mac action drop + flower ip_proto udp dst_mac $mac action drop $MZ $host1_if -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q sleep 1 -- cgit v1.3.1 From 68d3163a4b7e50bc8e9bbf689d55174fdcd44fa5 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 27 Oct 2020 20:59:22 +0200 Subject: selftests: net: bridge: igmp: add IGMPv3 entries' state helpers Add helpers which will be used in subsequent tests, they are: - check_sg_entries: check for proper source list and S,G entry existence - check_sg_fwding: check for proper traffic forwarding/blocking - check_sg_state: check for proper blocked/forwarding entry state Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/bridge_igmp.sh | 67 ++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 5562aef14c0a..19c1f46d1151 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -143,6 +143,73 @@ v2reportleave_test() log_test "IGMPv2 leave $TEST_GROUP" } +check_sg_entries() +{ + local report=$1; shift + local slist=("$@") + local sarg="" + + for src in "${slist[@]}"; do + sarg="${sarg} and .source_list[].address == \"$src\"" + done + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null + check_err $? "Wrong *,G entry source list after $report report" + + for sgent in "${slist[@]}"; do + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null + check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)" + done +} + +check_sg_fwding() +{ + local should_fwd=$1; shift + local sources=("$@") + + for src in "${sources[@]}"; do + local retval=0 + + mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1 + retval=$? + if [ $should_fwd -eq 1 ]; then + check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)" + else + check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)" + fi + done +} + +check_sg_state() +{ + local is_blocked=$1; shift + local sources=("$@") + local should_fail=1 + + if [ $is_blocked -eq 1 ]; then + should_fail=0 + fi + + for src in "${sources[@]}"; do + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .source_list != null) | + .source_list[] | + select(.address == \"$src\") | + select(.timer == \"0.00\")" &>/dev/null + check_err_fail $should_fail $? "Entry $src has zero timer" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \ + .flags[] == \"blocked\")" &>/dev/null + check_err_fail $should_fail $? "Entry $src has blocked flag" + done +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From 98ae11cf8104a27fcd3dddaed4714be0600df419 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 27 Oct 2020 20:59:23 +0200 Subject: selftests: net: bridge: add tests for igmpv3 is_include and inc -> allow reports First we test is_include/include mode then we build on that with allow effectively achieving: state report result action INCLUDE (A) ALLOW (B) INCLUDE (A+B) (B)=GMI Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/bridge_igmp.sh | 82 +++++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 19c1f46d1151..e9999e346ea6 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -1,11 +1,20 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="v2reportleave_test" +ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" TEST_GROUP_MAC="01:00:5e:0a:0a:0a" + +ALL_GROUP="224.0.0.1" +ALL_MAC="01:00:5e:00:00:01" + +# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.1,192.0.2.2,192.0.2.3 +MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:03" +# IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12 +MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c" + source lib.sh h1_create() @@ -210,6 +219,77 @@ check_sg_state() done } +v3include_prepare() +{ + local host1_if=$1 + local mac=$2 + local group=$3 + local X=("192.0.2.1" "192.0.2.2" "192.0.2.3") + + ip link set dev br0 type bridge mcast_igmp_version 3 + check_err $? "Could not change bridge IGMP version to 3" + + $MZ $host1_if -b $mac -c 1 -B $group -t ip "proto=2,p=$MZPKT_IS_INC" -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .source_list != null)" &>/dev/null + check_err $? "Missing *,G entry with source list" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"include\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + check_sg_entries "is_include" "${X[@]}" +} + +v3cleanup() +{ + local port=$1 + local group=$2 + + bridge mdb del dev br0 port $port grp $group + ip link set dev br0 type bridge mcast_igmp_version 2 +} + +v3include_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.2" "192.0.2.3") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + check_sg_state 0 "${X[@]}" + + check_sg_fwding 1 "${X[@]}" + check_sg_fwding 0 "192.0.2.100" + + log_test "IGMPv3 report $TEST_GROUP is_include" + + v3cleanup $swp1 $TEST_GROUP +} + +v3inc_allow_test() +{ + RET=0 + local X=("192.0.2.10" "192.0.2.11" "192.0.2.12") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW" -q + sleep 1 + check_sg_entries "allow" "${X[@]}" + + check_sg_state 0 "${X[@]}" + + check_sg_fwding 1 "${X[@]}" + check_sg_fwding 0 "192.0.2.100" + + log_test "IGMPv3 report $TEST_GROUP include -> allow" + + v3cleanup $swp1 $TEST_GROUP +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From 47021771064cc99fd106783ddc698b76684ec3f0 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 27 Oct 2020 20:59:24 +0200 Subject: selftests: net: bridge: add test for igmpv3 inc -> is_include report The test checks for the following case: state report result action INCLUDE (A) IS_IN (B) INCLUDE (A+B) (B)=GMI Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/bridge_igmp.sh | 25 +++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index e9999e346ea6..added5c69e8b 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test" +ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" @@ -12,6 +12,8 @@ ALL_MAC="01:00:5e:00:00:01" # IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.1,192.0.2.2,192.0.2.3 MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:03" +# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.10,192.0.2.11,192.0.2.12 +MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c" # IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12 MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c" @@ -290,6 +292,27 @@ v3inc_allow_test() v3cleanup $swp1 $TEST_GROUP } +v3inc_is_include_test() +{ + RET=0 + local X=("192.0.2.10" "192.0.2.11" "192.0.2.12") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC2" -q + sleep 1 + check_sg_entries "is_include" "${X[@]}" + + check_sg_state 0 "${X[@]}" + + check_sg_fwding 1 "${X[@]}" + check_sg_fwding 0 "192.0.2.100" + + log_test "IGMPv3 report $TEST_GROUP include -> is_include" + + v3cleanup $swp1 $TEST_GROUP +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From 3c8b9fdad00481dfb0ca4ce81a5fec6c18fd77bc Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 27 Oct 2020 20:59:25 +0200 Subject: selftests: net: bridge: add test for igmpv3 inc -> is_exclude report The test checks for the following case: state report result action INCLUDE (A) IS_EX (B) EXCLUDE (A*B,B-A) (B-A)=0 Delete (A-B) Group Timer=GMI Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/bridge_igmp.sh | 41 +++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index added5c69e8b..34d2c4370fa6 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -1,7 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test" +ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \ + v3inc_is_exclude_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" @@ -16,6 +17,8 @@ MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00: MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c" # IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12 MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c" +# IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.1,192.0.2.2,192.0.2.20,192.0.2.21 +MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:14:c0:00:02:15" source lib.sh @@ -313,6 +316,42 @@ v3inc_is_include_test() v3cleanup $swp1 $TEST_GROUP } +v3inc_is_exclude_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.2") + local Y=("192.0.2.20" "192.0.2.21") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_EXC" -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"exclude\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" + + check_sg_state 0 "${X[@]}" + check_sg_state 1 "${Y[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.3\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.3 entry still exists" + + check_sg_fwding 1 "${X[@]}" 192.0.2.100 + check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP include -> is_exclude" + + v3cleanup $swp1 $TEST_GROUP +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From 735af7bec0f128e67192512854db459f61b1c278 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 27 Oct 2020 20:59:26 +0200 Subject: selftests: net: bridge: add test for igmpv3 inc -> to_exclude report The test checks for the following case: state report result action INCLUDE (A) TO_EX (B) EXCLUDE (A*B,B-A) (B-A)=0 Delete (A-B) Send Q(G,A*B) Group Timer=GMI Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/bridge_igmp.sh | 51 +++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 34d2c4370fa6..36f10a3168cc 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -2,7 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \ - v3inc_is_exclude_test" + v3inc_is_exclude_test v3inc_to_exclude_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" @@ -19,6 +19,8 @@ MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00 MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c" # IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.1,192.0.2.2,192.0.2.20,192.0.2.21 MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:14:c0:00:02:15" +# IGMPv3 to_ex report: grp 239.10.10.10 to_exclude 192.0.2.1,192.0.2.20,192.0.2.30 +MZPKT_TO_EXC="22:00:9a:b1:00:00:00:01:04:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e" source lib.sh @@ -352,6 +354,53 @@ v3inc_is_exclude_test() v3cleanup $swp1 $TEST_GROUP } +v3inc_to_exclude_test() +{ + RET=0 + local X=("192.0.2.1") + local Y=("192.0.2.20" "192.0.2.30") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"exclude\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}" + + check_sg_state 0 "${X[@]}" + check_sg_state 1 "${Y[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.2\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.21\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.21 entry still exists" + + check_sg_fwding 1 "${X[@]}" 192.0.2.100 + check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP include -> to_exclude" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + v3cleanup $swp1 $TEST_GROUP +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From eecd8cfdff1b9e437ca9162d058de0cee68c1fe6 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 27 Oct 2020 20:59:27 +0200 Subject: selftests: net: bridge: add test for igmpv3 exc -> allow report The test checks for the following case: state report result action EXCLUDE (X,Y) ALLOW (A) EXCLUDE (X+A,Y-A) (A)=GMI Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/bridge_igmp.sh | 83 +++++++++++++++------- 1 file changed, 59 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 36f10a3168cc..d786e75abe2c 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -2,7 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \ - v3inc_is_exclude_test v3inc_to_exclude_test" + v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" @@ -17,6 +17,8 @@ MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00: MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c" # IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12 MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c" +# IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.20,192.0.2.30 +MZPKT_ALLOW2="22:00:5b:b4:00:00:00:01:05:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e" # IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.1,192.0.2.2,192.0.2.20,192.0.2.21 MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:14:c0:00:02:15" # IGMPv3 to_ex report: grp 239.10.10.10 to_exclude 192.0.2.1,192.0.2.20,192.0.2.30 @@ -250,6 +252,38 @@ v3include_prepare() check_sg_entries "is_include" "${X[@]}" } +v3exclude_prepare() +{ + local host1_if=$1 + local mac=$2 + local group=$3 + local pkt=$4 + local X=("192.0.2.1" "192.0.2.2") + local Y=("192.0.2.20" "192.0.2.21") + + v3include_prepare $host1_if $mac $group + + $MZ $host1_if -c 1 -b $mac -B $group -t ip "proto=2,p=$MZPKT_IS_EXC" -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"exclude\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" + + check_sg_state 0 "${X[@]}" + check_sg_state 1 "${Y[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.3\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.3 entry still exists" +} + v3cleanup() { local port=$1 @@ -321,30 +355,8 @@ v3inc_is_include_test() v3inc_is_exclude_test() { RET=0 - local X=("192.0.2.1" "192.0.2.2") - local Y=("192.0.2.20" "192.0.2.21") - - v3include_prepare $h1 $ALL_MAC $ALL_GROUP - - $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_EXC" -q - sleep 1 - bridge -j -d -s mdb show dev br0 \ - | jq -e ".[].mdb[] | \ - select(.grp == \"$TEST_GROUP\" and \ - .source_list != null and .filter_mode == \"exclude\")" &>/dev/null - check_err $? "Wrong *,G entry filter mode" - - check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" - check_sg_state 0 "${X[@]}" - check_sg_state 1 "${Y[@]}" - - bridge -j -d -s mdb show dev br0 \ - | jq -e ".[].mdb[] | \ - select(.grp == \"$TEST_GROUP\" and \ - .source_list != null and - .source_list[].address == \"192.0.2.3\")" &>/dev/null - check_fail $? "Wrong *,G entry source list, 192.0.2.3 entry still exists" + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP check_sg_fwding 1 "${X[@]}" 192.0.2.100 check_sg_fwding 0 "${Y[@]}" @@ -401,6 +413,29 @@ v3inc_to_exclude_test() v3cleanup $swp1 $TEST_GROUP } +v3exc_allow_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.2" "192.0.2.20" "192.0.2.30") + local Y=("192.0.2.21") + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q + sleep 1 + check_sg_entries "allow" "${X[@]}" "${Y[@]}" + + check_sg_state 0 "${X[@]}" + check_sg_state 1 "${Y[@]}" + + check_sg_fwding 1 "${X[@]}" 192.0.2.100 + check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP exclude -> allow" + + v3cleanup $swp1 $TEST_GROUP +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From e7e7ab7c00c2ecffa64defb54fe938d61fc19d39 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 27 Oct 2020 20:59:28 +0200 Subject: selftests: net: bridge: add test for igmpv3 exc -> is_include report The test checks for the following case: state report result action EXCLUDE (X,Y) IS_IN (A) EXCLUDE (X+A,Y-A) (A)=GMI Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/bridge_igmp.sh | 27 +++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index d786e75abe2c..b2b0f7d7e860 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -2,7 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \ - v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test" + v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" @@ -15,6 +15,8 @@ ALL_MAC="01:00:5e:00:00:01" MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:03" # IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.10,192.0.2.11,192.0.2.12 MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c" +# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.20,192.0.2.30 +MZPKT_IS_INC3="22:00:5f:b4:00:00:00:01:01:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e" # IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12 MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c" # IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.20,192.0.2.30 @@ -436,6 +438,29 @@ v3exc_allow_test() v3cleanup $swp1 $TEST_GROUP } +v3exc_is_include_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.2" "192.0.2.20" "192.0.2.30") + local Y=("192.0.2.21") + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC3" -q + sleep 1 + check_sg_entries "is_include" "${X[@]}" "${Y[@]}" + + check_sg_state 0 "${X[@]}" + check_sg_state 1 "${Y[@]}" + + check_sg_fwding 1 "${X[@]}" 192.0.2.100 + check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP exclude -> is_include" + + v3cleanup $swp1 $TEST_GROUP +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From 7b4f7138221a483e7642e582518814d579edf36a Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 27 Oct 2020 20:59:29 +0200 Subject: selftests: net: bridge: add test for igmpv3 exc -> is_exclude report The test checks for the following case: state report result action EXCLUDE (X,Y) IS_EX (A) EXCLUDE (A-Y,Y*A) (A-X-Y)=GMI Delete (X-A) Delete (Y-A) Group Timer=GMI Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/bridge_igmp.sh | 28 +++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index b2b0f7d7e860..91b0b26428f6 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -2,7 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \ - v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test" + v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \ + v3exc_is_exclude_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" @@ -23,6 +24,8 @@ MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:0 MZPKT_ALLOW2="22:00:5b:b4:00:00:00:01:05:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e" # IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.1,192.0.2.2,192.0.2.20,192.0.2.21 MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:14:c0:00:02:15" +# IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.20,192.0.2.30 +MZPKT_IS_EXC2="22:00:5e:b4:00:00:00:01:02:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e" # IGMPv3 to_ex report: grp 239.10.10.10 to_exclude 192.0.2.1,192.0.2.20,192.0.2.30 MZPKT_TO_EXC="22:00:9a:b1:00:00:00:01:04:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e" @@ -461,6 +464,29 @@ v3exc_is_include_test() v3cleanup $swp1 $TEST_GROUP } +v3exc_is_exclude_test() +{ + RET=0 + local X=("192.0.2.30") + local Y=("192.0.2.20") + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_EXC2" -q + sleep 1 + check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" + + check_sg_state 0 "${X[@]}" + check_sg_state 1 "${Y[@]}" + + check_sg_fwding 1 "${X[@]}" 192.0.2.100 + check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP exclude -> is_exclude" + + v3cleanup $swp1 $TEST_GROUP +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From 65bfc146ab95ad980134fe206467d1d7108e402e Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 27 Oct 2020 20:59:30 +0200 Subject: selftests: net: bridge: add test for igmpv3 exc -> to_exclude report The test checks for the following case: state report result action EXCLUDE (X,Y) TO_EX (A) EXCLUDE (A-Y,Y*A) (A-X-Y)=Group Timer Delete (X-A) Delete (Y-A) Send Q(G,A-Y) Group Timer=GMI Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/bridge_igmp.sh | 30 +++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 91b0b26428f6..3cfc30b88285 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -3,7 +3,7 @@ ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \ v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \ - v3exc_is_exclude_test" + v3exc_is_exclude_test v3exc_to_exclude_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" @@ -487,6 +487,34 @@ v3exc_is_exclude_test() v3cleanup $swp1 $TEST_GROUP } +v3exc_to_exclude_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.30") + local Y=("192.0.2.20") + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q + sleep 1 + check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}" + + check_sg_state 0 "${X[@]}" + check_sg_state 1 "${Y[@]}" + + check_sg_fwding 1 "${X[@]}" 192.0.2.100 + check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP exclude -> to_exclude" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + v3cleanup $swp1 $TEST_GROUP +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From 80899f1b1c05a07f907bc54d6dc5bdadb37ab4f1 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 27 Oct 2020 20:59:31 +0200 Subject: selftests: net: bridge: add test for igmpv3 inc -> block report The test checks for the following case: state report result action INCLUDE (A) BLOCK (B) INCLUDE (A) Send Q(G,A*B) Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/bridge_igmp.sh | 33 +++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 3cfc30b88285..3772c7a066c9 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -3,7 +3,7 @@ ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \ v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \ - v3exc_is_exclude_test v3exc_to_exclude_test" + v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" @@ -28,6 +28,8 @@ MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00: MZPKT_IS_EXC2="22:00:5e:b4:00:00:00:01:02:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e" # IGMPv3 to_ex report: grp 239.10.10.10 to_exclude 192.0.2.1,192.0.2.20,192.0.2.30 MZPKT_TO_EXC="22:00:9a:b1:00:00:00:01:04:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e" +# IGMPv3 block report: grp 239.10.10.10 block 192.0.2.1,192.0.2.20,192.0.2.30 +MZPKT_BLOCK="22:00:98:b1:00:00:00:01:06:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e" source lib.sh @@ -515,6 +517,35 @@ v3exc_to_exclude_test() v3cleanup $swp1 $TEST_GROUP } +v3inc_block_test() +{ + RET=0 + local X=("192.0.2.2" "192.0.2.3") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q + # make sure the lowered timers have expired (by default 2 seconds) + sleep 3 + check_sg_entries "block" "${X[@]}" + + check_sg_state 0 "${X[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.1\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.1 entry still exists" + + check_sg_fwding 1 "${X[@]}" + check_sg_fwding 0 "192.0.2.100" + + log_test "IGMPv3 report $TEST_GROUP include -> block" + + v3cleanup $swp1 $TEST_GROUP +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From 9eb58e07470bfb5a1c1d4ae08806b82d662171f7 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 27 Oct 2020 20:59:32 +0200 Subject: selftests: net: bridge: add test for igmpv3 exc -> block report The test checks for the following case: state report result action EXCLUDE (X,Y) BLOCK (A) EXCLUDE (X+(A-Y),Y) (A-X-Y)=Group Timer Send Q(G,A-Y) Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/bridge_igmp.sh | 30 +++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 3772c7a066c9..45c5619666d8 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -3,7 +3,7 @@ ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \ v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \ - v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test" + v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" @@ -546,6 +546,34 @@ v3inc_block_test() v3cleanup $swp1 $TEST_GROUP } +v3exc_block_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.2" "192.0.2.30") + local Y=("192.0.2.20" "192.0.2.21") + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q + sleep 1 + check_sg_entries "block" "${X[@]}" "${Y[@]}" + + check_sg_state 0 "${X[@]}" + check_sg_state 1 "${Y[@]}" + + check_sg_fwding 1 "${X[@]}" 192.0.2.100 + check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP exclude -> block" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + v3cleanup $swp1 $TEST_GROUP +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From 18f66c96ea585ca7bdd5c75eae3077566b0d73c0 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 27 Oct 2020 20:59:33 +0200 Subject: selftests: net: bridge: add test for igmpv3 exclude timeout Test that when a group in exclude mode expires it changes mode to include and the blocked entries are deleted. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/bridge_igmp.sh | 49 +++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 45c5619666d8..db0a03e30868 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -3,7 +3,8 @@ ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \ v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \ - v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test" + v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \ + v3exc_timeout_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" @@ -574,6 +575,52 @@ v3exc_block_test() v3cleanup $swp1 $TEST_GROUP } +v3exc_timeout_test() +{ + RET=0 + local X=("192.0.2.20" "192.0.2.30") + + # GMI should be 3 seconds + ip link set dev br0 type bridge mcast_query_interval 100 mcast_query_response_interval 100 + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + ip link set dev br0 type bridge mcast_query_interval 500 mcast_query_response_interval 500 + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q + sleep 3 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"include\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.1\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.1 entry still exists" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.2\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists" + + check_sg_entries "allow" "${X[@]}" + + check_sg_state 0 "${X[@]}" + + check_sg_fwding 1 "${X[@]}" + check_sg_fwding 0 192.0.2.100 + + log_test "IGMPv3 group $TEST_GROUP exclude timeout" + + ip link set dev br0 type bridge mcast_query_interval 12500 \ + mcast_query_response_interval 1000 + + v3cleanup $swp1 $TEST_GROUP +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From 414ea3754149847ec9491de9e9923750f5447331 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 27 Oct 2020 20:59:34 +0200 Subject: selftests: net: bridge: add test for igmpv3 *,g auto-add When we have *,G ports in exclude mode and a new S,G,port is added the kernel has to automatically create an S,G entry for each exclude port to get proper forwarding. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/bridge_igmp.sh | 31 +++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index db0a03e30868..0e71abdd7a03 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -4,7 +4,7 @@ ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \ v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \ v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \ - v3exc_timeout_test" + v3exc_timeout_test v3star_ex_auto_add_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" @@ -621,6 +621,35 @@ v3exc_timeout_test() v3cleanup $swp1 $TEST_GROUP } +v3star_ex_auto_add_test() +{ + RET=0 + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h2 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC" -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"192.0.2.3\" and \ + .port == \"$swp1\")" &>/dev/null + check_err $? "S,G entry for *,G port doesn't exist" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"192.0.2.3\" and \ + .port == \"$swp1\" and \ + .flags[] == \"added_by_star_ex\")" &>/dev/null + check_err $? "Auto-added S,G entry doesn't have added_by_star_ex flag" + + check_sg_fwding 1 192.0.2.3 + + log_test "IGMPv3 S,G port entry automatic add to a *,G port" + + v3cleanup $swp1 $TEST_GROUP + v3cleanup $swp2 $TEST_GROUP +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From 97a71a5455997d72adfd74e9d575d782d7c2ab9a Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 26 Oct 2020 11:09:22 -0700 Subject: KVM: selftests: test behavior of unmapped L2 APIC-access address Add a regression test for commit 671ddc700fd0 ("KVM: nVMX: Don't leak L1 MMIO regions to L2"). First, check to see that an L2 guest can be launched with a valid APIC-access address that is backed by a page of L1 physical memory. Next, set the APIC-access address to a (valid) L1 physical address that is not backed by memory. KVM can't handle this situation, so resuming L2 should result in a KVM exit for internal error (emulation). Signed-off-by: Jim Mattson Reviewed-by: Ricardo Koller Reviewed-by: Peter Shier Message-Id: <20201026180922.3120555-1-jmattson@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/include/x86_64/vmx.h | 6 + tools/testing/selftests/kvm/lib/x86_64/vmx.c | 9 ++ .../selftests/kvm/x86_64/vmx_apic_access_test.c | 142 +++++++++++++++++++++ 5 files changed, 159 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 307ceaadbbb9..d2c2d6205008 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -15,6 +15,7 @@ /x86_64/vmx_preemption_timer_test /x86_64/svm_vmcall_test /x86_64/sync_regs_test +/x86_64/vmx_apic_access_test /x86_64/vmx_close_while_nested_test /x86_64/vmx_dirty_log_test /x86_64/vmx_set_nested_state_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 7ebe71fbca53..30afbad36cd5 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -49,6 +49,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 54d624dd6c10..e78d7e26ba61 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -573,6 +573,10 @@ struct vmx_pages { void *eptp_hva; uint64_t eptp_gpa; void *eptp; + + void *apic_access_hva; + uint64_t apic_access_gpa; + void *apic_access; }; union vmx_basic { @@ -615,5 +619,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t memslot, uint32_t eptp_memslot); void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t eptp_memslot); +void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot); #endif /* SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index f1e00d43eea2..2448b30e8efa 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -542,3 +542,12 @@ void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp); } + +void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot) +{ + vmx->apic_access = (void *)vm_vaddr_alloc(vm, getpagesize(), + 0x10000, 0, 0); + vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access); + vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access); +} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c new file mode 100644 index 000000000000..1f65342d6cb7 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vmx_apic_access_test + * + * Copyright (C) 2020, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * The first subtest simply checks to see that an L2 guest can be + * launched with a valid APIC-access address that is backed by a + * page of L1 physical memory. + * + * The second subtest sets the APIC-access address to a (valid) L1 + * physical address that is not backed by memory. KVM can't handle + * this situation, so resuming L2 should result in a KVM exit for + * internal error (emulation). This is not an architectural + * requirement. It is just a shortcoming of KVM. The internal error + * is unfortunate, but it's better than what used to happen! + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#include +#include + +#include "kselftest.h" + +#define VCPU_ID 0 + +/* The virtual machine object. */ +static struct kvm_vm *vm; + +static void l2_guest_code(void) +{ + /* Exit to L1 */ + __asm__ __volatile__("vmcall"); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uint32_t control; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + /* Prepare the VMCS for L2 execution. */ + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + control = vmreadz(SECONDARY_VM_EXEC_CONTROL); + control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + vmwrite(SECONDARY_VM_EXEC_CONTROL, control); + vmwrite(APIC_ACCESS_ADDR, vmx_pages->apic_access_gpa); + + /* Try to launch L2 with the memory-backed APIC-access address. */ + GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR)); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + vmwrite(APIC_ACCESS_ADDR, high_gpa); + + /* Try to resume L2 with the unbacked APIC-access address. */ + GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR)); + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + unsigned long apic_access_addr = ~0ul; + unsigned int paddr_width; + unsigned int vaddr_width; + vm_vaddr_t vmx_pages_gva; + unsigned long high_gpa; + struct vmx_pages *vmx; + bool done = false; + + nested_vmx_check_supported(); + + vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + kvm_get_cpu_address_width(&paddr_width, &vaddr_width); + high_gpa = (1ul << paddr_width) - getpagesize(); + if ((unsigned long)DEFAULT_GUEST_PHY_PAGES * getpagesize() > high_gpa) { + print_skip("No unbacked physical page available"); + exit(KSFT_SKIP); + } + + vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); + prepare_virtualize_apic_accesses(vmx, vm, 0); + vcpu_args_set(vm, VCPU_ID, 2, vmx_pages_gva, high_gpa); + + while (!done) { + volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc; + + vcpu_run(vm, VCPU_ID); + if (apic_access_addr == high_gpa) { + TEST_ASSERT(run->exit_reason == + KVM_EXIT_INTERNAL_ERROR, + "Got exit reason other than KVM_EXIT_INTERNAL_ERROR: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->internal.suberror == + KVM_INTERNAL_ERROR_EMULATION, + "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u\n", + run->internal.suberror); + break; + } + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], + __FILE__, uc.args[1]); + /* NOT REACHED */ + case UCALL_SYNC: + apic_access_addr = uc.args[1]; + break; + case UCALL_DONE: + done = true; + break; + default: + TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd); + } + } + kvm_vm_free(vm); + return 0; +} -- cgit v1.3.1 From 0f55b67e5ad8dcdfb9ae2aaed75dd9fbb2bd92b2 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 29 Oct 2020 21:17:00 +0100 Subject: KVM: selftests: Don't require THP to run tests Unless we want to test with THP, then we shouldn't require it to be configured by the host kernel. Unfortunately, even advising with MADV_NOHUGEPAGE does require it, so check for THP first in order to avoid madvise failing with EINVAL. Signed-off-by: Andrew Jones Message-Id: <20201029201703.102716-2-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/kvm_util.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 74776ee228f2..3327cebc1095 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #define KVM_UTIL_PGS_PER_HUGEPG 512 @@ -664,13 +665,21 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, /* As needed perform madvise */ if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) { - ret = madvise(region->host_mem, npages * vm->page_size, - src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); - TEST_ASSERT(ret == 0, "madvise failed,\n" - " addr: %p\n" - " length: 0x%lx\n" - " src_type: %x", - region->host_mem, npages * vm->page_size, src_type); + struct stat statbuf; + + ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf); + TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT), + "stat /sys/kernel/mm/transparent_hugepage"); + + TEST_ASSERT(ret == 0 || src_type != VM_MEM_SRC_ANONYMOUS_THP, + "VM_MEM_SRC_ANONYMOUS_THP requires THP to be configured in the host kernel"); + + if (ret == 0) { + ret = madvise(region->host_mem, npages * vm->page_size, + src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); + TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %x", + region->host_mem, npages * vm->page_size, src_type); + } } region->unused_phy_pages = sparsebit_alloc(); -- cgit v1.3.1 From 7a60c2dd0f575ab14a457e99582af0ca1e072a74 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 28 Oct 2020 16:15:26 -0300 Subject: drm: Remove SCATTERLIST_MAX_SEGMENT Since commit 9a40401cfa13 ("lib/scatterlist: Do not limit max_segment to PAGE_ALIGNED values") the max_segment input to sg_alloc_table_from_pages() does not have to be any special value. The new algorithm will always create something less than what the user provides. Thus eliminate this confusing constant. - vmwgfx should use the HW capability, not mix in the OS page size for calling dma_set_max_seg_size() - i915 uses i915_sg_segment_size() both for sg_alloc_table_from_pages and for some open coded sgl construction. This doesn't change the value since rounddown(size, UINT_MAX) == SCATTERLIST_MAX_SEGMENT - drm_prime_pages_to_sg uses it as a default if max_segment is zero, UINT_MAX is fine to use directly. Cc: Gerd Hoffmann Cc: Daniel Vetter Cc: Thomas Hellstrom Cc: Qian Cai Cc: "Ursulin, Tvrtko" Suggested-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/0-v1-44733fccd781+13d-rm_scatterlist_max_jgg@nvidia.com --- drivers/gpu/drm/drm_prime.c | 4 ++-- drivers/gpu/drm/i915/i915_scatterlist.h | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 3 +-- include/linux/scatterlist.h | 6 ------ tools/testing/scatterlist/main.c | 2 +- 5 files changed, 5 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 187b55ede62e..a7b61c2d9190 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -820,8 +820,8 @@ struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev, if (dev) max_segment = dma_max_mapping_size(dev->dev); - if (max_segment == 0 || max_segment > SCATTERLIST_MAX_SEGMENT) - max_segment = SCATTERLIST_MAX_SEGMENT; + if (max_segment == 0) + max_segment = UINT_MAX; sge = __sg_alloc_table_from_pages(sg, pages, nr_pages, 0, nr_pages << PAGE_SHIFT, max_segment, diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index b7b59328cb76..883dd8d09d6b 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -112,7 +112,7 @@ static inline unsigned int i915_sg_segment_size(void) unsigned int size = swiotlb_max_segment(); if (size == 0) - return SCATTERLIST_MAX_SEGMENT; + size = UINT_MAX; size = rounddown(size, PAGE_SIZE); /* swiotlb_max_segment_size can return 1 byte when it means one page. */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index b3a60959b5d5..0c42d2c05f43 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -794,8 +794,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) if (unlikely(ret != 0)) goto out_err0; - dma_set_max_seg_size(dev->dev, min_t(unsigned int, U32_MAX & PAGE_MASK, - SCATTERLIST_MAX_SEGMENT)); + dma_set_max_seg_size(dev->dev, U32_MAX); if (dev_priv->capabilities & SVGA_CAP_GMR2) { DRM_INFO("Max GMR ids is %u\n", diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 36c47e7e66a2..6f70572b2938 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -18,12 +18,6 @@ struct scatterlist { #endif }; -/* - * Since the above length field is an unsigned int, below we define the maximum - * length in bytes that can be stored in one scatterlist entry. - */ -#define SCATTERLIST_MAX_SEGMENT (UINT_MAX & PAGE_MASK) - /* * These macros should be used after a dma_map_sg call has been done * to get bus addresses of each of the SG entries and their lengths. diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c index b2c7e9f7b8d3..d264bf853034 100644 --- a/tools/testing/scatterlist/main.c +++ b/tools/testing/scatterlist/main.c @@ -50,7 +50,7 @@ static void fail(struct test *test, struct sg_table *st, const char *cond) int main(void) { - const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT; + const unsigned int sgmax = UINT_MAX; struct test *test, tests[] = { { -EINVAL, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 }, { -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 }, -- cgit v1.3.1 From afabdf3338728c3aaa9f55d127e903dcd5f4acc7 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Sun, 1 Nov 2020 17:08:07 -0800 Subject: epoll: add a selftest for epoll timeout race Add a test case to ensure an event is observed by at least one poller when an epoll timeout is used. Signed-off-by: Guantao Liu Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Andrew Morton Reviewed-by: Eric Dumazet Reviewed-by: Khazhismel Kumykov Acked-by: Willem de Bruijn Cc: Al Viro Cc: Davidlohr Bueso Link: https://lkml.kernel.org/r/20201028180202.952079-2-soheil.kdev@gmail.com Signed-off-by: Linus Torvalds --- .../filesystems/epoll/epoll_wakeup_test.c | 95 ++++++++++++++++++++++ 1 file changed, 95 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c index d979ff14775a..8f82f99f7748 100644 --- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c +++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c @@ -3282,4 +3282,99 @@ TEST(epoll60) close(ctx.epfd); } +struct epoll61_ctx { + int epfd; + int evfd; +}; + +static void *epoll61_write_eventfd(void *ctx_) +{ + struct epoll61_ctx *ctx = ctx_; + int64_t l = 1; + + usleep(10950); + write(ctx->evfd, &l, sizeof(l)); + return NULL; +} + +static void *epoll61_epoll_with_timeout(void *ctx_) +{ + struct epoll61_ctx *ctx = ctx_; + struct epoll_event events[1]; + int n; + + n = epoll_wait(ctx->epfd, events, 1, 11); + /* + * If epoll returned the eventfd, write on the eventfd to wake up the + * blocking poller. + */ + if (n == 1) { + int64_t l = 1; + + write(ctx->evfd, &l, sizeof(l)); + } + return NULL; +} + +static void *epoll61_blocking_epoll(void *ctx_) +{ + struct epoll61_ctx *ctx = ctx_; + struct epoll_event events[1]; + + epoll_wait(ctx->epfd, events, 1, -1); + return NULL; +} + +TEST(epoll61) +{ + struct epoll61_ctx ctx; + struct epoll_event ev; + int i, r; + + ctx.epfd = epoll_create1(0); + ASSERT_GE(ctx.epfd, 0); + ctx.evfd = eventfd(0, EFD_NONBLOCK); + ASSERT_GE(ctx.evfd, 0); + + ev.events = EPOLLIN | EPOLLET | EPOLLERR | EPOLLHUP; + ev.data.ptr = NULL; + r = epoll_ctl(ctx.epfd, EPOLL_CTL_ADD, ctx.evfd, &ev); + ASSERT_EQ(r, 0); + + /* + * We are testing a race. Repeat the test case 1000 times to make it + * more likely to fail in case of a bug. + */ + for (i = 0; i < 1000; i++) { + pthread_t threads[3]; + int n; + + /* + * Start 3 threads: + * Thread 1 sleeps for 10.9ms and writes to the evenfd. + * Thread 2 calls epoll with a timeout of 11ms. + * Thread 3 calls epoll with a timeout of -1. + * + * The eventfd write by Thread 1 should either wakeup Thread 2 + * or Thread 3. If it wakes up Thread 2, Thread 2 writes on the + * eventfd to wake up Thread 3. + * + * If no events are missed, all three threads should eventually + * be joinable. + */ + ASSERT_EQ(pthread_create(&threads[0], NULL, + epoll61_write_eventfd, &ctx), 0); + ASSERT_EQ(pthread_create(&threads[1], NULL, + epoll61_epoll_with_timeout, &ctx), 0); + ASSERT_EQ(pthread_create(&threads[2], NULL, + epoll61_blocking_epoll, &ctx), 0); + + for (n = 0; n < ARRAY_SIZE(threads); ++n) + ASSERT_EQ(pthread_join(threads[n], NULL), 0); + } + + close(ctx.epfd); + close(ctx.evfd); +} + TEST_HARNESS_MAIN -- cgit v1.3.1 From 338b5da31de0d816b5718dad0e09482a27d51504 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Thu, 29 Oct 2020 21:09:31 +0200 Subject: selftests/net: timestamping: add ptp v2 support The timestamping tool is supporting now only PTPv1 (IEEE-1588 2002) while modern HW often supports also/only PTPv2. Hence timestamping tool is still useful for sanity testing of PTP drivers HW timestamping capabilities it's reasonable to upstate it to support PTPv2. This patch adds corresponding support which can be enabled by using new parameter "PTPV2". Signed-off-by: Grygorii Strashko Acked-by: Richard Cochran Link: https://lore.kernel.org/r/20201029190931.30883-1-grygorii.strashko@ti.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/timestamping.c | 47 +++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c index f4bb4fef0f39..21091be70688 100644 --- a/tools/testing/selftests/net/timestamping.c +++ b/tools/testing/selftests/net/timestamping.c @@ -59,7 +59,8 @@ static void usage(const char *error) " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n" " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n" " SIOCGSTAMP - check last socket time stamp\n" - " SIOCGSTAMPNS - more accurate socket time stamp\n"); + " SIOCGSTAMPNS - more accurate socket time stamp\n" + " PTPV2 - use PTPv2 messages\n"); exit(1); } @@ -115,13 +116,28 @@ static const unsigned char sync[] = { 0x00, 0x00, 0x00, 0x00 }; -static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len) +static const unsigned char sync_v2[] = { + 0x00, 0x02, 0x00, 0x2C, + 0x00, 0x00, 0x02, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xFF, + 0xFE, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, +}; + +static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len, int ptpv2) { + size_t sync_len = ptpv2 ? sizeof(sync_v2) : sizeof(sync); + const void *sync_p = ptpv2 ? sync_v2 : sync; struct timeval now; int res; - res = sendto(sock, sync, sizeof(sync), 0, - addr, addr_len); + res = sendto(sock, sync_p, sync_len, 0, addr, addr_len); gettimeofday(&now, 0); if (res < 0) printf("%s: %s\n", "send", strerror(errno)); @@ -134,9 +150,11 @@ static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len) static void printpacket(struct msghdr *msg, int res, char *data, int sock, int recvmsg_flags, - int siocgstamp, int siocgstampns) + int siocgstamp, int siocgstampns, int ptpv2) { struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name; + size_t sync_len = ptpv2 ? sizeof(sync_v2) : sizeof(sync); + const void *sync_p = ptpv2 ? sync_v2 : sync; struct cmsghdr *cmsg; struct timeval tv; struct timespec ts; @@ -210,10 +228,9 @@ static void printpacket(struct msghdr *msg, int res, "probably SO_EE_ORIGIN_TIMESTAMPING" #endif ); - if (res < sizeof(sync)) + if (res < sync_len) printf(" => truncated data?!"); - else if (!memcmp(sync, data + res - sizeof(sync), - sizeof(sync))) + else if (!memcmp(sync_p, data + res - sync_len, sync_len)) printf(" => GOT OUR DATA BACK (HURRAY!)"); break; } @@ -257,7 +274,7 @@ static void printpacket(struct msghdr *msg, int res, } static void recvpacket(int sock, int recvmsg_flags, - int siocgstamp, int siocgstampns) + int siocgstamp, int siocgstampns, int ptpv2) { char data[256]; struct msghdr msg; @@ -288,7 +305,7 @@ static void recvpacket(int sock, int recvmsg_flags, } else { printpacket(&msg, res, data, sock, recvmsg_flags, - siocgstamp, siocgstampns); + siocgstamp, siocgstampns, ptpv2); } } @@ -300,6 +317,7 @@ int main(int argc, char **argv) int siocgstamp = 0; int siocgstampns = 0; int ip_multicast_loop = 0; + int ptpv2 = 0; char *interface; int i; int enabled = 1; @@ -335,6 +353,8 @@ int main(int argc, char **argv) siocgstampns = 1; else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP")) ip_multicast_loop = 1; + else if (!strcasecmp(argv[i], "PTPV2")) + ptpv2 = 1; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE")) so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE")) @@ -369,6 +389,7 @@ int main(int argc, char **argv) HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; hwconfig.rx_filter = (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ? + ptpv2 ? HWTSTAMP_FILTER_PTP_V2_L4_SYNC : HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE; hwconfig_requested = hwconfig; if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) { @@ -496,16 +517,16 @@ int main(int argc, char **argv) printf("has error\n"); recvpacket(sock, 0, siocgstamp, - siocgstampns); + siocgstampns, ptpv2); recvpacket(sock, MSG_ERRQUEUE, siocgstamp, - siocgstampns); + siocgstampns, ptpv2); } } else { /* write one packet */ sendpacket(sock, (struct sockaddr *)&addr, - sizeof(addr)); + sizeof(addr), ptpv2); next.tv_sec += 5; continue; } -- cgit v1.3.1 From 7a078d2d18801bba7bde7337a823d7342299acf7 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 29 Oct 2020 15:37:07 -0700 Subject: libbpf, hashmap: Fix undefined behavior in hash_bits If bits is 0, the case when the map is empty, then the >> is the size of the register which is undefined behavior - on x86 it is the same as a shift by 0. Fix by handling the 0 case explicitly and guarding calls to hash_bits for empty maps in hashmap__for_each_key_entry and hashmap__for_each_entry_safe. Fixes: e3b924224028 ("libbpf: add resizable non-thread safe internal hashmap") Suggested-by: Andrii Nakryiko , Signed-off-by: Ian Rogers Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201029223707.494059-1-irogers@google.com --- tools/lib/bpf/hashmap.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h index d9b385fe808c..10a4c4cd13cf 100644 --- a/tools/lib/bpf/hashmap.h +++ b/tools/lib/bpf/hashmap.h @@ -15,6 +15,9 @@ static inline size_t hash_bits(size_t h, int bits) { /* shuffle bits and return requested number of upper bits */ + if (bits == 0) + return 0; + #if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__) /* LP64 case */ return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits); @@ -174,17 +177,17 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value); * @key: key to iterate entries for */ #define hashmap__for_each_key_entry(map, cur, _key) \ - for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ - map->cap_bits); \ - map->buckets ? map->buckets[bkt] : NULL; }); \ + for (cur = map->buckets \ + ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + : NULL; \ cur; \ cur = cur->next) \ if (map->equal_fn(cur->key, (_key), map->ctx)) #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ - for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ - map->cap_bits); \ - cur = map->buckets ? map->buckets[bkt] : NULL; }); \ + for (cur = map->buckets \ + ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + : NULL; \ cur && ({ tmp = cur->next; true; }); \ cur = tmp) \ if (map->equal_fn(cur->key, (_key), map->ctx)) -- cgit v1.3.1 From bbbc7aa45eefd4ef7ffbd5ee3bb49bd8b68a2213 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 30 Oct 2020 21:10:54 +0100 Subject: selftests: add test script for bareudp tunnels Test different encapsulation modes of the bareudp module: * Unicast MPLS, * IPv4 only, * IPv4 in multiproto mode (that is, IPv4 and IPv6), * IPv6. Each mode is tested with both an IPv4 and an IPv6 underlay. v2: * Add build dependencies in config file (Willem de Bruijn). * The MPLS test now uses its own IP addresses. This minimises the amount of cleanup between tests and simplifies the script. * Verify that iproute2 supports bareudp tunnels before running the script (and other minor usability improvements). Signed-off-by: Guillaume Nault Link: https://lore.kernel.org/r/8abc0e58f8a7eeb404f82466505a73110bc43ab8.1604088587.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/bareudp.sh | 538 +++++++++++++++++++++++++++++++++ tools/testing/selftests/net/config | 7 + 3 files changed, 546 insertions(+) create mode 100755 tools/testing/selftests/net/bareudp.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index ef352477cac6..fa5fa425d148 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -21,6 +21,7 @@ TEST_PROGS += rxtimestamp.sh TEST_PROGS += devlink_port_split.py TEST_PROGS += drop_monitor_tests.sh TEST_PROGS += vrf_route_leaking.sh +TEST_PROGS += bareudp.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh new file mode 100755 index 000000000000..c6fe22de7d0e --- /dev/null +++ b/tools/testing/selftests/net/bareudp.sh @@ -0,0 +1,538 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Test various bareudp tunnel configurations. +# +# The bareudp module allows to tunnel network protocols like IP or MPLS over +# UDP, without adding any intermediate header. This scripts tests several +# configurations of bareudp (using IPv4 or IPv6 as underlay and transporting +# IPv4, IPv6 or MPLS packets on the overlay). +# +# Network topology: +# +# * A chain of 4 network namespaces, connected with veth pairs. Each veth +# is assigned an IPv4 and an IPv6 address. A host-route allows a veth to +# join its peer. +# +# * NS0 and NS3 are at the extremities of the chain. They have additional +# IPv4 and IPv6 addresses on their loopback device. Routes are added in NS0 +# and NS3, so that they can communicate using these overlay IP addresses. +# For IPv4 and IPv6 reachability tests, the route simply sets the peer's +# veth address as gateway. For MPLS reachability tests, an MPLS header is +# also pushed before the IP header. +# +# * NS1 and NS2 are the intermediate namespaces. They use a bareudp device to +# encapsulate the traffic into UDP. +# +# +-----------------------------------------------------------------------+ +# | NS0 | +# | | +# | lo: | +# | * IPv4 address: 192.0.2.100/32 | +# | * IPv6 address: 2001:db8::100/128 | +# | * IPv6 address: 2001:db8::200/128 | +# | * IPv4 route: 192.0.2.103/32 reachable via 192.0.2.11 | +# | * IPv6 route: 2001:db8::103/128 reachable via 2001:db8::11 | +# | * IPv6 route: 2001:db8::203/128 reachable via 2001:db8::11 | +# | (encapsulated with MPLS label 203) | +# | | +# | veth01: | +# | ^ * IPv4 address: 192.0.2.10, peer 192.0.2.11/32 | +# | | * IPv6 address: 2001:db8::10, peer 2001:db8::11/128 | +# | | | +# +---+-------------------------------------------------------------------+ +# | +# | Traffic type: IP or MPLS (depending on test) +# | +# +---+-------------------------------------------------------------------+ +# | | NS1 | +# | | | +# | v | +# | veth10: | +# | * IPv4 address: 192.0.2.11, peer 192.0.2.10/32 | +# | * IPv6 address: 2001:db8::11, peer 2001:db8::10/128 | +# | | +# | bareudp_ns1: | +# | * Encapsulate IP or MPLS packets received on veth10 into UDP | +# | and send the resulting packets through veth12. | +# | * Decapsulate bareudp packets (either IP or MPLS, over UDP) | +# | received on veth12 and send the inner packets through veth10. | +# | | +# | veth12: | +# | ^ * IPv4 address: 192.0.2.21, peer 192.0.2.22/32 | +# | | * IPv6 address: 2001:db8::21, peer 2001:db8::22/128 | +# | | | +# +---+-------------------------------------------------------------------+ +# | +# | Traffic type: IP or MPLS (depending on test), over UDP +# | +# +---+-------------------------------------------------------------------+ +# | | NS2 | +# | | | +# | v | +# | veth21: | +# | * IPv4 address: 192.0.2.22, peer 192.0.2.21/32 | +# | * IPv6 address: 2001:db8::22, peer 2001:db8::21/128 | +# | | +# | bareudp_ns2: | +# | * Decapsulate bareudp packets (either IP or MPLS, over UDP) | +# | received on veth21 and send the inner packets through veth23. | +# | * Encapsulate IP or MPLS packets received on veth23 into UDP | +# | and send the resulting packets through veth21. | +# | | +# | veth23: | +# | ^ * IPv4 address: 192.0.2.32, peer 192.0.2.33/32 | +# | | * IPv6 address: 2001:db8::32, peer 2001:db8::33/128 | +# | | | +# +---+-------------------------------------------------------------------+ +# | +# | Traffic type: IP or MPLS (depending on test) +# | +# +---+-------------------------------------------------------------------+ +# | | NS3 | +# | v | +# | veth32: | +# | * IPv4 address: 192.0.2.33, peer 192.0.2.32/32 | +# | * IPv6 address: 2001:db8::33, peer 2001:db8::32/128 | +# | | +# | lo: | +# | * IPv4 address: 192.0.2.103/32 | +# | * IPv6 address: 2001:db8::103/128 | +# | * IPv6 address: 2001:db8::203/128 | +# | * IPv4 route: 192.0.2.100/32 reachable via 192.0.2.32 | +# | * IPv6 route: 2001:db8::100/128 reachable via 2001:db8::32 | +# | * IPv6 route: 2001:db8::200/128 reachable via 2001:db8::32 | +# | (encapsulated with MPLS label 200) | +# | | +# +-----------------------------------------------------------------------+ + +ERR=4 # Return 4 by default, which is the SKIP code for kselftest +PING6="ping" +PAUSE_ON_FAIL="no" + +readonly NS0=$(mktemp -u ns0-XXXXXXXX) +readonly NS1=$(mktemp -u ns1-XXXXXXXX) +readonly NS2=$(mktemp -u ns2-XXXXXXXX) +readonly NS3=$(mktemp -u ns3-XXXXXXXX) + +# Exit the script after having removed the network namespaces it created +# +# Parameters: +# +# * The list of network namespaces to delete before exiting. +# +exit_cleanup() +{ + for ns in "$@"; do + ip netns delete "${ns}" 2>/dev/null || true + done + + if [ "${ERR}" -eq 4 ]; then + echo "Error: Setting up the testing environment failed." >&2 + fi + + exit "${ERR}" +} + +# Create the four network namespaces used by the script (NS0, NS1, NS2 and NS3) +# +# New namespaces are cleaned up manually in case of error, to ensure that only +# namespaces created by this script are deleted. +create_namespaces() +{ + ip netns add "${NS0}" || exit_cleanup + ip netns add "${NS1}" || exit_cleanup "${NS0}" + ip netns add "${NS2}" || exit_cleanup "${NS0}" "${NS1}" + ip netns add "${NS3}" || exit_cleanup "${NS0}" "${NS1}" "${NS2}" +} + +# The trap function handler +# +exit_cleanup_all() +{ + exit_cleanup "${NS0}" "${NS1}" "${NS2}" "${NS3}" +} + +# Configure a network interface using a host route +# +# Parameters +# +# * $1: the netns the network interface resides in, +# * $2: the network interface name, +# * $3: the local IPv4 address to assign to this interface, +# * $4: the IPv4 address of the remote network interface, +# * $5: the local IPv6 address to assign to this interface, +# * $6: the IPv6 address of the remote network interface. +# +iface_config() +{ + local NS="${1}"; readonly NS + local DEV="${2}"; readonly DEV + local LOCAL_IP4="${3}"; readonly LOCAL_IP4 + local PEER_IP4="${4}"; readonly PEER_IP4 + local LOCAL_IP6="${5}"; readonly LOCAL_IP6 + local PEER_IP6="${6}"; readonly PEER_IP6 + + ip -netns "${NS}" link set dev "${DEV}" up + ip -netns "${NS}" address add dev "${DEV}" "${LOCAL_IP4}" peer "${PEER_IP4}" + ip -netns "${NS}" address add dev "${DEV}" "${LOCAL_IP6}" peer "${PEER_IP6}" nodad +} + +# Create base networking topology: +# +# * set up the loopback device in all network namespaces (NS0..NS3), +# * set up a veth pair to connect each netns in sequence (NS0 with NS1, +# NS1 with NS2, etc.), +# * add and IPv4 and an IPv6 address on each veth interface, +# * prepare the ingress qdiscs in the intermediate namespaces. +# +setup_underlay() +{ + for ns in "${NS0}" "${NS1}" "${NS2}" "${NS3}"; do + ip -netns "${ns}" link set dev lo up + done; + + ip link add name veth01 netns "${NS0}" type veth peer name veth10 netns "${NS1}" + ip link add name veth12 netns "${NS1}" type veth peer name veth21 netns "${NS2}" + ip link add name veth23 netns "${NS2}" type veth peer name veth32 netns "${NS3}" + iface_config "${NS0}" veth01 192.0.2.10 192.0.2.11/32 2001:db8::10 2001:db8::11/128 + iface_config "${NS1}" veth10 192.0.2.11 192.0.2.10/32 2001:db8::11 2001:db8::10/128 + iface_config "${NS1}" veth12 192.0.2.21 192.0.2.22/32 2001:db8::21 2001:db8::22/128 + iface_config "${NS2}" veth21 192.0.2.22 192.0.2.21/32 2001:db8::22 2001:db8::21/128 + iface_config "${NS2}" veth23 192.0.2.32 192.0.2.33/32 2001:db8::32 2001:db8::33/128 + iface_config "${NS3}" veth32 192.0.2.33 192.0.2.32/32 2001:db8::33 2001:db8::32/128 + + tc -netns "${NS1}" qdisc add dev veth10 ingress + tc -netns "${NS2}" qdisc add dev veth23 ingress +} + +# Set up the IPv4, IPv6 and MPLS overlays. +# +# Configuration is similar for all protocols: +# +# * add an overlay IP address on the loopback interface of each edge +# namespace, +# * route these IP addresses via the intermediate namespaces (for the MPLS +# tests, this is also where MPLS encapsulation is done), +# * add routes for these IP addresses (or MPLS labels) in the intermediate +# namespaces. +# +# The bareudp encapsulation isn't configured in setup_overlay_*(). That will be +# done just before running the reachability tests. + +setup_overlay_ipv4() +{ + # Add the overlay IP addresses and route them through the veth devices + ip -netns "${NS0}" address add 192.0.2.100/32 dev lo + ip -netns "${NS3}" address add 192.0.2.103/32 dev lo + ip -netns "${NS0}" route add 192.0.2.103/32 src 192.0.2.100 via 192.0.2.11 + ip -netns "${NS3}" route add 192.0.2.100/32 src 192.0.2.103 via 192.0.2.32 + + # Route the overlay addresses in the intermediate namespaces + # (used after bareudp decapsulation) + ip netns exec "${NS1}" sysctl -qw net.ipv4.ip_forward=1 + ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1 + ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10 + ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33 +} + +setup_overlay_ipv6() +{ + # Add the overlay IP addresses and route them through the veth devices + ip -netns "${NS0}" address add 2001:db8::100/128 dev lo + ip -netns "${NS3}" address add 2001:db8::103/128 dev lo + ip -netns "${NS0}" route add 2001:db8::103/128 src 2001:db8::100 via 2001:db8::11 + ip -netns "${NS3}" route add 2001:db8::100/128 src 2001:db8::103 via 2001:db8::32 + + # Route the overlay addresses in the intermediate namespaces + # (used after bareudp decapsulation) + ip netns exec "${NS1}" sysctl -qw net.ipv6.conf.all.forwarding=1 + ip netns exec "${NS2}" sysctl -qw net.ipv6.conf.all.forwarding=1 + ip -netns "${NS1}" route add 2001:db8::100/128 via 2001:db8::10 + ip -netns "${NS2}" route add 2001:db8::103/128 via 2001:db8::33 +} + +setup_overlay_mpls() +{ + # Add specific overlay IP addresses, routed over MPLS + ip -netns "${NS0}" address add 2001:db8::200/128 dev lo + ip -netns "${NS3}" address add 2001:db8::203/128 dev lo + ip -netns "${NS0}" route add 2001:db8::203/128 src 2001:db8::200 encap mpls 203 via 2001:db8::11 + ip -netns "${NS3}" route add 2001:db8::200/128 src 2001:db8::203 encap mpls 200 via 2001:db8::32 + + # Route the MPLS packets in the intermediate namespaces + # (used after bareudp decapsulation) + ip netns exec "${NS1}" sysctl -qw net.mpls.platform_labels=256 + ip netns exec "${NS2}" sysctl -qw net.mpls.platform_labels=256 + ip -netns "${NS1}" -family mpls route add 200 via inet6 2001:db8::10 + ip -netns "${NS2}" -family mpls route add 203 via inet6 2001:db8::33 +} + +# Run "ping" from NS0 and print the result +# +# Parameters: +# +# * $1: the variant of ping to use (normally either "ping" or "ping6"), +# * $2: the IP address to ping, +# * $3: a human readable description of the purpose of the test. +# +# If the test fails and PAUSE_ON_FAIL is active, the user is given the +# possibility to continue with the next test or to quit immediately. +# +ping_test_one() +{ + local PING="$1"; readonly PING + local IP="$2"; readonly IP + local MSG="$3"; readonly MSG + local RET + + printf "TEST: %-60s " "${MSG}" + + set +e + ip netns exec "${NS0}" "${PING}" -w 5 -c 1 "${IP}" > /dev/null 2>&1 + RET=$? + set -e + + if [ "${RET}" -eq 0 ]; then + printf "[ OK ]\n" + else + ERR=1 + printf "[FAIL]\n" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + printf "\nHit enter to continue, 'q' to quit\n" + read a + if [ "$a" = "q" ]; then + exit 1 + fi + fi + fi +} + +# Run reachability tests +# +# Parameters: +# +# * $1: human readable string describing the underlay protocol. +# +# $IPV4, $IPV6, $MPLS_UC and $MULTIPROTO are inherited from the calling +# function. +# +ping_test() +{ + local UNDERLAY="$1"; readonly UNDERLAY + local MODE + local MSG + + if [ "${MULTIPROTO}" = "multiproto" ]; then + MODE=" (multiproto mode)" + else + MODE="" + fi + + if [ $IPV4 ]; then + ping_test_one "ping" "192.0.2.103" "IPv4 packets over ${UNDERLAY}${MODE}" + fi + if [ $IPV6 ]; then + ping_test_one "${PING6}" "2001:db8::103" "IPv6 packets over ${UNDERLAY}${MODE}" + fi + if [ $MPLS_UC ]; then + ping_test_one "${PING6}" "2001:db8::203" "Unicast MPLS packets over ${UNDERLAY}${MODE}" + fi +} + +# Set up a bareudp overlay and run reachability tests over IPv4 and IPv6 +# +# Parameters: +# +# * $1: the packet type (protocol) to be handled by bareudp, +# * $2: a flag to activate or deactivate bareudp's "multiproto" mode. +# +test_overlay() +{ + local ETHERTYPE="$1"; readonly ETHERTYPE + local MULTIPROTO="$2"; readonly MULTIPROTO + local IPV4 + local IPV6 + local MPLS_UC + + case "${ETHERTYPE}" in + "ipv4") + IPV4="ipv4" + if [ "${MULTIPROTO}" = "multiproto" ]; then + IPV6="ipv6" + else + IPV6="" + fi + MPLS_UC="" + ;; + "ipv6") + IPV6="ipv6" + IPV4="" + MPLS_UC="" + ;; + "mpls_uc") + MPLS_UC="mpls_uc" + IPV4="" + IPV6="" + ;; + *) + exit 1 + ;; + esac + readonly IPV4 + readonly IPV6 + readonly MPLS_UC + + # Create the bareudp devices in the intermediate namespaces + ip -netns "${NS1}" link add name bareudp_ns1 up type bareudp dstport 6635 ethertype "${ETHERTYPE}" "${MULTIPROTO}" + ip -netns "${NS2}" link add name bareudp_ns2 up type bareudp dstport 6635 ethertype "${ETHERTYPE}" "${MULTIPROTO}" + + # IPv4 over UDPv4 + if [ $IPV4 ]; then + # Encapsulation instructions for bareudp over IPv4 + tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv4 \ + flower dst_ip 192.0.2.103/32 \ + action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv4 \ + flower dst_ip 192.0.2.100/32 \ + action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # IPv6 over UDPv4 + if [ $IPV6 ]; then + # Encapsulation instructions for bareudp over IPv4 + tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv6 \ + flower dst_ip 2001:db8::103/128 \ + action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv6 \ + flower dst_ip 2001:db8::100/128 \ + action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # MPLS (unicast) over UDPv4 + if [ $MPLS_UC ]; then + ip netns exec "${NS1}" sysctl -qw net.mpls.conf.bareudp_ns1.input=1 + ip netns exec "${NS2}" sysctl -qw net.mpls.conf.bareudp_ns2.input=1 + + # Encapsulation instructions for bareudp over IPv4 + tc -netns "${NS1}" filter add dev veth10 ingress protocol mpls_uc \ + flower mpls_label 203 \ + action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol mpls_uc \ + flower mpls_label 200 \ + action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # Test IPv4 underlay + ping_test "UDPv4" + + # Cleanup bareudp encapsulation instructions, as they were specific to + # the IPv4 underlay, before setting up and testing the IPv6 underlay + tc -netns "${NS1}" filter delete dev veth10 ingress + tc -netns "${NS2}" filter delete dev veth23 ingress + + # IPv4 over UDPv6 + if [ $IPV4 ]; then + # New encapsulation instructions for bareudp over IPv6 + tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv4 \ + flower dst_ip 192.0.2.103/32 \ + action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv4 \ + flower dst_ip 192.0.2.100/32 \ + action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # IPv6 over UDPv6 + if [ $IPV6 ]; then + # New encapsulation instructions for bareudp over IPv6 + tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv6 \ + flower dst_ip 2001:db8::103/128 \ + action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv6 \ + flower dst_ip 2001:db8::100/128 \ + action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # MPLS (unicast) over UDPv6 + if [ $MPLS_UC ]; then + # New encapsulation instructions for bareudp over IPv6 + tc -netns "${NS1}" filter add dev veth10 ingress protocol mpls_uc \ + flower mpls_label 203 \ + action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol mpls_uc \ + flower mpls_label 200 \ + action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # Test IPv6 underlay + ping_test "UDPv6" + + tc -netns "${NS1}" filter delete dev veth10 ingress + tc -netns "${NS2}" filter delete dev veth23 ingress + ip -netns "${NS1}" link delete bareudp_ns1 + ip -netns "${NS2}" link delete bareudp_ns2 +} + +check_features() +{ + ip link help 2>&1 | grep -q bareudp + if [ $? -ne 0 ]; then + echo "Missing bareudp support in iproute2" >&2 + exit_cleanup + fi + + # Use ping6 on systems where ping doesn't handle IPv6 + ping -w 1 -c 1 ::1 > /dev/null 2>&1 || PING6="ping6" +} + +usage() +{ + echo "Usage: $0 [-p]" + exit 1 +} + +while getopts :p o +do + case $o in + p) PAUSE_ON_FAIL="yes";; + *) usage;; + esac +done + +check_features + +# Create namespaces before setting up the exit trap. +# Otherwise, exit_cleanup_all() could delete namespaces that were not created +# by this script. +create_namespaces + +set -e +trap exit_cleanup_all EXIT + +setup_underlay +setup_overlay_ipv4 +setup_overlay_ipv6 +setup_overlay_mpls + +test_overlay ipv4 nomultiproto +test_overlay ipv6 nomultiproto +test_overlay ipv4 multiproto +test_overlay mpls_uc nomultiproto + +if [ "${ERR}" -eq 1 ]; then + echo "Some tests failed." >&2 +else + ERR=0 +fi diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 4d5df8e1eee7..614d5477365a 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -34,3 +34,10 @@ CONFIG_TRACEPOINTS=y CONFIG_NET_DROP_MONITOR=m CONFIG_NETDEVSIM=m CONFIG_NET_FOU=m +CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_MIRRED=m +CONFIG_BAREUDP=m -- cgit v1.3.1 From e1eb075ccf3766860b7aa3f104ca29dcb8a46ed0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 15 Sep 2020 14:33:38 -0700 Subject: rcutorture: Make preemptible TRACE02 enable lockdep Currently, the CONFIG_PREEMPT_NONE=y rcutorture TRACE01 rcutorture scenario enables lockdep. This limits its ability to find bugs due to non-preemptible sections of code being RCU readers, and pretty much all code thus appearing to lockdep to be an RCU reader. This commit therefore moves lockdep testing to the CONFIG_PREEMPT=y rcutorture TRACE02 scenario. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/configs/rcu/TRACE01 | 6 +++--- tools/testing/selftests/rcutorture/configs/rcu/TRACE02 | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 index 12e7661b86f5..34c8ff5a12f2 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 @@ -4,8 +4,8 @@ CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n -CONFIG_DEBUG_LOCK_ALLOC=y -CONFIG_PROVE_LOCKING=y -#CHECK#CONFIG_PROVE_RCU=y +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n +#CHECK#CONFIG_PROVE_RCU=n CONFIG_TASKS_TRACE_RCU_READ_MB=y CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 index b69ed6673c41..77541eeb4e9f 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 @@ -4,8 +4,8 @@ CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y -CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_PROVE_LOCKING=n -#CHECK#CONFIG_PROVE_RCU=n +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y +#CHECK#CONFIG_PROVE_RCU=y CONFIG_TASKS_TRACE_RCU_READ_MB=n CONFIG_RCU_EXPERT=y -- cgit v1.3.1 From 08c7974293851da6a64989b5ce7a0750e58178b1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 28 Aug 2020 06:46:03 -0700 Subject: torture: Don't kill gdb sessions The rcutorture scripting will do a "kill -9" on any guest OS that exceeds its --duration by more than a few minutes, which is very valuable when bugs result in hangs. However, this is a problem when the "hang" was due to a --gdb debugging session. This commit therefore refrains from killing the guest OS when a debugging session is in progress. This means that the user must manually kill the kvm.sh process group if a hang really does occur. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index 6dc2b49b85ea..d04966ab88cc 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -206,7 +206,10 @@ do kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null` if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1 then - if test $kruntime -ge $seconds -o -f "$TORTURE_STOPFILE" + if test -n "$TORTURE_KCONFIG_GDB_ARG" + then + : + elif test $kruntime -ge $seconds || test -f "$TORTURE_STOPFILE" then break; fi -- cgit v1.3.1 From 716e343f014e2b25320f332677363e884684b742 Mon Sep 17 00:00:00 2001 From: Michael Weiß Date: Tue, 27 Oct 2020 21:42:58 +0100 Subject: selftests/timens: added selftest for /proc/stat btime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test that btime value of /proc/stat is as expected in the time namespace using a simple parser to get btime from /proc/stat. Signed-off-by: Michael Weiß Reviewed-by: Andrei Vagin Acked-by: Thomas Gleixner Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20201027204258.7869-4-michael.weiss@aisec.fraunhofer.de --- tools/testing/selftests/timens/procfs.c | 58 ++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/timens/procfs.c b/tools/testing/selftests/timens/procfs.c index 7f14f0fdac84..f2519154208a 100644 --- a/tools/testing/selftests/timens/procfs.c +++ b/tools/testing/selftests/timens/procfs.c @@ -93,6 +93,33 @@ static int read_proc_uptime(struct timespec *uptime) return 0; } +static int read_proc_stat_btime(unsigned long long *boottime_sec) +{ + FILE *proc; + char line_buf[2048]; + + proc = fopen("/proc/stat", "r"); + if (proc == NULL) { + pr_perror("Unable to open /proc/stat"); + return -1; + } + + while (fgets(line_buf, 2048, proc)) { + if (sscanf(line_buf, "btime %llu", boottime_sec) != 1) + continue; + fclose(proc); + return 0; + } + if (errno) { + pr_perror("fscanf"); + fclose(proc); + return -errno; + } + pr_err("failed to parse /proc/stat"); + fclose(proc); + return -1; +} + static int check_uptime(void) { struct timespec uptime_new, uptime_old; @@ -123,18 +150,47 @@ static int check_uptime(void) return 0; } +static int check_stat_btime(void) +{ + unsigned long long btime_new, btime_old; + unsigned long long btime_expected; + + if (switch_ns(parent_ns)) + return pr_err("switch_ns(%d)", parent_ns); + + if (read_proc_stat_btime(&btime_old)) + return 1; + + if (switch_ns(child_ns)) + return pr_err("switch_ns(%d)", child_ns); + + if (read_proc_stat_btime(&btime_new)) + return 1; + + btime_expected = btime_old - TEN_DAYS_IN_SEC; + if (btime_new != btime_expected) { + pr_fail("btime in /proc/stat: old %llu, new %llu [%llu]", + btime_old, btime_new, btime_expected); + return 1; + } + + ksft_test_result_pass("Passed for /proc/stat btime\n"); + return 0; +} + int main(int argc, char *argv[]) { int ret = 0; nscheck(); - ksft_set_plan(1); + ksft_set_plan(2); if (init_namespaces()) return 1; ret |= check_uptime(); + ret |= check_stat_btime(); if (ret) ksft_exit_fail(); -- cgit v1.3.1 From b773ea650576f14442f7a546f2b15e64b10ed0eb Mon Sep 17 00:00:00 2001 From: "Justin M. Forbes" Date: Wed, 28 Oct 2020 13:59:00 -0300 Subject: perf tools: Remove LTO compiler options when building perl support To avoid breaking the build by mixing files compiled with things coming from distro specific compiler options for perl with the rest of perf, i.e. to avoid this: `.gnu.debuglto_.debug_macro' referenced in section `.gnu.debuglto_.debug_macro' of /tmp/build/perf/util/scripting-engines/perf-in.o: defined in discarded section `.gnu.debuglto_.debug_macro[wm4.stdcpredef.h.19.8dc41bed5d9037ff9622e015fb5f0ce3]' of /tmp/build/perf/util/scripting-engines/perf-in.o Noticed on Fedora 33. Signed-off-by: Justin M. Forbes Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1593431 Cc: Jiri Olsa Link: https://src.fedoraproject.org/rpms/kernel-tools/c/589a32b62f0c12516ab7b34e3dd30d450145bfa4?branch=master Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 6890fc4b063a..ce8516e4de34 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -749,6 +749,7 @@ else PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS)) + PERL_EMBED_CCOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(PERL_EMBED_CCOPTS)) PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS)) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) -- cgit v1.3.1 From e555b4b8d7b2844a9e48e06a7c3e4f9e44af847f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 12:26:45 -0300 Subject: perf tools: Update copy of libbpf's hashmap.c To pick the changes in: 85367030a6c7ef33 ("libbpf: Centralize poisoning and poison reallocarray()") 7d9c71e10baa3496 ("libbpf: Extract generic string hashing function for reuse") That don't entail any changes in tools/perf. This addresses this perf build warning: Warning: Kernel ABI header at 'tools/perf/util/hashmap.h' differs from latest version at 'tools/lib/bpf/hashmap.h' diff -u tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h Not a kernel ABI, its just that this uses the mechanism in place for checking kernel ABI files drift. Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hashmap.c | 3 +++ tools/perf/util/hashmap.h | 12 ++++++++++++ 2 files changed, 15 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/hashmap.c b/tools/perf/util/hashmap.c index a405dad068f5..3c20b126d60d 100644 --- a/tools/perf/util/hashmap.c +++ b/tools/perf/util/hashmap.c @@ -15,6 +15,9 @@ /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 +/* prevent accidental re-addition of reallocarray() */ +#pragma GCC poison reallocarray + /* start with 4 buckets */ #define HASHMAP_MIN_CAP_BITS 2 diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h index e0af36b0e5d8..d9b385fe808c 100644 --- a/tools/perf/util/hashmap.h +++ b/tools/perf/util/hashmap.h @@ -25,6 +25,18 @@ static inline size_t hash_bits(size_t h, int bits) #endif } +/* generic C-string hashing function */ +static inline size_t str_hash(const char *s) +{ + size_t h = 0; + + while (*s) { + h = h * 31 + *s; + s++; + } + return h; +} + typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx); typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx); -- cgit v1.3.1 From 263e452eff397b370e39d464c8cbd30f6bd59fb9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Nov 2020 08:29:30 -0300 Subject: tools headers UAPI: Update process_madvise affected files To pick the changes from: ecb8ac8b1f146915 ("mm/madvise: introduce process_madvise() syscall: an external memory hinting API") That addresses these perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h' diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl' diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Linus Torvalds Cc: Minchan Kim Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/unistd.h | 4 +++- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 11 +++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index f2b5d72a46c2..2056318988f7 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -857,9 +857,11 @@ __SYSCALL(__NR_openat2, sys_openat2) __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) #define __NR_faccessat2 439 __SYSCALL(__NR_faccessat2, sys_faccessat2) +#define __NR_process_madvise 440 +__SYSCALL(__NR_process_madvise, sys_process_madvise) #undef __NR_syscalls -#define __NR_syscalls 440 +#define __NR_syscalls 441 /* * 32 bit systems traditionally used different diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 347809649ba2..379819244b91 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -361,12 +361,13 @@ 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 +440 common process_madvise sys_process_madvise # -# x32-specific system call numbers start at 512 to avoid cache impact -# for native 64-bit operation. The __x32_compat_sys stubs are created -# on-the-fly for compat_sys_*() compatibility system calls if X86_X32 -# is defined. +# Due to a historical design error, certain syscalls are numbered differently +# in x32 as compared to native x86_64. These syscalls have numbers 512-547. +# Do not add new syscalls to this range. Numbers 548 and above are available +# for non-x32 use. # 512 x32 rt_sigaction compat_sys_rt_sigaction 513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn @@ -404,3 +405,5 @@ 545 x32 execveat compat_sys_execveat 546 x32 preadv2 compat_sys_preadv64v2 547 x32 pwritev2 compat_sys_pwritev64v2 +# This is the end of the legacy x32 range. Numbers 548 and above are +# not special and are not to be used for x32-specific syscalls. -- cgit v1.3.1 From ab8bf5f2e0321f254590ad81c6e230185d88b4e5 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Fri, 16 Oct 2020 14:47:18 +0300 Subject: perf tools: Fix crash with non-jited bpf progs The addr in PERF_RECORD_KSYMBOL events for non-jited bpf progs points to the bpf interpreter, ie. within kernel text section. When processing the unregister event, this causes unexpected removal of vmlinux_map, crashing perf later in cleanup: # perf record -- timeout --signal=INT 2s /usr/share/bcc/tools/execsnoop PCOMM PID PPID RET ARGS [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.208 MB perf.data (5155 samples) ] perf: tools/include/linux/refcount.h:131: refcount_sub_and_test: Assertion `!(new > val)' failed. Aborted (core dumped) # perf script -D|grep KSYM 0 0xa40 [0x48]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b530 len 0 type 1 flags 0x0 name bpf_prog_f958f6eb72ef5af6 0 0xab0 [0x48]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b530 len 0 type 1 flags 0x0 name bpf_prog_8c42dee26e8cd4c2 0 0xb20 [0x48]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b530 len 0 type 1 flags 0x0 name bpf_prog_f958f6eb72ef5af6 108563691893 0x33d98 [0x58]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b3b0 len 0 type 1 flags 0x0 name bpf_prog_bc5697a410556fc2_syscall__execve 108568518458 0x34098 [0x58]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b3f0 len 0 type 1 flags 0x0 name bpf_prog_45e2203c2928704d_do_ret_sys_execve 109301967895 0x34830 [0x58]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b3b0 len 0 type 1 flags 0x1 name bpf_prog_bc5697a410556fc2_syscall__execve 109302007356 0x348b0 [0x58]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b3f0 len 0 type 1 flags 0x1 name bpf_prog_45e2203c2928704d_do_ret_sys_execve perf: tools/include/linux/refcount.h:131: refcount_sub_and_test: Assertion `!(new > val)' failed. Here the addresses match the bpf interpreter: # grep -e ffffffffa9b6b530 -e ffffffffa9b6b3b0 -e ffffffffa9b6b3f0 /proc/kallsyms ffffffffa9b6b3b0 t __bpf_prog_run224 ffffffffa9b6b3f0 t __bpf_prog_run192 ffffffffa9b6b530 t __bpf_prog_run32 Fix by not allowing vmlinux_map to be removed by PERF_RECORD_KSYMBOL unregister event. Signed-off-by: Tommi Rantala Acked-by: Jiri Olsa Tested-by: Jiri Olsa Link: https://lore.kernel.org/r/20201016114718.54332-1-tommi.t.rantala@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 11 ++++++++++- tools/perf/util/symbol.c | 7 +++++++ tools/perf/util/symbol.h | 2 ++ 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 7d4194ffc5b0..15385ea00190 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -786,11 +786,20 @@ static int machine__process_ksymbol_unregister(struct machine *machine, union perf_event *event, struct perf_sample *sample __maybe_unused) { + struct symbol *sym; struct map *map; map = maps__find(&machine->kmaps, event->ksymbol.addr); - if (map) + if (!map) + return 0; + + if (map != machine->vmlinux_map) maps__remove(&machine->kmaps, map); + else { + sym = dso__find_symbol(map->dso, map->map_ip(map, map->start)); + if (sym) + dso__delete_symbol(map->dso, sym); + } return 0; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 6138866665df..0d14abdf3d72 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -515,6 +515,13 @@ void dso__insert_symbol(struct dso *dso, struct symbol *sym) } } +void dso__delete_symbol(struct dso *dso, struct symbol *sym) +{ + rb_erase_cached(&sym->rb_node, &dso->symbols); + symbol__delete(sym); + dso__reset_find_symbol_cache(dso); +} + struct symbol *dso__find_symbol(struct dso *dso, u64 addr) { if (dso->last_find_result.addr != addr || dso->last_find_result.symbol == NULL) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index f4801c488def..954d6a049ee2 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -131,6 +131,8 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map); void dso__insert_symbol(struct dso *dso, struct symbol *sym); +void dso__delete_symbol(struct dso *dso, + struct symbol *sym); struct symbol *dso__find_symbol(struct dso *dso, u64 addr); struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name); -- cgit v1.3.1 From a6293f36ac92ab513771a98efe486477be2f981f Mon Sep 17 00:00:00 2001 From: Stanislav Ivanichkin Date: Tue, 27 Oct 2020 12:43:57 +0300 Subject: perf trace: Fix segfault when trying to trace events by cgroup # ./perf trace -e sched:sched_switch -G test -a sleep 1 perf: Segmentation fault Obtained 11 stack frames. ./perf(sighandler_dump_stack+0x43) [0x55cfdc636db3] /lib/x86_64-linux-gnu/libc.so.6(+0x3efcf) [0x7fd23eecafcf] ./perf(parse_cgroups+0x36) [0x55cfdc673f36] ./perf(+0x3186ed) [0x55cfdc70d6ed] ./perf(parse_options_subcommand+0x629) [0x55cfdc70e999] ./perf(cmd_trace+0x9c2) [0x55cfdc5ad6d2] ./perf(+0x1e8ae0) [0x55cfdc5ddae0] ./perf(+0x1e8ded) [0x55cfdc5ddded] ./perf(main+0x370) [0x55cfdc556f00] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xe6) [0x7fd23eeadb96] ./perf(_start+0x29) [0x55cfdc557389] Segmentation fault # It happens because "struct trace" in option->value is passed to the parse_cgroups function instead of "struct evlist". Fixes: 9ea42ba4411ac ("perf trace: Support setting cgroups as targets") Signed-off-by: Stanislav Ivanichkin Tested-by: Arnaldo Carvalho de Melo Acked-by: Namhyung Kim Cc: Dmitry Monakhov Link: http://lore.kernel.org/lkml/20201027094357.94881-1-sivanichkin@yandex-team.ru Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 44a75f234db1..de80534473af 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -4639,9 +4639,9 @@ do_concat: err = 0; if (lists[0]) { - struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event", - "event selector. use 'perf list' to list available events", - parse_events_option); + struct option o = { + .value = &trace->evlist, + }; err = parse_events_option(&o, lists[0], 0); } out: @@ -4655,9 +4655,12 @@ static int trace__parse_cgroups(const struct option *opt, const char *str, int u { struct trace *trace = opt->value; - if (!list_empty(&trace->evlist->core.entries)) - return parse_cgroups(opt, str, unset); - + if (!list_empty(&trace->evlist->core.entries)) { + struct option o = { + .value = &trace->evlist, + }; + return parse_cgroups(&o, str, unset); + } trace->cgroup = evlist__findnew_cgroup(trace->evlist, str); return 0; -- cgit v1.3.1 From 0dfbe4c646bf06a85c3d70572a8b8aa6ebffe3d5 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 23 Oct 2020 08:53:34 +0800 Subject: perf vendor events: Fix DRAM_BW_Use 0 issue for CLX/SKX Ian reports an issue that the metric DRAM_BW_Use often remains 0. The metric expression for DRAM_BW_Use on CLX/SKX: "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time" The counts of uncore_imc/cas_count_read/ and uncore_imc/cas_count_write/ are scaled up by 64, that is to turn a count of cache lines into bytes, the count is then divided by 1000000000 to give GB. However, the counts of uncore_imc/cas_count_read/ and uncore_imc/cas_count_write/ have been scaled yet. The scale values are from sysfs, such as /sys/devices/uncore_imc_0/events/cas_count_read.scale. It's 6.103515625e-5 (64 / 1024.0 / 1024.0). So if we use original metric expression, the result is not correct. But the difficulty is, for SKL client, the counts are not scaled. The metric expression for DRAM_BW_Use on SKL: "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000" root@kbl-ppc:~# perf stat -M DRAM_BW_Use -a -- sleep 1 Performance counter stats for 'system wide': 190 arb/event=0x84,umask=0x1/ # 1.86 DRAM_BW_Use 29,093,178 arb/event=0x81,umask=0x1/ 1,000,703,287 ns duration_time 1.000703287 seconds time elapsed The result is expected. So the easy way is just change the metric expression for CLX/SKX. This patch changes the metric expression to: "( ( ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) * 1048576 ) / 1000000000 ) / duration_time" 1048576 = 1024 * 1024. Before (tested on CLX): root@lkp-csl-2sp5 ~# perf stat -M DRAM_BW_Use -a -- sleep 1 Performance counter stats for 'system wide': 765.35 MiB uncore_imc/cas_count_read/ # 0.00 DRAM_BW_Use 5.42 MiB uncore_imc/cas_count_write/ 1001515088 ns duration_time 1.001515088 seconds time elapsed After: root@lkp-csl-2sp5 ~# perf stat -M DRAM_BW_Use -a -- sleep 1 Performance counter stats for 'system wide': 767.95 MiB uncore_imc/cas_count_read/ # 0.80 DRAM_BW_Use 5.02 MiB uncore_imc/cas_count_write/ 1001900010 ns duration_time 1.001900010 seconds time elapsed Fixes: 038d3b53c284 ("perf vendor events intel: Update CascadelakeX events to v1.08") Fixes: b5ff7f2799a4 ("perf vendor events: Update SkylakeX events to v1.21") Signed-off-by: Jin Yao Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20201023005334.7869-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json | 2 +- tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index de3193552277..00f4fcffa815 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json @@ -329,7 +329,7 @@ }, { "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", - "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", + "MetricExpr": "( ( ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) * 1048576 ) / 1000000000 ) / duration_time", "MetricGroup": "Memory_BW;SoC", "MetricName": "DRAM_BW_Use" }, diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index f31794d3b926..0dd8b13b5cfb 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -323,7 +323,7 @@ }, { "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", - "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", + "MetricExpr": "( ( ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) * 1048576 ) / 1000000000 ) / duration_time", "MetricGroup": "Memory_BW;SoC", "MetricName": "DRAM_BW_Use" }, -- cgit v1.3.1 From 9ae1e990f1ab522b98baefbfebf3cbac1a2cfac2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 28 Oct 2020 09:11:23 +0100 Subject: perf tools: Remove broken __no_tail_call attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GCC specific __attribute__((optimize)) attribute does not what is commonly expected and is explicitly recommended against using in production code by the GCC people. Unlike what is often expected, it doesn't add to the optimization flags, but it fully replaces them, loosing any and all optimization flags provided by the compiler commandline. The only guaranteed upon means of inhibiting tail-calls is by placing a volatile asm with side-effects after the call such that the tail-call simply cannot be done. Given the original commit wasn't specific on which calls were the problem, this removal might re-introduce the problem, which can then be re-analyzed and cured properly. Signed-off-by: Peter Zijlstra Acked-by: Ard Biesheuvel Acked-by: Miguel Ojeda Cc: Alexei Starovoitov Cc: Arnd Bergmann Cc: Arvind Sankar Cc: Daniel Borkmann Cc: Geert Uytterhoeven Cc: Ian Rogers Cc: Josh Poimboeuf Cc: Kees Kook Cc: Martin Liška Cc: Nick Desaulniers Cc: Randy Dunlap Cc: Thomas Gleixner Link: http://lore.kernel.org/lkml/20201028081123.GT2628@hirez.programming.kicks-ass.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler-gcc.h | 12 ------------ tools/include/linux/compiler.h | 3 --- tools/perf/tests/dwarf-unwind.c | 10 +++++----- 3 files changed, 5 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index b9d4322e1e65..95c072b70d0e 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -27,18 +27,6 @@ #define __pure __attribute__((pure)) #endif #define noinline __attribute__((noinline)) -#ifdef __has_attribute -#if __has_attribute(disable_tail_calls) -#define __no_tail_call __attribute__((disable_tail_calls)) -#endif -#endif -#ifndef __no_tail_call -#if GCC_VERSION > 40201 -#define __no_tail_call __attribute__((optimize("no-optimize-sibling-calls"))) -#else -#define __no_tail_call -#endif -#endif #ifndef __packed #define __packed __attribute__((packed)) #endif diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 2b3f7353e891..d22a974372c0 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -47,9 +47,6 @@ #ifndef noinline #define noinline #endif -#ifndef __no_tail_call -#define __no_tail_call -#endif /* Are two types/vars the same type (ignoring qualifiers)? */ #ifndef __same_type diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 2491d167bf76..83638097c3bc 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -95,7 +95,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) return strcmp((const char *) symbol, funcs[idx]); } -__no_tail_call noinline int test_dwarf_unwind__thread(struct thread *thread) +noinline int test_dwarf_unwind__thread(struct thread *thread) { struct perf_sample sample; unsigned long cnt = 0; @@ -126,7 +126,7 @@ __no_tail_call noinline int test_dwarf_unwind__thread(struct thread *thread) static int global_unwind_retval = -INT_MAX; -__no_tail_call noinline int test_dwarf_unwind__compare(void *p1, void *p2) +noinline int test_dwarf_unwind__compare(void *p1, void *p2) { /* Any possible value should be 'thread' */ struct thread *thread = *(struct thread **)p1; @@ -145,7 +145,7 @@ __no_tail_call noinline int test_dwarf_unwind__compare(void *p1, void *p2) return p1 - p2; } -__no_tail_call noinline int test_dwarf_unwind__krava_3(struct thread *thread) +noinline int test_dwarf_unwind__krava_3(struct thread *thread) { struct thread *array[2] = {thread, thread}; void *fp = &bsearch; @@ -164,12 +164,12 @@ __no_tail_call noinline int test_dwarf_unwind__krava_3(struct thread *thread) return global_unwind_retval; } -__no_tail_call noinline int test_dwarf_unwind__krava_2(struct thread *thread) +noinline int test_dwarf_unwind__krava_2(struct thread *thread) { return test_dwarf_unwind__krava_3(thread); } -__no_tail_call noinline int test_dwarf_unwind__krava_1(struct thread *thread) +noinline int test_dwarf_unwind__krava_1(struct thread *thread) { return test_dwarf_unwind__krava_2(thread); } -- cgit v1.3.1 From d0e7b0c71fbb653de90a7163ef46912a96f0bdaf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 30 Oct 2020 08:24:38 -0300 Subject: perf scripting python: Avoid declaring function pointers with a visibility attribute To avoid this: util/scripting-engines/trace-event-python.c: In function 'python_start_script': util/scripting-engines/trace-event-python.c:1595:2: error: 'visibility' attribute ignored [-Werror=attributes] 1595 | PyMODINIT_FUNC (*initfunc)(void); | ^~~~~~~~~~~~~~ That started breaking when building with PYTHON=python3 and these gcc versions (I haven't checked with the clang ones, maybe it breaks there as well): # export PERF_TARBALL=http://192.168.86.5/perf/perf-5.9.0.tar.xz # dm fedora:33 fedora:rawhide 1 107.80 fedora:33 : Ok gcc (GCC) 10.2.1 20201005 (Red Hat 10.2.1-5), clang version 11.0.0 (Fedora 11.0.0-1.fc33) 2 92.47 fedora:rawhide : Ok gcc (GCC) 10.2.1 20201016 (Red Hat 10.2.1-6), clang version 11.0.0 (Fedora 11.0.0-1.fc34) # Avoid that by ditching that 'initfunc' function pointer with its: #define Py_EXPORTED_SYMBOL _attribute_ ((visibility ("default"))) #define PyMODINIT_FUNC Py_EXPORTED_SYMBOL PyObject* And just call PyImport_AppendInittab() at the end of the ifdef python3 block with the functions that were being attributed to that initfunc. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/scripting-engines/trace-event-python.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 7cbd024e3e63..c83c2c6564e0 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1592,7 +1592,6 @@ static void _free_command_line(wchar_t **command_line, int num) static int python_start_script(const char *script, int argc, const char **argv) { struct tables *tables = &tables_global; - PyMODINIT_FUNC (*initfunc)(void); #if PY_MAJOR_VERSION < 3 const char **command_line; #else @@ -1607,20 +1606,18 @@ static int python_start_script(const char *script, int argc, const char **argv) FILE *fp; #if PY_MAJOR_VERSION < 3 - initfunc = initperf_trace_context; command_line = malloc((argc + 1) * sizeof(const char *)); command_line[0] = script; for (i = 1; i < argc + 1; i++) command_line[i] = argv[i - 1]; + PyImport_AppendInittab(name, initperf_trace_context); #else - initfunc = PyInit_perf_trace_context; command_line = malloc((argc + 1) * sizeof(wchar_t *)); command_line[0] = Py_DecodeLocale(script, NULL); for (i = 1; i < argc + 1; i++) command_line[i] = Py_DecodeLocale(argv[i - 1], NULL); + PyImport_AppendInittab(name, PyInit_perf_trace_context); #endif - - PyImport_AppendInittab(name, initfunc); Py_Initialize(); #if PY_MAJOR_VERSION < 3 -- cgit v1.3.1 From ad6330ac2c5a38e5573cb6ae8ff75288bfd96325 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 12:38:16 -0300 Subject: tools headers UAPI: Sync prctl.h with the kernel sources To get the changes in: 1c101da8b971a366 ("arm64: mte: Allow user control of the tag check mode via prctl()") af5ce95282dc99d0 ("arm64: mte: Allow user control of the generated random tags via prctl()") Which don't cause any change in tooling, only addresses this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/prctl.h' differs from latest version at 'include/uapi/linux/prctl.h' diff -u tools/include/uapi/linux/prctl.h include/uapi/linux/prctl.h Cc: Adrian Hunter Cc: Catalin Marinas Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/prctl.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 07b4f8131e36..7f0827705c9a 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -233,6 +233,15 @@ struct prctl_mm_map { #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) +/* MTE tag check fault modes */ +# define PR_MTE_TCF_SHIFT 1 +# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) +/* MTE tag inclusion mask */ +# define PR_MTE_TAG_SHIFT 3 +# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 -- cgit v1.3.1 From 9e228f48980635c187720c0956b39c04db5e8f56 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 12:41:58 -0300 Subject: tools headers UAPI: Sync drm/i915_drm.h with the kernel sources To pick the changes in: 13149e8bafc46572 ("drm/i915: add syncobj timeline support") cda9edd02425d790 ("drm/i915: introduce a mechanism to extend execbuf2") That don't result in any changes in tooling, just silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h' diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Lionel Landwerlin Cc: Namhyung Kim Cc: Rodrigo Vivi Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/i915_drm.h | 59 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 00546062e023..fa1f3d62f9a6 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -619,6 +619,12 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_PERF_REVISION 54 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * timeline syncobj through drm_i915_gem_execbuffer_ext_timeline_fences. See + * I915_EXEC_USE_EXTENSIONS. + */ +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -1046,6 +1052,38 @@ struct drm_i915_gem_exec_fence { __u32 flags; }; +/** + * See drm_i915_gem_execbuffer_ext_timeline_fences. + */ +#define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0 + +/** + * This structure describes an array of drm_syncobj and associated points for + * timeline variants of drm_syncobj. It is invalid to append this structure to + * the execbuf if I915_EXEC_FENCE_ARRAY is set. + */ +struct drm_i915_gem_execbuffer_ext_timeline_fences { + struct i915_user_extension base; + + /** + * Number of element in the handles_ptr & value_ptr arrays. + */ + __u64 fence_count; + + /** + * Pointer to an array of struct drm_i915_gem_exec_fence of length + * fence_count. + */ + __u64 handles_ptr; + + /** + * Pointer to an array of u64 values of length fence_count. Values + * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline + * drm_syncobj is invalid as it turns a drm_syncobj into a binary one. + */ + __u64 values_ptr; +}; + struct drm_i915_gem_execbuffer2 { /** * List of gem_exec_object2 structs @@ -1062,8 +1100,14 @@ struct drm_i915_gem_execbuffer2 { __u32 num_cliprects; /** * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY - * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a - * struct drm_i915_gem_exec_fence *fences. + * & I915_EXEC_USE_EXTENSIONS are not set. + * + * If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array + * of struct drm_i915_gem_exec_fence and num_cliprects is the length + * of the array. + * + * If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a + * single struct i915_user_extension and num_cliprects is 0. */ __u64 cliprects_ptr; #define I915_EXEC_RING_MASK (0x3f) @@ -1181,7 +1225,16 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_FENCE_SUBMIT (1 << 20) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1)) +/* + * Setting I915_EXEC_USE_EXTENSIONS implies that + * drm_i915_gem_execbuffer2.cliprects_ptr is treated as a pointer to an linked + * list of i915_user_extension. Each i915_user_extension node is the base of a + * larger structure. The list of supported structures are listed in the + * drm_i915_gem_execbuffer_ext enum. + */ +#define I915_EXEC_USE_EXTENSIONS (1 << 21) + +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS << 1)) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ -- cgit v1.3.1 From d0448d6a249b6fc4518181b214d3403dfe2c8075 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 13:12:52 -0300 Subject: tools headers UAPI: Update fscrypt.h copy To get the changes from: c7f0207b613033c5 ("fscrypt: make "#define fscrypt_policy" user-only") That don't cause any changes in tools/perf, only addresses this perf tools build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/fscrypt.h' differs from latest version at 'include/uapi/linux/fscrypt.h' diff -u tools/include/uapi/linux/fscrypt.h include/uapi/linux/fscrypt.h Cc: Adrian Hunter Cc: Eric Biggers Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/fscrypt.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index 7875709ccfeb..e5de60336938 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -45,7 +45,6 @@ struct fscrypt_policy_v1 { __u8 flags; __u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE]; }; -#define fscrypt_policy fscrypt_policy_v1 /* * Process-subscribed "logon" key description prefix and payload format. @@ -156,9 +155,9 @@ struct fscrypt_get_key_status_arg { __u32 __out_reserved[13]; }; -#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy) +#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy_v1) #define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) -#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy) +#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy_v1) #define FS_IOC_GET_ENCRYPTION_POLICY_EX _IOWR('f', 22, __u8[9]) /* size + version */ #define FS_IOC_ADD_ENCRYPTION_KEY _IOWR('f', 23, struct fscrypt_add_key_arg) #define FS_IOC_REMOVE_ENCRYPTION_KEY _IOWR('f', 24, struct fscrypt_remove_key_arg) @@ -170,6 +169,7 @@ struct fscrypt_get_key_status_arg { /* old names; don't add anything new here! */ #ifndef __KERNEL__ +#define fscrypt_policy fscrypt_policy_v1 #define FS_KEY_DESCRIPTOR_SIZE FSCRYPT_KEY_DESCRIPTOR_SIZE #define FS_POLICY_FLAGS_PAD_4 FSCRYPT_POLICY_FLAGS_PAD_4 #define FS_POLICY_FLAGS_PAD_8 FSCRYPT_POLICY_FLAGS_PAD_8 -- cgit v1.3.1 From 40a6bbf5149c7302bd7515fb5e2c3d12bac462f5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 13:21:04 -0300 Subject: tools x86 headers: Update cpufeatures.h headers copies To pick the changes from: 5866e9205b47a983 ("x86/cpu: Add hardware-enforced cache coherency as a CPUID feature") ff4f82816dff28ff ("x86/cpufeatures: Enumerate ENQCMD and ENQCMDS instructions") 360e7c5c4ca4fd8e ("x86/cpufeatures: Add SEV-ES CPU feature") 18ec63faefb3fd31 ("x86/cpufeatures: Enumerate TSX suspend load address tracking instructions") e48cb1a3fb916500 ("x86/resctrl: Enumerate per-thread MBA controls") Which don't cause any changes in tooling, just addresses these build warnings: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Warning: Kernel ABI header at 'tools/arch/x86/include/asm/disabled-features.h' differs from latest version at 'arch/x86/include/asm/disabled-features.h' diff -u tools/arch/x86/include/asm/disabled-features.h arch/x86/include/asm/disabled-features.h Cc: Adrian Hunter Cc: Borislav Petkov Cc: Fenghua Yu Cc: Ian Rogers Cc: Jiri Olsa Cc: Krish Sadhukhan Cc: Kyung Min Park Cc: Namhyung Kim Cc: Tom Lendacky Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 6 +++++- tools/arch/x86/include/asm/disabled-features.h | 9 ++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 2901d5df4366..dad350d42ecf 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -96,7 +96,7 @@ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -/* free ( 3*32+17) */ +#define X86_FEATURE_SME_COHERENT ( 3*32+17) /* "" AMD hardware-enforced cache coherency */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ @@ -236,6 +236,7 @@ #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ +#define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -288,6 +289,7 @@ #define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ #define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ +#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ @@ -353,6 +355,7 @@ #define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ #define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ #define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ +#define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ @@ -368,6 +371,7 @@ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ +#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 4ea8584682f9..5861d34f9771 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -56,6 +56,12 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif +#ifdef CONFIG_IOMMU_SUPPORT +# define DISABLE_ENQCMD 0 +#else +# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -75,7 +81,8 @@ #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 #define DISABLED_MASK15 0 -#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP) +#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \ + DISABLE_ENQCMD) #define DISABLED_MASK17 0 #define DISABLED_MASK18 0 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) -- cgit v1.3.1 From 8b2fc25a945b125c7ee4c36b048ad65f7c04105e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 13:24:52 -0300 Subject: tools x86 headers: Update required-features.h header from the kernel To pick the changes from: ecac71816a1829c0 ("x86/paravirt: Use CONFIG_PARAVIRT_XXL instead of CONFIG_PARAVIRT") That don entail any changes in tooling, just addressing these perf tools build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/required-features.h' differs from latest version at 'arch/x86/include/asm/required-features.h' diff -u tools/arch/x86/include/asm/required-features.h arch/x86/include/asm/required-features.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Juergen Gross Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/required-features.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index 6847d85400a8..3ff0d48469f2 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -54,7 +54,7 @@ #endif #ifdef CONFIG_X86_64 -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL /* Paravirtualized systems may not have PSE or PGE available */ #define NEED_PSE 0 #define NEED_PGE 0 -- cgit v1.3.1 From 32b734e09ec38a0bb81d05d37056a95584d14c99 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 13:28:04 -0300 Subject: tools arch x86: Sync the msr-index.h copy with the kernel sources To pick up the changes in: 29dcc60f6a19fb0a ("x86/boot/compressed/64: Add stage1 #VC handler") 36e1be8ada994d50 ("perf/x86/amd/ibs: Fix raw sample data accumulation") 59a854e2f3b90ad2 ("perf/x86/intel: Support TopDown metrics on Ice Lake") 7b2c05a15d29d057 ("perf/x86/intel: Generic support for hardware TopDown metrics") 99e40204e014e066 ("x86/msr: Move the F15h MSRs where they belong") b57de6cd16395be1 ("x86/sev-es: Add SEV-ES Feature Detection") ed7bde7a6dab521e ("cpufreq: intel_pstate: Allow enable/disable energy efficiency") f0f2f9feb4ee6f28 ("x86/msr-index: Define an IA32_PASID MSR") That cause these changes in tooling: $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after $ diff -u before after --- before 2020-10-19 13:27:33.195274425 -0300 +++ after 2020-10-19 13:27:44.144507610 -0300 @@ -113,6 +113,8 @@ [0x00000309] = "CORE_PERF_FIXED_CTR0", [0x0000030a] = "CORE_PERF_FIXED_CTR1", [0x0000030b] = "CORE_PERF_FIXED_CTR2", + [0x0000030c] = "CORE_PERF_FIXED_CTR3", + [0x00000329] = "PERF_METRICS", [0x00000345] = "IA32_PERF_CAPABILITIES", [0x0000038d] = "CORE_PERF_FIXED_CTR_CTRL", [0x0000038e] = "CORE_PERF_GLOBAL_STATUS", @@ -222,6 +224,7 @@ [0x00000774] = "HWP_REQUEST", [0x00000777] = "HWP_STATUS", [0x00000d90] = "IA32_BNDCFGS", + [0x00000d93] = "IA32_PASID", [0x00000da0] = "IA32_XSS", [0x00000dc0] = "LBR_INFO_0", [0x00000ffc] = "IA32_BNDCFGS_RSVD", @@ -279,6 +282,7 @@ [0xc0010115 - x86_AMD_V_KVM_MSRs_offset] = "VM_IGNNE", [0xc0010117 - x86_AMD_V_KVM_MSRs_offset] = "VM_HSAVE_PA", [0xc001011f - x86_AMD_V_KVM_MSRs_offset] = "AMD64_VIRT_SPEC_CTRL", + [0xc0010130 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_SEV_ES_GHCB", [0xc0010131 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_SEV", [0xc0010140 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_OSVW_ID_LENGTH", [0xc0010141 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_OSVW_STATUS", $ Which causes these parts of tools/perf/ to be rebuilt: CC /tmp/build/perf/trace/beauty/tracepoints/x86_msr.o DESCEND plugins GEN /tmp/build/perf/python/perf.so INSTALL trace_plugins LD /tmp/build/perf/trace/beauty/tracepoints/perf-in.o LD /tmp/build/perf/trace/beauty/perf-in.o LD /tmp/build/perf/perf-in.o LINK /tmp/build/perf/per At some point these should just be tables read by perf on demand. This addresses this perf tools build warning: diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h' Cc: Adrian Hunter Cc: Borislav Petkov Cc: Fenghua Yu Cc: Ian Rogers Cc: Jiri Olsa Cc: Joerg Roedel Cc: Kan Liang Cc: Kim Phillips Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Srinivas Pandruvada Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/msr-index.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools') diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 2859ee4f39a8..972a34d93505 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -257,6 +257,9 @@ #define MSR_IA32_LASTINTFROMIP 0x000001dd #define MSR_IA32_LASTINTTOIP 0x000001de +#define MSR_IA32_PASID 0x00000d93 +#define MSR_IA32_PASID_VALID BIT_ULL(31) + /* DEBUGCTLMSR bits (others vary by model): */ #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ #define DEBUGCTLMSR_BTF_SHIFT 1 @@ -464,11 +467,15 @@ #define MSR_AMD64_IBSOP_REG_MASK ((1UL< Date: Mon, 19 Oct 2020 13:36:41 -0300 Subject: tools UAPI: Update copy of linux/mman.h from the kernel sources e47168f3d1b14af5 ("powerpc/8xx: Support 16k hugepages with 4k pages") That don't cause any changes in tooling, just addresses this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/mman.h' differs from latest version at 'include/uapi/linux/mman.h' diff -u tools/include/uapi/linux/mman.h include/uapi/linux/mman.h Cc: Adrian Hunter Cc: Christophe Leroy Cc: Ian Rogers Cc: Jiri Olsa Cc: Michael Ellerman Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/mman.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h index 923cc162609c..f55bc680b5b0 100644 --- a/tools/include/uapi/linux/mman.h +++ b/tools/include/uapi/linux/mman.h @@ -27,6 +27,7 @@ #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT #define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK +#define MAP_HUGE_16KB HUGETLB_FLAG_ENCODE_16KB #define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB #define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB #define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB -- cgit v1.3.1 From aa04899a13078e4181146212555a1bbaa387d2c9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 13:42:55 -0300 Subject: tools kvm headers: Update KVM headers from the kernel sources Some should cause changes in tooling, like the one adding LAST_EXCP, but the way it is structured end up not making that happen. The new SVM_EXIT_INVPCID should get used by arch/x86/util/kvm-stat.c, in the svm_exit_reasons table. The tools/perf/trace/beauty part has scripts to catch changes and automagically create tables, like tools/perf/trace/beauty/kvm_ioctl.sh, but changes are needed to make tools/perf/arch/x86/util/kvm-stat.c catch those automatically. These were handled by the existing scripts: $ tools/perf/trace/beauty/kvm_ioctl.sh > before $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h $ tools/perf/trace/beauty/kvm_ioctl.sh > after $ diff -u before after --- before 2020-11-03 08:43:52.910728608 -0300 +++ after 2020-11-03 08:44:04.273959984 -0300 @@ -89,6 +89,7 @@ [0xbf] = "SET_NESTED_STATE", [0xc0] = "CLEAR_DIRTY_LOG", [0xc1] = "GET_SUPPORTED_HV_CPUID", + [0xc6] = "X86_SET_MSR_FILTER", [0xe0] = "CREATE_DEVICE", [0xe1] = "SET_DEVICE_ATTR", [0xe2] = "GET_DEVICE_ATTR", $ $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > before $ cp include/uapi/linux/vhost.h tools/include/uapi/linux/vhost.h $ $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > after $ diff -u before after --- before 2020-11-03 08:45:55.522225198 -0300 +++ after 2020-11-03 08:46:12.881578666 -0300 @@ -37,4 +37,5 @@ [0x71] = "VDPA_GET_STATUS", [0x73] = "VDPA_GET_CONFIG", [0x76] = "VDPA_GET_VRING_NUM", + [0x78] = "VDPA_GET_IOVA_RANGE", }; $ This addresses these perf build warnings: Warning: Kernel ABI header at 'tools/arch/arm64/include/uapi/asm/kvm.h' differs from latest version at 'arch/arm64/include/uapi/asm/kvm.h' diff -u tools/arch/arm64/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h Warning: Kernel ABI header at 'tools/arch/s390/include/uapi/asm/sie.h' differs from latest version at 'arch/s390/include/uapi/asm/sie.h' diff -u tools/arch/s390/include/uapi/asm/sie.h arch/s390/include/uapi/asm/sie.h Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/kvm.h' differs from latest version at 'arch/x86/include/uapi/asm/kvm.h' diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/svm.h' differs from latest version at 'arch/x86/include/uapi/asm/svm.h' diff -u tools/arch/x86/include/uapi/asm/svm.h arch/x86/include/uapi/asm/svm.h Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Warning: Kernel ABI header at 'tools/include/uapi/linux/vhost.h' differs from latest version at 'include/uapi/linux/vhost.h' diff -u tools/include/uapi/linux/vhost.h include/uapi/linux/vhost.h Cc: Adrian Hunter Cc: Alexander Yarygin Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Cornelia Huck Cc: David Ahern Cc: Ian Rogers Cc: Jiri Olsa Cc: Joerg Roedel Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm64/include/uapi/asm/kvm.h | 25 +++++++++++++++++++++++++ tools/arch/s390/include/uapi/asm/sie.h | 2 +- tools/arch/x86/include/uapi/asm/kvm.h | 20 ++++++++++++++++++++ tools/arch/x86/include/uapi/asm/svm.h | 13 +++++++++++++ tools/include/uapi/linux/kvm.h | 19 +++++++++++++++++++ tools/include/uapi/linux/vhost.h | 4 ++++ 6 files changed, 82 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index ba85bb23f060..1c17c3a24411 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -159,6 +159,21 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* + * PMU filter structure. Describe a range of events with a particular + * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. + */ +struct kvm_pmu_event_filter { + __u16 base_event; + __u16 nevents; + +#define KVM_PMU_EVENT_ALLOW 0 +#define KVM_PMU_EVENT_DENY 1 + + __u8 action; + __u8 pad[3]; +}; + /* for KVM_GET/SET_VCPU_EVENTS */ struct kvm_vcpu_events { struct { @@ -242,6 +257,15 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2 + +/* + * Only two states can be presented by the host kernel: + * - NOT_REQUIRED: the guest doesn't need to do anything + * - NOT_AVAIL: the guest isn't mitigated (it can still use SSBS if available) + * + * All the other values are deprecated. The host still accepts all + * values (they are ABI), but will narrow them to the above two. + */ #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2) #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1 @@ -329,6 +353,7 @@ struct kvm_vcpu_events { #define KVM_ARM_VCPU_PMU_V3_CTRL 0 #define KVM_ARM_VCPU_PMU_V3_IRQ 0 #define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_PMU_V3_FILTER 2 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 diff --git a/tools/arch/s390/include/uapi/asm/sie.h b/tools/arch/s390/include/uapi/asm/sie.h index 6ca1e68d7103..ede318653c87 100644 --- a/tools/arch/s390/include/uapi/asm/sie.h +++ b/tools/arch/s390/include/uapi/asm/sie.h @@ -29,7 +29,7 @@ { 0x13, "SIGP conditional emergency signal" }, \ { 0x15, "SIGP sense running" }, \ { 0x16, "SIGP set multithreading"}, \ - { 0x17, "SIGP store additional status ait address"} + { 0x17, "SIGP store additional status at address"} #define icpt_prog_codes \ { 0x0001, "Prog Operation" }, \ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 0780f97c1850..89e5f3d1bba8 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -192,6 +192,26 @@ struct kvm_msr_list { __u32 indices[0]; }; +/* Maximum size of any access bitmap in bytes */ +#define KVM_MSR_FILTER_MAX_BITMAP_SIZE 0x600 + +/* for KVM_X86_SET_MSR_FILTER */ +struct kvm_msr_filter_range { +#define KVM_MSR_FILTER_READ (1 << 0) +#define KVM_MSR_FILTER_WRITE (1 << 1) + __u32 flags; + __u32 nmsrs; /* number of msrs in bitmap */ + __u32 base; /* MSR index the bitmap starts at */ + __u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */ +}; + +#define KVM_MSR_FILTER_MAX_RANGES 16 +struct kvm_msr_filter { +#define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0) +#define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0) + __u32 flags; + struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES]; +}; struct kvm_cpuid_entry { __u32 function; diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index 2e8a30f06c74..f1d8307454e0 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -29,6 +29,7 @@ #define SVM_EXIT_WRITE_DR6 0x036 #define SVM_EXIT_WRITE_DR7 0x037 #define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_LAST_EXCP 0x05f #define SVM_EXIT_INTR 0x060 #define SVM_EXIT_NMI 0x061 #define SVM_EXIT_SMI 0x062 @@ -76,10 +77,21 @@ #define SVM_EXIT_MWAIT_COND 0x08c #define SVM_EXIT_XSETBV 0x08d #define SVM_EXIT_RDPRU 0x08e +#define SVM_EXIT_INVPCID 0x0a2 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 +/* SEV-ES software-defined VMGEXIT events */ +#define SVM_VMGEXIT_MMIO_READ 0x80000001 +#define SVM_VMGEXIT_MMIO_WRITE 0x80000002 +#define SVM_VMGEXIT_NMI_COMPLETE 0x80000003 +#define SVM_VMGEXIT_AP_HLT_LOOP 0x80000004 +#define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005 +#define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0 +#define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1 +#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff + #define SVM_EXIT_ERR -1 #define SVM_EXIT_REASONS \ @@ -171,6 +183,7 @@ { SVM_EXIT_MONITOR, "monitor" }, \ { SVM_EXIT_MWAIT, "mwait" }, \ { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_INVPCID, "invpcid" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 7d8eced6f459..ca41220b40b8 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -248,6 +248,8 @@ struct kvm_hyperv_exit { #define KVM_EXIT_IOAPIC_EOI 26 #define KVM_EXIT_HYPERV 27 #define KVM_EXIT_ARM_NISV 28 +#define KVM_EXIT_X86_RDMSR 29 +#define KVM_EXIT_X86_WRMSR 30 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -413,6 +415,17 @@ struct kvm_run { __u64 esr_iss; __u64 fault_ipa; } arm_nisv; + /* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */ + struct { + __u8 error; /* user -> kernel */ + __u8 pad[7]; +#define KVM_MSR_EXIT_REASON_INVAL (1 << 0) +#define KVM_MSR_EXIT_REASON_UNKNOWN (1 << 1) +#define KVM_MSR_EXIT_REASON_FILTER (1 << 2) + __u32 reason; /* kernel -> user */ + __u32 index; /* kernel -> user */ + __u64 data; /* kernel <-> user */ + } msr; /* Fix the size of the union. */ char padding[256]; }; @@ -1037,6 +1050,9 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SMALLER_MAXPHYADDR 185 #define KVM_CAP_S390_DIAG318 186 #define KVM_CAP_STEAL_TIME 187 +#define KVM_CAP_X86_USER_SPACE_MSR 188 +#define KVM_CAP_X86_MSR_FILTER 189 +#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #ifdef KVM_CAP_IRQ_ROUTING @@ -1538,6 +1554,9 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_S390_PROTECTED */ #define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) +/* Available with KVM_CAP_X86_MSR_FILTER */ +#define KVM_X86_SET_MSR_FILTER _IOW(KVMIO, 0xc6, struct kvm_msr_filter) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index 75232185324a..c998860d7bbc 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -146,4 +146,8 @@ /* Set event fd for config interrupt*/ #define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int) + +/* Get the valid iova range */ +#define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ + struct vhost_vdpa_iova_range) #endif -- cgit v1.3.1 From a9e27f5f9827eab25b76155fddcc22ddeeed58d2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Nov 2020 08:49:59 -0300 Subject: tools headers UAPI: Update tools's copy of linux/perf_event.h The diff is just tabs versus spaces, trivial. This silences this perf tools build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 3e5dcdd48a49..b95d3c485d27 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1196,7 +1196,7 @@ union perf_mem_data_src { #define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ /* 1 free */ -#define PERF_MEM_SNOOPX_SHIFT 38 +#define PERF_MEM_SNOOPX_SHIFT 38 /* locked instruction */ #define PERF_MEM_LOCK_NA 0x01 /* not available */ -- cgit v1.3.1 From 42cc0e70a21faa8e7d7ea8713a3f9cd64bd3f60a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Nov 2020 08:52:11 -0300 Subject: tools include UAPI: Update linux/mount.h copy To pick the changes from: dab741e0e02bd3c4 ("Add a "nosymfollow" mount option.") That ends up adding support for the new MS_NOSYMFOLLOW mount flag: $ tools/perf/trace/beauty/mount_flags.sh > before $ cp include/uapi/linux/mount.h tools/include/uapi/linux/mount.h $ tools/perf/trace/beauty/mount_flags.sh > after $ diff -u before after --- before 2020-11-03 08:51:28.117997454 -0300 +++ after 2020-11-03 08:51:38.992218869 -0300 @@ -7,6 +7,7 @@ [32 ? (ilog2(32) + 1) : 0] = "REMOUNT", [64 ? (ilog2(64) + 1) : 0] = "MANDLOCK", [128 ? (ilog2(128) + 1) : 0] = "DIRSYNC", + [256 ? (ilog2(256) + 1) : 0] = "NOSYMFOLLOW", [1024 ? (ilog2(1024) + 1) : 0] = "NOATIME", [2048 ? (ilog2(2048) + 1) : 0] = "NODIRATIME", [4096 ? (ilog2(4096) + 1) : 0] = "BIND", $ So now one can use it in --filter expressions for tracepoints. This silences this perf build warnings: Warning: Kernel ABI header at 'tools/include/uapi/linux/mount.h' differs from latest version at 'include/uapi/linux/mount.h' diff -u tools/include/uapi/linux/mount.h include/uapi/linux/mount.h Cc: Adrian Hunter Cc: Al Viro Cc: Ian Rogers Cc: Jiri Olsa Cc: Mattias Nissler Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/mount.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h index 96a0240f23fe..dd8306ea336c 100644 --- a/tools/include/uapi/linux/mount.h +++ b/tools/include/uapi/linux/mount.h @@ -16,6 +16,7 @@ #define MS_REMOUNT 32 /* Alter flags of a mounted FS */ #define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ #define MS_DIRSYNC 128 /* Directory modifications are synchronous */ +#define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */ #define MS_NOATIME 1024 /* Do not update access times. */ #define MS_NODIRATIME 2048 /* Do not update directory access times */ #define MS_BIND 4096 -- cgit v1.3.1 From 86449b12f626a65d2a2ecfada1e024488471f9e2 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 30 Oct 2020 16:54:31 -0700 Subject: perf hists browser: Increase size of 'buf' in perf_evsel__hists_browse() Making perf with gcc-9.1.1 generates the following warning: CC ui/browsers/hists.o ui/browsers/hists.c: In function 'perf_evsel__hists_browse': ui/browsers/hists.c:3078:61: error: '%d' directive output may be \ truncated writing between 1 and 11 bytes into a region of size \ between 2 and 12 [-Werror=format-truncation=] 3078 | "Max event group index to sort is %d (index from 0 to %d)", | ^~ ui/browsers/hists.c:3078:7: note: directive argument in the range [-2147483648, 8] 3078 | "Max event group index to sort is %d (index from 0 to %d)", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from /usr/include/stdio.h:937, from ui/browsers/hists.c:5: IOW, the string in line 3078 might be too long for buf[] of 64 bytes. Fix this by increasing the size of buf[] to 128. Fixes: dbddf1747441 ("perf report/top TUI: Support hotkeys to let user select any event for sorting") Signed-off-by: Song Liu Acked-by: Jiri Olsa Cc: Jin Yao Cc: stable@vger.kernel.org # v5.7+ Link: http://lore.kernel.org/lkml/20201030235431.534417-1-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index a07626f07208..b0e1880cf992 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2963,7 +2963,7 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events, struct popup_action actions[MAX_OPTIONS]; int nr_options = 0; int key = -1; - char buf[64]; + char buf[128]; int delay_secs = hbt ? hbt->refresh : 0; #define HIST_BROWSER_HELP_COMMON \ -- cgit v1.3.1 From 6311951d4f8f28c43b554ff0719027884bedd7e3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Nov 2020 00:31:02 +0100 Subject: perf tools: Initialize output buffer in build_id__sprintf We display garbage for undefined build_id objects, because we don't initialize the output buffer. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20201101233103.3537427-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 8763772f1095..6b410c3d52dc 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -102,6 +102,8 @@ int build_id__sprintf(const struct build_id *build_id, char *bf) const u8 *raw = build_id->data; size_t i; + bf[0] = 0x0; + for (i = 0; i < build_id->size; ++i) { sprintf(bid, "%02x", *raw); ++raw; -- cgit v1.3.1 From fe01adb72356a4e2f8735e4128af85921ca98fa1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Nov 2020 00:31:03 +0100 Subject: perf tools: Add missing swap for ino_generation We are missing swap for ino_generation field. Fixes: 5c5e854bc760 ("perf tools: Add attr->mmap2 support") Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20201101233103.3537427-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 7a5f03764702..d20b16ee7377 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -595,6 +595,7 @@ static void perf_event__mmap2_swap(union perf_event *event, event->mmap2.maj = bswap_32(event->mmap2.maj); event->mmap2.min = bswap_32(event->mmap2.min); event->mmap2.ino = bswap_64(event->mmap2.ino); + event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation); if (sample_id_all) { void *data = &event->mmap2.filename; -- cgit v1.3.1 From 2c589d933e54d183ee2a052971b730e423c62031 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 2 Nov 2020 23:02:28 +0900 Subject: perf tools: Add missing swap for cgroup events It was missed to add a swap function for PERF_RECORD_CGROUP. Fixes: ba78c1c5461c ("perf tools: Basic support for CGROUP event") Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201102140228.303657-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index d20b16ee7377..098080287c68 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -711,6 +711,18 @@ static void perf_event__namespaces_swap(union perf_event *event, swap_sample_id_all(event, &event->namespaces.link_info[i]); } +static void perf_event__cgroup_swap(union perf_event *event, bool sample_id_all) +{ + event->cgroup.id = bswap_64(event->cgroup.id); + + if (sample_id_all) { + void *data = &event->cgroup.path; + + data += PERF_ALIGN(strlen(data) + 1, sizeof(u64)); + swap_sample_id_all(event, data); + } +} + static u8 revbyte(u8 b) { int rev = (b >> 4) | ((b & 0xf) << 4); @@ -953,6 +965,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_SWITCH] = perf_event__switch_swap, [PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap, [PERF_RECORD_NAMESPACES] = perf_event__namespaces_swap, + [PERF_RECORD_CGROUP] = perf_event__cgroup_swap, [PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, -- cgit v1.3.1 From 5d020cbd86204e51da05628623a6f9729d4b04c8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Nov 2020 09:24:20 -0300 Subject: tools feature: Fixup fast path feature detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 22dd1ac91a776752 ("tools: Remove feature-libelf-mmap feature detection") correctly simplified the this feature detection, but forgot to remove the call to the removed function in the main() function for the test-all.c fast path feature detection, making it fail and thus do all the feature detection individually, fix it. $ cat /tmp/build/perf/feature/test-all.make.output test-all.c: In function ‘main’: test-all.c:188:2: error: implicit declaration of function ‘main_test_libelf_mmap’; did you mean ‘main_test_libelf’? [-Werror=implicit-function-declaration] 188 | main_test_libelf_mmap(); | ^~~~~~~~~~~~~~~~~~~~~ | main_test_libelf cc1: all warnings being treated as errors $ vim tools/build/feature/test-all.c $ rm -rf /tmp/build/perf ; mkdir -p /tmp/build/perf ;make V=1 -k O=/tmp/build/perf -C tools/perf install-bin ; perf test python $ cat /tmp/build/perf/feature/test-all.make.output $ Fixes: 22dd1ac91a776752 ("tools: Remove feature-libelf-mmap feature detection") Cc: Alexei Starovoitov Cc: Andrii Nakryiko Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/test-all.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index a04e81321c66..464873883396 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -185,7 +185,6 @@ int main(int argc, char *argv[]) main_test_libperl(); main_test_hello(); main_test_libelf(); - main_test_libelf_mmap(); main_test_get_current_dir_name(); main_test_gettid(); main_test_glibc(); -- cgit v1.3.1 From aaf376bddf68d0afe5f4b5f25fc555da358e2287 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 3 Nov 2020 13:34:48 -0800 Subject: selftests/bpf: Move test_tcppbf_user into test_progs Recently a bug was missed due to the fact that test_tcpbpf_user is not a part of test_progs. In order to prevent similar issues in the future move the test functionality into test_progs. By doing this we can make certain that it is a part of standard testing and will not be overlooked. As a part of moving the functionality into test_progs it is necessary to integrate with the test_progs framework and to drop any redundant code. This patch: 1. Cleans up the include headers 2. Dropped a duplicate definition of bpf_find_map 3. Switched over to using test_progs specific cgroup functions 4. Renamed main to test_tcpbpf_user 5. Dropped return value in favor of CHECK_FAIL to check for errors The general idea is that I wanted to keep the changes as small as possible while moving the file into the test_progs framework. The follow-on patches are meant to clean up the remaining issues such as the use of CHECK_FAIL. Signed-off-by: Alexander Duyck Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/160443928881.1086697.17661359319919165370.stgit@localhost.localdomain --- tools/testing/selftests/bpf/.gitignore | 1 - tools/testing/selftests/bpf/Makefile | 3 +- .../testing/selftests/bpf/prog_tests/tcpbpf_user.c | 141 ++++++++++++++++++ tools/testing/selftests/bpf/test_tcpbpf_user.c | 165 --------------------- 4 files changed, 142 insertions(+), 168 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c delete mode 100644 tools/testing/selftests/bpf/test_tcpbpf_user.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 3ab1200e172f..395ae040ce1f 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -8,7 +8,6 @@ FEATURE-DUMP.libbpf fixdep test_dev_cgroup /test_progs* -test_tcpbpf_user test_verifier_log feature test_sock diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 542768f5195b..50e5b18fc455 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -32,7 +32,7 @@ LDLIBS += -lcap -lelf -lz -lrt -lpthread # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ - test_verifier_log test_dev_cgroup test_tcpbpf_user \ + test_verifier_log test_dev_cgroup \ test_sock test_sockmap get_cgroup_id_user test_socket_cookie \ test_cgroup_storage \ test_netcnt test_tcpnotify_user test_sysctl \ @@ -163,7 +163,6 @@ $(OUTPUT)/test_sock: cgroup_helpers.c $(OUTPUT)/test_sock_addr: cgroup_helpers.c $(OUTPUT)/test_socket_cookie: cgroup_helpers.c $(OUTPUT)/test_sockmap: cgroup_helpers.c -$(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c $(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c new file mode 100644 index 000000000000..caa8d3adec8a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +#include "test_tcpbpf.h" + +#define CG_NAME "/tcpbpf-user-test" + +/* 3 comes from one listening socket + both ends of the connection */ +#define EXPECTED_CLOSE_EVENTS 3 + +#define EXPECT_EQ(expected, actual, fmt) \ + do { \ + if ((expected) != (actual)) { \ + printf(" Value of: " #actual "\n" \ + " Actual: %" fmt "\n" \ + " Expected: %" fmt "\n", \ + (actual), (expected)); \ + ret--; \ + } \ + } while (0) + +int verify_result(const struct tcpbpf_globals *result) +{ + __u32 expected_events; + int ret = 0; + + expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) | + (1 << BPF_SOCK_OPS_RWND_INIT) | + (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) | + (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) | + (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) | + (1 << BPF_SOCK_OPS_NEEDS_ECN) | + (1 << BPF_SOCK_OPS_STATE_CB) | + (1 << BPF_SOCK_OPS_TCP_LISTEN_CB)); + + EXPECT_EQ(expected_events, result->event_map, "#" PRIx32); + EXPECT_EQ(501ULL, result->bytes_received, "llu"); + EXPECT_EQ(1002ULL, result->bytes_acked, "llu"); + EXPECT_EQ(1, result->data_segs_in, PRIu32); + EXPECT_EQ(1, result->data_segs_out, PRIu32); + EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32); + EXPECT_EQ(0, result->good_cb_test_rv, PRIu32); + EXPECT_EQ(1, result->num_listen, PRIu32); + EXPECT_EQ(EXPECTED_CLOSE_EVENTS, result->num_close_events, PRIu32); + + return ret; +} + +int verify_sockopt_result(int sock_map_fd) +{ + __u32 key = 0; + int ret = 0; + int res; + int rv; + + /* check setsockopt for SAVE_SYN */ + rv = bpf_map_lookup_elem(sock_map_fd, &key, &res); + EXPECT_EQ(0, rv, "d"); + EXPECT_EQ(0, res, "d"); + key = 1; + /* check getsockopt for SAVED_SYN */ + rv = bpf_map_lookup_elem(sock_map_fd, &key, &res); + EXPECT_EQ(0, rv, "d"); + EXPECT_EQ(1, res, "d"); + return ret; +} + +void test_tcpbpf_user(void) +{ + const char *file = "test_tcpbpf_kern.o"; + int prog_fd, map_fd, sock_map_fd; + struct tcpbpf_globals g = {0}; + int error = EXIT_FAILURE; + struct bpf_object *obj; + int cg_fd = -1; + int retry = 10; + __u32 key = 0; + int rv; + + cg_fd = test__join_cgroup(CG_NAME); + if (cg_fd < 0) + goto err; + + if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { + printf("FAILED: load_bpf_file failed for: %s\n", file); + goto err; + } + + rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0); + if (rv) { + printf("FAILED: bpf_prog_attach: %d (%s)\n", + error, strerror(errno)); + goto err; + } + + if (system("./tcp_server.py")) { + printf("FAILED: TCP server\n"); + goto err; + } + + map_fd = bpf_find_map(__func__, obj, "global_map"); + if (map_fd < 0) + goto err; + + sock_map_fd = bpf_find_map(__func__, obj, "sockopt_results"); + if (sock_map_fd < 0) + goto err; + +retry_lookup: + rv = bpf_map_lookup_elem(map_fd, &key, &g); + if (rv != 0) { + printf("FAILED: bpf_map_lookup_elem returns %d\n", rv); + goto err; + } + + if (g.num_close_events != EXPECTED_CLOSE_EVENTS && retry--) { + printf("Unexpected number of close events (%d), retrying!\n", + g.num_close_events); + usleep(100); + goto retry_lookup; + } + + if (verify_result(&g)) { + printf("FAILED: Wrong stats\n"); + goto err; + } + + if (verify_sockopt_result(sock_map_fd)) { + printf("FAILED: Wrong sockopt stats\n"); + goto err; + } + + error = 0; +err: + bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); + if (cg_fd != -1) + close(cg_fd); + + CHECK_FAIL(error); +} diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c deleted file mode 100644 index 74a9e49988b6..000000000000 --- a/tools/testing/selftests/bpf/test_tcpbpf_user.c +++ /dev/null @@ -1,165 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "bpf_rlimit.h" -#include "bpf_util.h" -#include "cgroup_helpers.h" - -#include "test_tcpbpf.h" - -/* 3 comes from one listening socket + both ends of the connection */ -#define EXPECTED_CLOSE_EVENTS 3 - -#define EXPECT_EQ(expected, actual, fmt) \ - do { \ - if ((expected) != (actual)) { \ - printf(" Value of: " #actual "\n" \ - " Actual: %" fmt "\n" \ - " Expected: %" fmt "\n", \ - (actual), (expected)); \ - ret--; \ - } \ - } while (0) - -int verify_result(const struct tcpbpf_globals *result) -{ - __u32 expected_events; - int ret = 0; - - expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) | - (1 << BPF_SOCK_OPS_RWND_INIT) | - (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) | - (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) | - (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) | - (1 << BPF_SOCK_OPS_NEEDS_ECN) | - (1 << BPF_SOCK_OPS_STATE_CB) | - (1 << BPF_SOCK_OPS_TCP_LISTEN_CB)); - - EXPECT_EQ(expected_events, result->event_map, "#" PRIx32); - EXPECT_EQ(501ULL, result->bytes_received, "llu"); - EXPECT_EQ(1002ULL, result->bytes_acked, "llu"); - EXPECT_EQ(1, result->data_segs_in, PRIu32); - EXPECT_EQ(1, result->data_segs_out, PRIu32); - EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32); - EXPECT_EQ(0, result->good_cb_test_rv, PRIu32); - EXPECT_EQ(1, result->num_listen, PRIu32); - EXPECT_EQ(EXPECTED_CLOSE_EVENTS, result->num_close_events, PRIu32); - - return ret; -} - -int verify_sockopt_result(int sock_map_fd) -{ - __u32 key = 0; - int ret = 0; - int res; - int rv; - - /* check setsockopt for SAVE_SYN */ - rv = bpf_map_lookup_elem(sock_map_fd, &key, &res); - EXPECT_EQ(0, rv, "d"); - EXPECT_EQ(0, res, "d"); - key = 1; - /* check getsockopt for SAVED_SYN */ - rv = bpf_map_lookup_elem(sock_map_fd, &key, &res); - EXPECT_EQ(0, rv, "d"); - EXPECT_EQ(1, res, "d"); - return ret; -} - -static int bpf_find_map(const char *test, struct bpf_object *obj, - const char *name) -{ - struct bpf_map *map; - - map = bpf_object__find_map_by_name(obj, name); - if (!map) { - printf("%s:FAIL:map '%s' not found\n", test, name); - return -1; - } - return bpf_map__fd(map); -} - -int main(int argc, char **argv) -{ - const char *file = "test_tcpbpf_kern.o"; - int prog_fd, map_fd, sock_map_fd; - struct tcpbpf_globals g = {0}; - const char *cg_path = "/foo"; - int error = EXIT_FAILURE; - struct bpf_object *obj; - int cg_fd = -1; - int retry = 10; - __u32 key = 0; - int rv; - - cg_fd = cgroup_setup_and_join(cg_path); - if (cg_fd < 0) - goto err; - - if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { - printf("FAILED: load_bpf_file failed for: %s\n", file); - goto err; - } - - rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0); - if (rv) { - printf("FAILED: bpf_prog_attach: %d (%s)\n", - error, strerror(errno)); - goto err; - } - - if (system("./tcp_server.py")) { - printf("FAILED: TCP server\n"); - goto err; - } - - map_fd = bpf_find_map(__func__, obj, "global_map"); - if (map_fd < 0) - goto err; - - sock_map_fd = bpf_find_map(__func__, obj, "sockopt_results"); - if (sock_map_fd < 0) - goto err; - -retry_lookup: - rv = bpf_map_lookup_elem(map_fd, &key, &g); - if (rv != 0) { - printf("FAILED: bpf_map_lookup_elem returns %d\n", rv); - goto err; - } - - if (g.num_close_events != EXPECTED_CLOSE_EVENTS && retry--) { - printf("Unexpected number of close events (%d), retrying!\n", - g.num_close_events); - usleep(100); - goto retry_lookup; - } - - if (verify_result(&g)) { - printf("FAILED: Wrong stats\n"); - goto err; - } - - if (verify_sockopt_result(sock_map_fd)) { - printf("FAILED: Wrong sockopt stats\n"); - goto err; - } - - printf("PASSED!\n"); - error = 0; -err: - bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); - close(cg_fd); - cleanup_cgroup_environment(); - return error; -} -- cgit v1.3.1 From 247f0ec361b7e0c5c67db8222873182fb8be5146 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 3 Nov 2020 13:34:56 -0800 Subject: selftests/bpf: Drop python client/server in favor of threads Drop the tcp_client/server.py files in favor of using a client and server thread within the test case. Specifically we spawn a new thread to play the role of the server, and the main testing thread plays the role of client. Add logic to the end of the run_test function to guarantee that the sockets are closed when we begin verifying results. Doing this we are able to reduce overhead since we don't have two python workers possibly floating around. In addition we don't have to worry about synchronization issues and as such the retry loop waiting for the threads to close the sockets can be dropped as we will have already closed the sockets in the local executable and synchronized the server thread. Signed-off-by: Alexander Duyck Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/160443929638.1086697.2430242340980315521.stgit@localhost.localdomain --- .../testing/selftests/bpf/prog_tests/tcpbpf_user.c | 95 ++++++++++++++++++---- tools/testing/selftests/bpf/tcp_client.py | 50 ------------ tools/testing/selftests/bpf/tcp_server.py | 80 ------------------ 3 files changed, 78 insertions(+), 147 deletions(-) delete mode 100755 tools/testing/selftests/bpf/tcp_client.py delete mode 100755 tools/testing/selftests/bpf/tcp_server.py (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c index caa8d3adec8a..616269abdb41 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c +++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c @@ -1,13 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include "test_tcpbpf.h" +#define LO_ADDR6 "::1" #define CG_NAME "/tcpbpf-user-test" -/* 3 comes from one listening socket + both ends of the connection */ -#define EXPECTED_CLOSE_EVENTS 3 +static __u32 duration; #define EXPECT_EQ(expected, actual, fmt) \ do { \ @@ -42,7 +43,9 @@ int verify_result(const struct tcpbpf_globals *result) EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32); EXPECT_EQ(0, result->good_cb_test_rv, PRIu32); EXPECT_EQ(1, result->num_listen, PRIu32); - EXPECT_EQ(EXPECTED_CLOSE_EVENTS, result->num_close_events, PRIu32); + + /* 3 comes from one listening socket + both ends of the connection */ + EXPECT_EQ(3, result->num_close_events, PRIu32); return ret; } @@ -66,6 +69,75 @@ int verify_sockopt_result(int sock_map_fd) return ret; } +static int run_test(void) +{ + int listen_fd = -1, cli_fd = -1, accept_fd = -1; + char buf[1000]; + int err = -1; + int i, rv; + + listen_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0); + if (CHECK(listen_fd == -1, "start_server", "listen_fd:%d errno:%d\n", + listen_fd, errno)) + goto done; + + cli_fd = connect_to_fd(listen_fd, 0); + if (CHECK(cli_fd == -1, "connect_to_fd(listen_fd)", + "cli_fd:%d errno:%d\n", cli_fd, errno)) + goto done; + + accept_fd = accept(listen_fd, NULL, NULL); + if (CHECK(accept_fd == -1, "accept(listen_fd)", + "accept_fd:%d errno:%d\n", accept_fd, errno)) + goto done; + + /* Send 1000B of '+'s from cli_fd -> accept_fd */ + for (i = 0; i < 1000; i++) + buf[i] = '+'; + + rv = send(cli_fd, buf, 1000, 0); + if (CHECK(rv != 1000, "send(cli_fd)", "rv:%d errno:%d\n", rv, errno)) + goto done; + + rv = recv(accept_fd, buf, 1000, 0); + if (CHECK(rv != 1000, "recv(accept_fd)", "rv:%d errno:%d\n", rv, errno)) + goto done; + + /* Send 500B of '.'s from accept_fd ->cli_fd */ + for (i = 0; i < 500; i++) + buf[i] = '.'; + + rv = send(accept_fd, buf, 500, 0); + if (CHECK(rv != 500, "send(accept_fd)", "rv:%d errno:%d\n", rv, errno)) + goto done; + + rv = recv(cli_fd, buf, 500, 0); + if (CHECK(rv != 500, "recv(cli_fd)", "rv:%d errno:%d\n", rv, errno)) + goto done; + + /* + * shutdown accept first to guarantee correct ordering for + * bytes_received and bytes_acked when we go to verify the results. + */ + shutdown(accept_fd, SHUT_WR); + err = recv(cli_fd, buf, 1, 0); + if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n", err, errno)) + goto done; + + shutdown(cli_fd, SHUT_WR); + err = recv(accept_fd, buf, 1, 0); + CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n", err, errno); +done: + if (accept_fd != -1) + close(accept_fd); + if (cli_fd != -1) + close(cli_fd); + if (listen_fd != -1) + close(listen_fd); + + return err; +} + void test_tcpbpf_user(void) { const char *file = "test_tcpbpf_kern.o"; @@ -74,7 +146,6 @@ void test_tcpbpf_user(void) int error = EXIT_FAILURE; struct bpf_object *obj; int cg_fd = -1; - int retry = 10; __u32 key = 0; int rv; @@ -94,11 +165,6 @@ void test_tcpbpf_user(void) goto err; } - if (system("./tcp_server.py")) { - printf("FAILED: TCP server\n"); - goto err; - } - map_fd = bpf_find_map(__func__, obj, "global_map"); if (map_fd < 0) goto err; @@ -107,20 +173,15 @@ void test_tcpbpf_user(void) if (sock_map_fd < 0) goto err; -retry_lookup: + if (run_test()) + goto err; + rv = bpf_map_lookup_elem(map_fd, &key, &g); if (rv != 0) { printf("FAILED: bpf_map_lookup_elem returns %d\n", rv); goto err; } - if (g.num_close_events != EXPECTED_CLOSE_EVENTS && retry--) { - printf("Unexpected number of close events (%d), retrying!\n", - g.num_close_events); - usleep(100); - goto retry_lookup; - } - if (verify_result(&g)) { printf("FAILED: Wrong stats\n"); goto err; diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py deleted file mode 100755 index bfff82be3fc1..000000000000 --- a/tools/testing/selftests/bpf/tcp_client.py +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python3 -# -# SPDX-License-Identifier: GPL-2.0 -# - -import sys, os, os.path, getopt -import socket, time -import subprocess -import select - -def read(sock, n): - buf = b'' - while len(buf) < n: - rem = n - len(buf) - try: s = sock.recv(rem) - except (socket.error) as e: return b'' - buf += s - return buf - -def send(sock, s): - total = len(s) - count = 0 - while count < total: - try: n = sock.send(s) - except (socket.error) as e: n = 0 - if n == 0: - return count; - count += n - return count - - -serverPort = int(sys.argv[1]) - -# create active socket -sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) -try: - sock.connect(('::1', serverPort)) -except socket.error as e: - sys.exit(1) - -buf = b'' -n = 0 -while n < 1000: - buf += b'+' - n += 1 - -sock.settimeout(1); -n = send(sock, buf) -n = read(sock, 500) -sys.exit(0) diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py deleted file mode 100755 index 42ab8882f00f..000000000000 --- a/tools/testing/selftests/bpf/tcp_server.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python3 -# -# SPDX-License-Identifier: GPL-2.0 -# - -import sys, os, os.path, getopt -import socket, time -import subprocess -import select - -def read(sock, n): - buf = b'' - while len(buf) < n: - rem = n - len(buf) - try: s = sock.recv(rem) - except (socket.error) as e: return b'' - buf += s - return buf - -def send(sock, s): - total = len(s) - count = 0 - while count < total: - try: n = sock.send(s) - except (socket.error) as e: n = 0 - if n == 0: - return count; - count += n - return count - - -SERVER_PORT = 12877 -MAX_PORTS = 2 - -serverPort = SERVER_PORT -serverSocket = None - -# create passive socket -serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) - -try: serverSocket.bind(('::1', 0)) -except socket.error as msg: - print('bind fails: ' + str(msg)) - -sn = serverSocket.getsockname() -serverPort = sn[1] - -cmdStr = ("./tcp_client.py %d &") % (serverPort) -os.system(cmdStr) - -buf = b'' -n = 0 -while n < 500: - buf += b'.' - n += 1 - -serverSocket.listen(MAX_PORTS) -readList = [serverSocket] - -while True: - readyRead, readyWrite, inError = \ - select.select(readList, [], [], 2) - - if len(readyRead) > 0: - waitCount = 0 - for sock in readyRead: - if sock == serverSocket: - (clientSocket, address) = serverSocket.accept() - address = str(address[0]) - readList.append(clientSocket) - else: - sock.settimeout(1); - s = read(sock, 1000) - n = send(sock, buf) - sock.close() - serverSocket.close() - sys.exit(0) - else: - print('Select timeout!') - sys.exit(1) -- cgit v1.3.1 From d3813ea14b696053c076123239093822b527f0f7 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 3 Nov 2020 13:35:04 -0800 Subject: selftests/bpf: Replace EXPECT_EQ with ASSERT_EQ and refactor verify_results There is already logic in test_progs.h for asserting that a value is expected to be another value. So instead of reinventing it we should just make use of ASSERT_EQ in tcpbpf_user.c. This will allow for better debugging and integrates much more closely with the test_progs framework. In addition we can refactor the code a bit to merge together the two verify functions and tie them together into a single function. Doing this helps to clean the code up a bit and makes it more readable as all the verification is now done in one function. Lastly we can relocate the verification to the end of the run_test since it is logically part of the test itself. With this we can drop the need for a return value from run_test since verification becomes the last step of the call and then immediately following is the tear down of the test setup. Signed-off-by: Alexander Duyck Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/160443930408.1086697.16101205859962113000.stgit@localhost.localdomain --- .../testing/selftests/bpf/prog_tests/tcpbpf_user.c | 115 ++++++++------------- 1 file changed, 43 insertions(+), 72 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c index 616269abdb41..22c359871af6 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c +++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#include #include #include @@ -10,66 +9,56 @@ static __u32 duration; -#define EXPECT_EQ(expected, actual, fmt) \ - do { \ - if ((expected) != (actual)) { \ - printf(" Value of: " #actual "\n" \ - " Actual: %" fmt "\n" \ - " Expected: %" fmt "\n", \ - (actual), (expected)); \ - ret--; \ - } \ - } while (0) - -int verify_result(const struct tcpbpf_globals *result) +static void verify_result(int map_fd, int sock_map_fd) { - __u32 expected_events; - int ret = 0; - - expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) | - (1 << BPF_SOCK_OPS_RWND_INIT) | - (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) | - (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) | - (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) | - (1 << BPF_SOCK_OPS_NEEDS_ECN) | - (1 << BPF_SOCK_OPS_STATE_CB) | - (1 << BPF_SOCK_OPS_TCP_LISTEN_CB)); - - EXPECT_EQ(expected_events, result->event_map, "#" PRIx32); - EXPECT_EQ(501ULL, result->bytes_received, "llu"); - EXPECT_EQ(1002ULL, result->bytes_acked, "llu"); - EXPECT_EQ(1, result->data_segs_in, PRIu32); - EXPECT_EQ(1, result->data_segs_out, PRIu32); - EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32); - EXPECT_EQ(0, result->good_cb_test_rv, PRIu32); - EXPECT_EQ(1, result->num_listen, PRIu32); + __u32 expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) | + (1 << BPF_SOCK_OPS_RWND_INIT) | + (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) | + (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) | + (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) | + (1 << BPF_SOCK_OPS_NEEDS_ECN) | + (1 << BPF_SOCK_OPS_STATE_CB) | + (1 << BPF_SOCK_OPS_TCP_LISTEN_CB)); + struct tcpbpf_globals result; + __u32 key = 0; + int res, rv; + + rv = bpf_map_lookup_elem(map_fd, &key, &result); + if (CHECK(rv, "bpf_map_lookup_elem(map_fd)", "err:%d errno:%d", + rv, errno)) + return; + + /* check global map */ + CHECK(expected_events != result.event_map, "event_map", + "unexpected event_map: actual 0x%08x != expected 0x%08x\n", + result.event_map, expected_events); + + ASSERT_EQ(result.bytes_received, 501, "bytes_received"); + ASSERT_EQ(result.bytes_acked, 1002, "bytes_acked"); + ASSERT_EQ(result.data_segs_in, 1, "data_segs_in"); + ASSERT_EQ(result.data_segs_out, 1, "data_segs_out"); + ASSERT_EQ(result.bad_cb_test_rv, 0x80, "bad_cb_test_rv"); + ASSERT_EQ(result.good_cb_test_rv, 0, "good_cb_test_rv"); + ASSERT_EQ(result.num_listen, 1, "num_listen"); /* 3 comes from one listening socket + both ends of the connection */ - EXPECT_EQ(3, result->num_close_events, PRIu32); - - return ret; -} - -int verify_sockopt_result(int sock_map_fd) -{ - __u32 key = 0; - int ret = 0; - int res; - int rv; + ASSERT_EQ(result.num_close_events, 3, "num_close_events"); /* check setsockopt for SAVE_SYN */ rv = bpf_map_lookup_elem(sock_map_fd, &key, &res); - EXPECT_EQ(0, rv, "d"); - EXPECT_EQ(0, res, "d"); - key = 1; + CHECK(rv, "bpf_map_lookup_elem(sock_map_fd)", "err:%d errno:%d", + rv, errno); + ASSERT_EQ(res, 0, "bpf_setsockopt(TCP_SAVE_SYN)"); + /* check getsockopt for SAVED_SYN */ + key = 1; rv = bpf_map_lookup_elem(sock_map_fd, &key, &res); - EXPECT_EQ(0, rv, "d"); - EXPECT_EQ(1, res, "d"); - return ret; + CHECK(rv, "bpf_map_lookup_elem(sock_map_fd)", "err:%d errno:%d", + rv, errno); + ASSERT_EQ(res, 1, "bpf_getsockopt(TCP_SAVED_SYN)"); } -static int run_test(void) +static void run_test(int map_fd, int sock_map_fd) { int listen_fd = -1, cli_fd = -1, accept_fd = -1; char buf[1000]; @@ -135,18 +124,17 @@ done: if (listen_fd != -1) close(listen_fd); - return err; + if (!err) + verify_result(map_fd, sock_map_fd); } void test_tcpbpf_user(void) { const char *file = "test_tcpbpf_kern.o"; int prog_fd, map_fd, sock_map_fd; - struct tcpbpf_globals g = {0}; int error = EXIT_FAILURE; struct bpf_object *obj; int cg_fd = -1; - __u32 key = 0; int rv; cg_fd = test__join_cgroup(CG_NAME); @@ -173,24 +161,7 @@ void test_tcpbpf_user(void) if (sock_map_fd < 0) goto err; - if (run_test()) - goto err; - - rv = bpf_map_lookup_elem(map_fd, &key, &g); - if (rv != 0) { - printf("FAILED: bpf_map_lookup_elem returns %d\n", rv); - goto err; - } - - if (verify_result(&g)) { - printf("FAILED: Wrong stats\n"); - goto err; - } - - if (verify_sockopt_result(sock_map_fd)) { - printf("FAILED: Wrong sockopt stats\n"); - goto err; - } + run_test(map_fd, sock_map_fd); error = 0; err: -- cgit v1.3.1 From 0a099d1429c709020277d24a460d11ff8356a080 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 3 Nov 2020 13:35:11 -0800 Subject: selftests/bpf: Migrate tcpbpf_user.c to use BPF skeleton Update tcpbpf_user.c to make use of the BPF skeleton. Doing this we can simplify test_tcpbpf_user and reduce the overhead involved in setting up the test. In addition we can clean up the remaining bits such as the one remaining CHECK_FAIL at the end of test_tcpbpf_user so that the function only makes use of CHECK as needed. Signed-off-by: Alexander Duyck Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/160443931155.1086697.17869006617113525162.stgit@localhost.localdomain --- .../testing/selftests/bpf/prog_tests/tcpbpf_user.c | 41 ++++++++-------------- 1 file changed, 14 insertions(+), 27 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c index 22c359871af6..bef81648797a 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c +++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c @@ -3,6 +3,7 @@ #include #include "test_tcpbpf.h" +#include "test_tcpbpf_kern.skel.h" #define LO_ADDR6 "::1" #define CG_NAME "/tcpbpf-user-test" @@ -130,44 +131,30 @@ done: void test_tcpbpf_user(void) { - const char *file = "test_tcpbpf_kern.o"; - int prog_fd, map_fd, sock_map_fd; - int error = EXIT_FAILURE; - struct bpf_object *obj; + struct test_tcpbpf_kern *skel; + int map_fd, sock_map_fd; int cg_fd = -1; - int rv; - cg_fd = test__join_cgroup(CG_NAME); - if (cg_fd < 0) - goto err; - - if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { - printf("FAILED: load_bpf_file failed for: %s\n", file); - goto err; - } + skel = test_tcpbpf_kern__open_and_load(); + if (CHECK(!skel, "open and load skel", "failed")) + return; - rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0); - if (rv) { - printf("FAILED: bpf_prog_attach: %d (%s)\n", - error, strerror(errno)); + cg_fd = test__join_cgroup(CG_NAME); + if (CHECK(cg_fd < 0, "test__join_cgroup(" CG_NAME ")", + "cg_fd:%d errno:%d", cg_fd, errno)) goto err; - } - map_fd = bpf_find_map(__func__, obj, "global_map"); - if (map_fd < 0) - goto err; + map_fd = bpf_map__fd(skel->maps.global_map); + sock_map_fd = bpf_map__fd(skel->maps.sockopt_results); - sock_map_fd = bpf_find_map(__func__, obj, "sockopt_results"); - if (sock_map_fd < 0) + skel->links.bpf_testcb = bpf_program__attach_cgroup(skel->progs.bpf_testcb, cg_fd); + if (!ASSERT_OK_PTR(skel->links.bpf_testcb, "attach_cgroup(bpf_testcb)")) goto err; run_test(map_fd, sock_map_fd); - error = 0; err: - bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); if (cg_fd != -1) close(cg_fd); - - CHECK_FAIL(error); + test_tcpbpf_kern__destroy(skel); } -- cgit v1.3.1 From 21b5177e997c98643eaabd4b917f2e287395af86 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 3 Nov 2020 13:35:19 -0800 Subject: selftest/bpf: Use global variables instead of maps for test_tcpbpf_kern Use global variables instead of global_map and sockopt_results_map to track test data. Doing this greatly simplifies the code as there is not need to take the extra steps of updating the maps or looking up elements. Signed-off-by: Alexander Duyck Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/160443931900.1086697.6588858453575682351.stgit@localhost.localdomain --- .../testing/selftests/bpf/prog_tests/tcpbpf_user.c | 51 ++++--------- .../testing/selftests/bpf/progs/test_tcpbpf_kern.c | 86 ++++------------------ tools/testing/selftests/bpf/test_tcpbpf.h | 2 + 3 files changed, 31 insertions(+), 108 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c index bef81648797a..ab5281475f44 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c +++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c @@ -10,7 +10,7 @@ static __u32 duration; -static void verify_result(int map_fd, int sock_map_fd) +static void verify_result(struct tcpbpf_globals *result) { __u32 expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) | (1 << BPF_SOCK_OPS_RWND_INIT) | @@ -20,46 +20,31 @@ static void verify_result(int map_fd, int sock_map_fd) (1 << BPF_SOCK_OPS_NEEDS_ECN) | (1 << BPF_SOCK_OPS_STATE_CB) | (1 << BPF_SOCK_OPS_TCP_LISTEN_CB)); - struct tcpbpf_globals result; - __u32 key = 0; - int res, rv; - - rv = bpf_map_lookup_elem(map_fd, &key, &result); - if (CHECK(rv, "bpf_map_lookup_elem(map_fd)", "err:%d errno:%d", - rv, errno)) - return; /* check global map */ - CHECK(expected_events != result.event_map, "event_map", + CHECK(expected_events != result->event_map, "event_map", "unexpected event_map: actual 0x%08x != expected 0x%08x\n", - result.event_map, expected_events); + result->event_map, expected_events); - ASSERT_EQ(result.bytes_received, 501, "bytes_received"); - ASSERT_EQ(result.bytes_acked, 1002, "bytes_acked"); - ASSERT_EQ(result.data_segs_in, 1, "data_segs_in"); - ASSERT_EQ(result.data_segs_out, 1, "data_segs_out"); - ASSERT_EQ(result.bad_cb_test_rv, 0x80, "bad_cb_test_rv"); - ASSERT_EQ(result.good_cb_test_rv, 0, "good_cb_test_rv"); - ASSERT_EQ(result.num_listen, 1, "num_listen"); + ASSERT_EQ(result->bytes_received, 501, "bytes_received"); + ASSERT_EQ(result->bytes_acked, 1002, "bytes_acked"); + ASSERT_EQ(result->data_segs_in, 1, "data_segs_in"); + ASSERT_EQ(result->data_segs_out, 1, "data_segs_out"); + ASSERT_EQ(result->bad_cb_test_rv, 0x80, "bad_cb_test_rv"); + ASSERT_EQ(result->good_cb_test_rv, 0, "good_cb_test_rv"); + ASSERT_EQ(result->num_listen, 1, "num_listen"); /* 3 comes from one listening socket + both ends of the connection */ - ASSERT_EQ(result.num_close_events, 3, "num_close_events"); + ASSERT_EQ(result->num_close_events, 3, "num_close_events"); /* check setsockopt for SAVE_SYN */ - rv = bpf_map_lookup_elem(sock_map_fd, &key, &res); - CHECK(rv, "bpf_map_lookup_elem(sock_map_fd)", "err:%d errno:%d", - rv, errno); - ASSERT_EQ(res, 0, "bpf_setsockopt(TCP_SAVE_SYN)"); + ASSERT_EQ(result->tcp_save_syn, 0, "tcp_save_syn"); /* check getsockopt for SAVED_SYN */ - key = 1; - rv = bpf_map_lookup_elem(sock_map_fd, &key, &res); - CHECK(rv, "bpf_map_lookup_elem(sock_map_fd)", "err:%d errno:%d", - rv, errno); - ASSERT_EQ(res, 1, "bpf_getsockopt(TCP_SAVED_SYN)"); + ASSERT_EQ(result->tcp_saved_syn, 1, "tcp_saved_syn"); } -static void run_test(int map_fd, int sock_map_fd) +static void run_test(struct tcpbpf_globals *result) { int listen_fd = -1, cli_fd = -1, accept_fd = -1; char buf[1000]; @@ -126,13 +111,12 @@ done: close(listen_fd); if (!err) - verify_result(map_fd, sock_map_fd); + verify_result(result); } void test_tcpbpf_user(void) { struct test_tcpbpf_kern *skel; - int map_fd, sock_map_fd; int cg_fd = -1; skel = test_tcpbpf_kern__open_and_load(); @@ -144,14 +128,11 @@ void test_tcpbpf_user(void) "cg_fd:%d errno:%d", cg_fd, errno)) goto err; - map_fd = bpf_map__fd(skel->maps.global_map); - sock_map_fd = bpf_map__fd(skel->maps.sockopt_results); - skel->links.bpf_testcb = bpf_program__attach_cgroup(skel->progs.bpf_testcb, cg_fd); if (!ASSERT_OK_PTR(skel->links.bpf_testcb, "attach_cgroup(bpf_testcb)")) goto err; - run_test(map_fd, sock_map_fd); + run_test(&skel->bss->global); err: if (cg_fd != -1) diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c index 3e6912e4df3d..e85e49deba70 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c @@ -14,40 +14,7 @@ #include #include "test_tcpbpf.h" -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 4); - __type(key, __u32); - __type(value, struct tcpbpf_globals); -} global_map SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 2); - __type(key, __u32); - __type(value, int); -} sockopt_results SEC(".maps"); - -static inline void update_event_map(int event) -{ - __u32 key = 0; - struct tcpbpf_globals g, *gp; - - gp = bpf_map_lookup_elem(&global_map, &key); - if (gp == NULL) { - struct tcpbpf_globals g = {0}; - - g.event_map |= (1 << event); - bpf_map_update_elem(&global_map, &key, &g, - BPF_ANY); - } else { - g = *gp; - g.event_map |= (1 << event); - bpf_map_update_elem(&global_map, &key, &g, - BPF_ANY); - } -} - +struct tcpbpf_globals global = {}; int _version SEC("version") = 1; SEC("sockops") @@ -105,29 +72,15 @@ int bpf_testcb(struct bpf_sock_ops *skops) op = (int) skops->op; - update_event_map(op); + global.event_map |= (1 << op); switch (op) { case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: /* Test failure to set largest cb flag (assumes not defined) */ - bad_call_rv = bpf_sock_ops_cb_flags_set(skops, 0x80); + global.bad_cb_test_rv = bpf_sock_ops_cb_flags_set(skops, 0x80); /* Set callback */ - good_call_rv = bpf_sock_ops_cb_flags_set(skops, + global.good_cb_test_rv = bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG); - /* Update results */ - { - __u32 key = 0; - struct tcpbpf_globals g, *gp; - - gp = bpf_map_lookup_elem(&global_map, &key); - if (!gp) - break; - g = *gp; - g.bad_cb_test_rv = bad_call_rv; - g.good_cb_test_rv = good_call_rv; - bpf_map_update_elem(&global_map, &key, &g, - BPF_ANY); - } break; case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: skops->sk_txhash = 0x12345f; @@ -143,10 +96,8 @@ int bpf_testcb(struct bpf_sock_ops *skops) thdr = (struct tcphdr *)(header + offset); v = thdr->syn; - __u32 key = 1; - bpf_map_update_elem(&sockopt_results, &key, &v, - BPF_ANY); + global.tcp_saved_syn = v; } } break; @@ -156,25 +107,16 @@ int bpf_testcb(struct bpf_sock_ops *skops) break; case BPF_SOCK_OPS_STATE_CB: if (skops->args[1] == BPF_TCP_CLOSE) { - __u32 key = 0; - struct tcpbpf_globals g, *gp; - - gp = bpf_map_lookup_elem(&global_map, &key); - if (!gp) - break; - g = *gp; if (skops->args[0] == BPF_TCP_LISTEN) { - g.num_listen++; + global.num_listen++; } else { - g.total_retrans = skops->total_retrans; - g.data_segs_in = skops->data_segs_in; - g.data_segs_out = skops->data_segs_out; - g.bytes_received = skops->bytes_received; - g.bytes_acked = skops->bytes_acked; + global.total_retrans = skops->total_retrans; + global.data_segs_in = skops->data_segs_in; + global.data_segs_out = skops->data_segs_out; + global.bytes_received = skops->bytes_received; + global.bytes_acked = skops->bytes_acked; } - g.num_close_events++; - bpf_map_update_elem(&global_map, &key, &g, - BPF_ANY); + global.num_close_events++; } break; case BPF_SOCK_OPS_TCP_LISTEN_CB: @@ -182,9 +124,7 @@ int bpf_testcb(struct bpf_sock_ops *skops) v = bpf_setsockopt(skops, IPPROTO_TCP, TCP_SAVE_SYN, &save_syn, sizeof(save_syn)); /* Update global map w/ result of setsock opt */ - __u32 key = 0; - - bpf_map_update_elem(&sockopt_results, &key, &v, BPF_ANY); + global.tcp_save_syn = v; break; default: rv = -1; diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h index 6220b95cbd02..0ed33521cbbb 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf.h +++ b/tools/testing/selftests/bpf/test_tcpbpf.h @@ -14,5 +14,7 @@ struct tcpbpf_globals { __u64 bytes_acked; __u32 num_listen; __u32 num_close_events; + __u32 tcp_save_syn; + __u32 tcp_saved_syn; }; #endif -- cgit v1.3.1 From a701d28e2d997705ae4376753af6e35b20029cef Mon Sep 17 00:00:00 2001 From: Dengcheng Zhu Date: Mon, 19 Oct 2020 15:21:24 +0800 Subject: perf annotate mips: Add perf arch instructions annotate handlers Support the MIPS architecture using the ins_ops association method. With this patch, perf-annotate can work well on MIPS. Testing it with a perf.data file collected on a mips machine: $./perf annotate -i perf.data : Disassembly of section .text: : : 00000000000be6a0 : : get_next_seq(): 0.00 : be6a0: lw v0,0(a0) 0.00 : be6a4: daddiu sp,sp,-128 0.00 : be6a8: ld a7,72(a0) 0.00 : be6ac: gssq s5,s4,80(sp) 0.00 : be6b0: gssq s1,s0,48(sp) 0.00 : be6b4: gssq s8,gp,112(sp) 0.00 : be6b8: gssq s7,s6,96(sp) 0.00 : be6bc: gssq s3,s2,64(sp) 0.00 : be6c0: sd a3,0(sp) 0.00 : be6c4: move s0,a0 0.00 : be6c8: sd v0,32(sp) 0.00 : be6cc: sd a5,8(sp) 0.00 : be6d0: sd zero,8(a0) 0.00 : be6d4: sd a6,16(sp) 0.00 : be6d8: ld s2,48(a0) 8.53 : be6dc: ld s1,40(a0) 9.42 : be6e0: ld v1,32(a0) 0.00 : be6e4: nop 0.00 : be6e8: ld s4,24(a0) 0.00 : be6ec: ld s5,16(a0) 0.00 : be6f0: sd a7,40(sp) 10.11 : be6f4: ld s6,64(a0) ... The original patch link: https://lore.kernel.org/patchwork/patch/1180480/ Signed-off-by: Dengcheng Zhu Cc: Dengcheng Zhu Cc: Jiaxun Yang Cc: Peter Zijlstra Cc: Xuefeng Li Cc: linux-mips@vger.kernel.org [ fanpeng@loongson.cn: Add missing "bgtzl", "bltzl", "bgezl", "blezl", "beql" and "bnel" for pre-R6processors ] Signed-off-by: Peng Fan Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/mips/Build | 2 +- tools/perf/arch/mips/annotate/instructions.c | 46 ++++++++++++++++++++++++++++ tools/perf/util/annotate.c | 8 +++++ 3 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 tools/perf/arch/mips/annotate/instructions.c (limited to 'tools') diff --git a/tools/perf/arch/mips/Build b/tools/perf/arch/mips/Build index 1bb8bf6d7fd4..e4e5f33c84d8 100644 --- a/tools/perf/arch/mips/Build +++ b/tools/perf/arch/mips/Build @@ -1 +1 @@ -# empty +perf-y += util/ diff --git a/tools/perf/arch/mips/annotate/instructions.c b/tools/perf/arch/mips/annotate/instructions.c new file mode 100644 index 000000000000..340993f2a897 --- /dev/null +++ b/tools/perf/arch/mips/annotate/instructions.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 + +static +struct ins_ops *mips__associate_ins_ops(struct arch *arch, const char *name) +{ + struct ins_ops *ops = NULL; + + if (!strncmp(name, "bal", 3) || + !strncmp(name, "bgezal", 6) || + !strncmp(name, "bltzal", 6) || + !strncmp(name, "bgtzal", 6) || + !strncmp(name, "blezal", 6) || + !strncmp(name, "beqzal", 6) || + !strncmp(name, "bnezal", 6) || + !strncmp(name, "bgtzl", 5) || + !strncmp(name, "bltzl", 5) || + !strncmp(name, "bgezl", 5) || + !strncmp(name, "blezl", 5) || + !strncmp(name, "jialc", 5) || + !strncmp(name, "beql", 4) || + !strncmp(name, "bnel", 4) || + !strncmp(name, "jal", 3)) + ops = &call_ops; + else if (!strncmp(name, "jr", 2)) + ops = &ret_ops; + else if (name[0] == 'j' || name[0] == 'b') + ops = &jump_ops; + else + return NULL; + + arch__associate_ins_ops(arch, name, ops); + + return ops; +} + +static +int mips__annotate_init(struct arch *arch, char *cpuid __maybe_unused) +{ + if (!arch->initialized) { + arch->associate_instruction_ops = mips__associate_ins_ops; + arch->initialized = true; + arch->objdump.comment_char = '#'; + } + + return 0; +} diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 6c8575e182ed..e52053a6ad42 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -152,6 +152,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i #include "arch/arm/annotate/instructions.c" #include "arch/arm64/annotate/instructions.c" #include "arch/csky/annotate/instructions.c" +#include "arch/mips/annotate/instructions.c" #include "arch/x86/annotate/instructions.c" #include "arch/powerpc/annotate/instructions.c" #include "arch/s390/annotate/instructions.c" @@ -174,6 +175,13 @@ static struct arch architectures[] = { .name = "csky", .init = csky__annotate_init, }, + { + .name = "mips", + .init = mips__annotate_init, + .objdump = { + .comment_char = '#', + }, + }, { .name = "x86", .init = x86__annotate_init, -- cgit v1.3.1 From a7c77c4f52c80fffc53b4c616a95f96d57170933 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 19 Oct 2020 16:25:45 -0700 Subject: perf version: Add a feature for libpfm4 If perf is built with libpfm4 (LIBPFM4=1) then advertise it in perf -vv. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201019232545.4047264-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-version.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c index d09ec2f03071..9cd074a3d825 100644 --- a/tools/perf/builtin-version.c +++ b/tools/perf/builtin-version.c @@ -80,6 +80,7 @@ static void library_status(void) STATUS(HAVE_LIBBPF_SUPPORT, bpf); STATUS(HAVE_AIO_SUPPORT, aio); STATUS(HAVE_ZSTD_SUPPORT, zstd); + STATUS(HAVE_LIBPFM, libpfm4); } int cmd_version(int argc, const char **argv) -- cgit v1.3.1 From 0ee281e1e4e12f8c09b99f80a2482a55cd7d6bca Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 19 Oct 2020 08:36:13 +0800 Subject: perf mem2node: Improve warning if detected no memory nodes Some archs (e.g. x86 and Arm64) don't enable the configuration CONFIG_MEMORY_HOTPLUG by default, if this configuration is not enabled when build the kernel image, the SysFS for memory nodes will be missed. This results in perf tool has no chance to catpure the memory nodes information, when perf tool reports the result and detects no memory nodes, it outputs "assertion failed at util/mem2node.c:99". The output log doesn't give out reason for the failure and users have no clue for how to fix it. This patch changes to use explicit way for warning: it tells user that detected no memory nodes and suggests to enable CONFIG_MEMORY_HOTPLUG for kernel building. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201019003613.8399-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem2node.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/mem2node.c b/tools/perf/util/mem2node.c index c84f5841c7ab..03a7d7b27737 100644 --- a/tools/perf/util/mem2node.c +++ b/tools/perf/util/mem2node.c @@ -96,7 +96,8 @@ int mem2node__init(struct mem2node *map, struct perf_env *env) /* Cut unused entries, due to merging. */ tmp_entries = realloc(entries, sizeof(*entries) * j); - if (tmp_entries || WARN_ON_ONCE(j == 0)) + if (tmp_entries || + WARN_ONCE(j == 0, "No memory nodes, is CONFIG_MEMORY_HOTPLUG enabled?\n")) entries = tmp_entries; for (i = 0; i < j; i++) { -- cgit v1.3.1 From 3989bbf9607d6716900d9df91c46a2ce8a504b93 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 19 Oct 2020 18:02:35 +0800 Subject: perf tests tsc: Make tsc testing as a common testing x86 arch provides the testing for conversion between tsc and perf time, the testing is located in x86 arch folder. Move this testing out from x86 arch folder and place it into the common testing folder, so allows to execute tsc testing on other architectures (e.g. Arm64). This patch removes the inclusion of "arch-tests.h" from the testing code, this can avoid building failure if any arch has no this header file. Committer testing: $ perf test -v tsc Couldn't bump rlimit(MEMLOCK), failures may take place when creating BPF maps, etc 70: Convert perf time to TSC : --- start --- test child forked, pid 4032834 mmap size 528384B 1st event perf time 165409788843605 tsc 336578703793868 rdtsc time 165409788854986 tsc 336578703837038 2nd event perf time 165409788855487 tsc 336578703838935 test child finished with 0 ---- end ---- Convert perf time to TSC: Ok $ Signed-off-by: Leo Yan Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20201019100236.23675-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/include/arch-tests.h | 1 - tools/perf/arch/x86/tests/Build | 1 - tools/perf/arch/x86/tests/arch-tests.c | 4 - tools/perf/arch/x86/tests/perf-time-to-tsc.c | 173 --------------------------- tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 4 + tools/perf/tests/perf-time-to-tsc.c | 171 ++++++++++++++++++++++++++ tools/perf/tests/tests.h | 1 + 8 files changed, 177 insertions(+), 179 deletions(-) delete mode 100644 tools/perf/arch/x86/tests/perf-time-to-tsc.c create mode 100644 tools/perf/tests/perf-time-to-tsc.c (limited to 'tools') diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index c41c5affe4be..6a54b94f1c25 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -7,7 +7,6 @@ struct test; /* Tests */ int test__rdpmc(struct test *test __maybe_unused, int subtest); -int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest); int test__insn_x86(struct test *test __maybe_unused, int subtest); int test__intel_pt_pkt_decoder(struct test *test, int subtest); int test__bp_modify(struct test *test, int subtest); diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 2997c506550c..36d4f248b51d 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -3,6 +3,5 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o perf-y += arch-tests.o perf-y += rdpmc.o -perf-y += perf-time-to-tsc.o perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o perf-$(CONFIG_X86_64) += bp-modify.o diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index 6763135aec17..bc25d727b4e9 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -8,10 +8,6 @@ struct test arch_tests[] = { .desc = "x86 rdpmc", .func = test__rdpmc, }, - { - .desc = "Convert perf time to TSC", - .func = test__perf_time_to_tsc, - }, #ifdef HAVE_DWARF_UNWIND_SUPPORT { .desc = "DWARF unwind", diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c deleted file mode 100644 index 026d32ed078e..000000000000 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ /dev/null @@ -1,173 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "debug.h" -#include "parse-events.h" -#include "evlist.h" -#include "evsel.h" -#include "thread_map.h" -#include "record.h" -#include "tsc.h" -#include "util/mmap.h" -#include "tests/tests.h" - -#include "arch-tests.h" - -#define CHECK__(x) { \ - while ((x) < 0) { \ - pr_debug(#x " failed!\n"); \ - goto out_err; \ - } \ -} - -#define CHECK_NOT_NULL__(x) { \ - while ((x) == NULL) { \ - pr_debug(#x " failed!\n"); \ - goto out_err; \ - } \ -} - -/** - * test__perf_time_to_tsc - test converting perf time to TSC. - * - * This function implements a test that checks that the conversion of perf time - * to and from TSC is consistent with the order of events. If the test passes - * %0 is returned, otherwise %-1 is returned. If TSC conversion is not - * supported then then the test passes but " (not supported)" is printed. - */ -int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe_unused) -{ - struct record_opts opts = { - .mmap_pages = UINT_MAX, - .user_freq = UINT_MAX, - .user_interval = ULLONG_MAX, - .target = { - .uses_mmap = true, - }, - .sample_time = true, - }; - struct perf_thread_map *threads = NULL; - struct perf_cpu_map *cpus = NULL; - struct evlist *evlist = NULL; - struct evsel *evsel = NULL; - int err = -1, ret, i; - const char *comm1, *comm2; - struct perf_tsc_conversion tc; - struct perf_event_mmap_page *pc; - union perf_event *event; - u64 test_tsc, comm1_tsc, comm2_tsc; - u64 test_time, comm1_time = 0, comm2_time = 0; - struct mmap *md; - - threads = thread_map__new(-1, getpid(), UINT_MAX); - CHECK_NOT_NULL__(threads); - - cpus = perf_cpu_map__new(NULL); - CHECK_NOT_NULL__(cpus); - - evlist = evlist__new(); - CHECK_NOT_NULL__(evlist); - - perf_evlist__set_maps(&evlist->core, cpus, threads); - - CHECK__(parse_events(evlist, "cycles:u", NULL)); - - perf_evlist__config(evlist, &opts, NULL); - - evsel = evlist__first(evlist); - - evsel->core.attr.comm = 1; - evsel->core.attr.disabled = 1; - evsel->core.attr.enable_on_exec = 0; - - CHECK__(evlist__open(evlist)); - - CHECK__(evlist__mmap(evlist, UINT_MAX)); - - pc = evlist->mmap[0].core.base; - ret = perf_read_tsc_conversion(pc, &tc); - if (ret) { - if (ret == -EOPNOTSUPP) { - fprintf(stderr, " (not supported)"); - return 0; - } - goto out_err; - } - - evlist__enable(evlist); - - comm1 = "Test COMM 1"; - CHECK__(prctl(PR_SET_NAME, (unsigned long)comm1, 0, 0, 0)); - - test_tsc = rdtsc(); - - comm2 = "Test COMM 2"; - CHECK__(prctl(PR_SET_NAME, (unsigned long)comm2, 0, 0, 0)); - - evlist__disable(evlist); - - for (i = 0; i < evlist->core.nr_mmaps; i++) { - md = &evlist->mmap[i]; - if (perf_mmap__read_init(&md->core) < 0) - continue; - - while ((event = perf_mmap__read_event(&md->core)) != NULL) { - struct perf_sample sample; - - if (event->header.type != PERF_RECORD_COMM || - (pid_t)event->comm.pid != getpid() || - (pid_t)event->comm.tid != getpid()) - goto next_event; - - if (strcmp(event->comm.comm, comm1) == 0) { - CHECK__(evsel__parse_sample(evsel, event, &sample)); - comm1_time = sample.time; - } - if (strcmp(event->comm.comm, comm2) == 0) { - CHECK__(evsel__parse_sample(evsel, event, &sample)); - comm2_time = sample.time; - } -next_event: - perf_mmap__consume(&md->core); - } - perf_mmap__read_done(&md->core); - } - - if (!comm1_time || !comm2_time) - goto out_err; - - test_time = tsc_to_perf_time(test_tsc, &tc); - comm1_tsc = perf_time_to_tsc(comm1_time, &tc); - comm2_tsc = perf_time_to_tsc(comm2_time, &tc); - - pr_debug("1st event perf time %"PRIu64" tsc %"PRIu64"\n", - comm1_time, comm1_tsc); - pr_debug("rdtsc time %"PRIu64" tsc %"PRIu64"\n", - test_time, test_tsc); - pr_debug("2nd event perf time %"PRIu64" tsc %"PRIu64"\n", - comm2_time, comm2_tsc); - - if (test_time <= comm1_time || - test_time >= comm2_time) - goto out_err; - - if (test_tsc <= comm1_tsc || - test_tsc >= comm2_tsc) - goto out_err; - - err = 0; - -out_err: - evlist__delete(evlist); - return err; -} diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 4d15bf6041fb..aa4dc4f5abde 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -62,6 +62,7 @@ perf-y += pfm.o perf-y += parse-metric.o perf-y += pe-file-parsing.o perf-y += expand-cgroup.o +perf-y += perf-time-to-tsc.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 132bdb3e6c31..02e7bbf70419 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -349,6 +349,10 @@ static struct test generic_tests[] = { .desc = "Event expansion for cgroups", .func = test__expand_cgroup_events, }, + { + .desc = "Convert perf time to TSC", + .func = test__perf_time_to_tsc, + }, { .func = NULL, }, diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c new file mode 100644 index 000000000000..aee97c16c0d9 --- /dev/null +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "parse-events.h" +#include "evlist.h" +#include "evsel.h" +#include "thread_map.h" +#include "record.h" +#include "tsc.h" +#include "mmap.h" +#include "tests.h" + +#define CHECK__(x) { \ + while ((x) < 0) { \ + pr_debug(#x " failed!\n"); \ + goto out_err; \ + } \ +} + +#define CHECK_NOT_NULL__(x) { \ + while ((x) == NULL) { \ + pr_debug(#x " failed!\n"); \ + goto out_err; \ + } \ +} + +/** + * test__perf_time_to_tsc - test converting perf time to TSC. + * + * This function implements a test that checks that the conversion of perf time + * to and from TSC is consistent with the order of events. If the test passes + * %0 is returned, otherwise %-1 is returned. If TSC conversion is not + * supported then then the test passes but " (not supported)" is printed. + */ +int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe_unused) +{ + struct record_opts opts = { + .mmap_pages = UINT_MAX, + .user_freq = UINT_MAX, + .user_interval = ULLONG_MAX, + .target = { + .uses_mmap = true, + }, + .sample_time = true, + }; + struct perf_thread_map *threads = NULL; + struct perf_cpu_map *cpus = NULL; + struct evlist *evlist = NULL; + struct evsel *evsel = NULL; + int err = -1, ret, i; + const char *comm1, *comm2; + struct perf_tsc_conversion tc; + struct perf_event_mmap_page *pc; + union perf_event *event; + u64 test_tsc, comm1_tsc, comm2_tsc; + u64 test_time, comm1_time = 0, comm2_time = 0; + struct mmap *md; + + threads = thread_map__new(-1, getpid(), UINT_MAX); + CHECK_NOT_NULL__(threads); + + cpus = perf_cpu_map__new(NULL); + CHECK_NOT_NULL__(cpus); + + evlist = evlist__new(); + CHECK_NOT_NULL__(evlist); + + perf_evlist__set_maps(&evlist->core, cpus, threads); + + CHECK__(parse_events(evlist, "cycles:u", NULL)); + + perf_evlist__config(evlist, &opts, NULL); + + evsel = evlist__first(evlist); + + evsel->core.attr.comm = 1; + evsel->core.attr.disabled = 1; + evsel->core.attr.enable_on_exec = 0; + + CHECK__(evlist__open(evlist)); + + CHECK__(evlist__mmap(evlist, UINT_MAX)); + + pc = evlist->mmap[0].core.base; + ret = perf_read_tsc_conversion(pc, &tc); + if (ret) { + if (ret == -EOPNOTSUPP) { + fprintf(stderr, " (not supported)"); + return 0; + } + goto out_err; + } + + evlist__enable(evlist); + + comm1 = "Test COMM 1"; + CHECK__(prctl(PR_SET_NAME, (unsigned long)comm1, 0, 0, 0)); + + test_tsc = rdtsc(); + + comm2 = "Test COMM 2"; + CHECK__(prctl(PR_SET_NAME, (unsigned long)comm2, 0, 0, 0)); + + evlist__disable(evlist); + + for (i = 0; i < evlist->core.nr_mmaps; i++) { + md = &evlist->mmap[i]; + if (perf_mmap__read_init(&md->core) < 0) + continue; + + while ((event = perf_mmap__read_event(&md->core)) != NULL) { + struct perf_sample sample; + + if (event->header.type != PERF_RECORD_COMM || + (pid_t)event->comm.pid != getpid() || + (pid_t)event->comm.tid != getpid()) + goto next_event; + + if (strcmp(event->comm.comm, comm1) == 0) { + CHECK__(evsel__parse_sample(evsel, event, &sample)); + comm1_time = sample.time; + } + if (strcmp(event->comm.comm, comm2) == 0) { + CHECK__(evsel__parse_sample(evsel, event, &sample)); + comm2_time = sample.time; + } +next_event: + perf_mmap__consume(&md->core); + } + perf_mmap__read_done(&md->core); + } + + if (!comm1_time || !comm2_time) + goto out_err; + + test_time = tsc_to_perf_time(test_tsc, &tc); + comm1_tsc = perf_time_to_tsc(comm1_time, &tc); + comm2_tsc = perf_time_to_tsc(comm2_time, &tc); + + pr_debug("1st event perf time %"PRIu64" tsc %"PRIu64"\n", + comm1_time, comm1_tsc); + pr_debug("rdtsc time %"PRIu64" tsc %"PRIu64"\n", + test_time, test_tsc); + pr_debug("2nd event perf time %"PRIu64" tsc %"PRIu64"\n", + comm2_time, comm2_tsc); + + if (test_time <= comm1_time || + test_time >= comm2_time) + goto out_err; + + if (test_tsc <= comm1_tsc || + test_tsc >= comm2_tsc) + goto out_err; + + err = 0; + +out_err: + evlist__delete(evlist); + return err; +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index c85a2c08e407..c9b180e640e5 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -124,6 +124,7 @@ int test__pfm_subtest_get_nr(void); int test__parse_metric(struct test *test, int subtest); int test__pe_file_parsing(struct test *test, int subtest); int test__expand_cgroup_events(struct test *test, int subtest); +int test__perf_time_to_tsc(struct test *test, int subtest); bool test__bp_signal_is_supported(void); bool test__bp_account_is_supported(void); -- cgit v1.3.1 From 248dd9b591db5bc5fb46a0e015753cfcfe60a345 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 19 Oct 2020 18:02:36 +0800 Subject: perf tests tsc: Add checking helper is_supported() So far tsc is enabled on x86_64, i386 and Arm64 architectures, add checking helper to skip this testing for other architectures. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20201019100236.23675-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/perf-time-to-tsc.c | 13 +++++++++++++ tools/perf/tests/tests.h | 1 + 3 files changed, 15 insertions(+) (limited to 'tools') diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 02e7bbf70419..a185904c47f3 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -352,6 +352,7 @@ static struct test generic_tests[] = { { .desc = "Convert perf time to TSC", .func = test__perf_time_to_tsc, + .is_supported = test__tsc_is_supported, }, { .func = NULL, diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index aee97c16c0d9..a9560e0f6360 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -169,3 +169,16 @@ out_err: evlist__delete(evlist); return err; } + +bool test__tsc_is_supported(void) +{ + /* + * Except x86_64/i386 and Arm64, other archs don't support TSC in perf. + * Just enable the test for x86_64/i386 and Arm64 archs. + */ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) + return true; +#else + return false; +#endif +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index c9b180e640e5..b1f2aac93b33 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -129,6 +129,7 @@ int test__perf_time_to_tsc(struct test *test, int subtest); bool test__bp_signal_is_supported(void); bool test__bp_account_is_supported(void); bool test__wp_is_supported(void); +bool test__tsc_is_supported(void); #if defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT -- cgit v1.3.1 From cc3b964d5eb49d0c9da08760f8760bb6945f1df5 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Fri, 16 Oct 2020 16:16:50 +0300 Subject: perf test: Implement skip_reason callback for watchpoint tests Currently reason for skipping the read only watchpoint test is only seen when running in verbose mode: $ perf test watchpoint 23: Watchpoint : 23.1: Read Only Watchpoint : Skip 23.2: Write Only Watchpoint : Ok 23.3: Read / Write Watchpoint : Ok 23.4: Modify Watchpoint : Ok $ perf test -v watchpoint 23: Watchpoint : 23.1: Read Only Watchpoint : --- start --- test child forked, pid 60204 Hardware does not support read only watchpoints. test child finished with -2 Implement skip_reason callback for the watchpoint tests, so that it's easy to see reason why the test is skipped: $ perf test watchpoint 23: Watchpoint : 23.1: Read Only Watchpoint : Skip (missing hardware support) 23.2: Write Only Watchpoint : Ok 23.3: Read / Write Watchpoint : Ok 23.4: Modify Watchpoint : Ok Signed-off-by: Tommi Rantala Tested-by: Arnaldo Carvalho de Melo Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20201016131650.72476-1-tommi.t.rantala@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/tests.h | 1 + tools/perf/tests/wp.c | 21 +++++++++++++++------ 3 files changed, 17 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index a185904c47f3..7273823d0d02 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -142,6 +142,7 @@ static struct test generic_tests[] = { .skip_if_fail = false, .get_nr = test__wp_subtest_get_nr, .get_desc = test__wp_subtest_get_desc, + .skip_reason = test__wp_subtest_skip_reason, }, }, { diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index b1f2aac93b33..8e24a61fe4c2 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -66,6 +66,7 @@ int test__bp_signal_overflow(struct test *test, int subtest); int test__bp_accounting(struct test *test, int subtest); int test__wp(struct test *test, int subtest); const char *test__wp_subtest_get_desc(int subtest); +const char *test__wp_subtest_skip_reason(int subtest); int test__wp_subtest_get_nr(void); int test__task_exit(struct test *test, int subtest); int test__mem(struct test *test, int subtest); diff --git a/tools/perf/tests/wp.c b/tools/perf/tests/wp.c index d262d6639829..9387fa76faa5 100644 --- a/tools/perf/tests/wp.c +++ b/tools/perf/tests/wp.c @@ -174,10 +174,12 @@ static bool wp_ro_supported(void) #endif } -static void wp_ro_skip_msg(void) +static const char *wp_ro_skip_msg(void) { #if defined (__x86_64__) || defined (__i386__) - pr_debug("Hardware does not support read only watchpoints.\n"); + return "missing hardware support"; +#else + return NULL; #endif } @@ -185,7 +187,7 @@ static struct { const char *desc; int (*target_func)(void); bool (*is_supported)(void); - void (*skip_msg)(void); + const char *(*skip_msg)(void); } wp_testcase_table[] = { { .desc = "Read Only Watchpoint", @@ -219,16 +221,23 @@ const char *test__wp_subtest_get_desc(int i) return wp_testcase_table[i].desc; } +const char *test__wp_subtest_skip_reason(int i) +{ + if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table)) + return NULL; + if (!wp_testcase_table[i].skip_msg) + return NULL; + return wp_testcase_table[i].skip_msg(); +} + int test__wp(struct test *test __maybe_unused, int i) { if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table)) return TEST_FAIL; if (wp_testcase_table[i].is_supported && - !wp_testcase_table[i].is_supported()) { - wp_testcase_table[i].skip_msg(); + !wp_testcase_table[i].is_supported()) return TEST_SKIP; - } return !wp_testcase_table[i].target_func() ? TEST_OK : TEST_FAIL; } -- cgit v1.3.1 From c18cf78d7969db89934587fa476220eefe7bd4bd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 20 Oct 2020 14:12:37 -0300 Subject: perf bpf: Enclose libbpf.h include within HAVE_LIBBPF_SUPPORT As it uses the 'deprecated' attribute in a way that breaks the build with old gcc compilers, so to continue being able to build in such systems where NO_LIBBPF=1 is being used, enclose it under HAVE_LIBBPF_SUPPORT. 1 centos:6 : FAIL gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23) 2 oraclelinux:6 : FAIL gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23.0.1) CC /tmp/build/perf/builtin-record.o In file included from util/bpf-loader.h:11, from builtin-record.c:39: /git/linux/tools/lib/bpf/libbpf.h:203: error: wrong number of arguments specified for 'deprecated' attribute Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-loader.h | 3 +++ tools/perf/util/parse-events.c | 25 +++++++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h index 25251d63164c..5d1c725cea29 100644 --- a/tools/perf/util/bpf-loader.h +++ b/tools/perf/util/bpf-loader.h @@ -8,6 +8,8 @@ #include #include + +#ifdef HAVE_LIBBPF_SUPPORT #include enum bpf_loader_errno { @@ -38,6 +40,7 @@ enum bpf_loader_errno { BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG, /* Index too large */ __BPF_LOADER_ERRNO__END, }; +#endif // HAVE_LIBBPF_SUPPORT struct evsel; struct evlist; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 3b273580fb84..3b581d7b3213 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -668,6 +668,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, return ret; } +#ifdef HAVE_LIBBPF_SUPPORT struct __add_bpf_event_param { struct parse_events_state *parse_state; struct list_head *list; @@ -900,6 +901,30 @@ int parse_events_load_bpf(struct parse_events_state *parse_state, list_splice_tail(&obj_head_config, head_config); return err; } +#else // HAVE_LIBBPF_SUPPORT +int parse_events_load_bpf_obj(struct parse_events_state *parse_state, + struct list_head *list __maybe_unused, + struct bpf_object *obj __maybe_unused, + struct list_head *head_config __maybe_unused) +{ + parse_events__handle_error(parse_state->error, 0, + strdup("BPF support is not compiled"), + strdup("Make sure libbpf-devel is available at build time.")); + return -ENOTSUP; +} + +int parse_events_load_bpf(struct parse_events_state *parse_state, + struct list_head *list __maybe_unused, + char *bpf_file_name __maybe_unused, + bool source __maybe_unused, + struct list_head *head_config __maybe_unused) +{ + parse_events__handle_error(parse_state->error, 0, + strdup("BPF support is not compiled"), + strdup("Make sure libbpf-devel is available at build time.")); + return -ENOTSUP; +} +#endif // HAVE_LIBBPF_SUPPORT static int parse_breakpoint_type(const char *type, struct perf_event_attr *attr) -- cgit v1.3.1 From 38219f24116ace9b0e604f2ced9c7dbef3041058 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 20 Oct 2020 15:00:51 -0300 Subject: perf tests: Skip the llvm and bpf tests if HAVE_LIBBPF_SUPPORT isn't defined If either NO_LIBBPF=1 is passed, explicitely disabling it or if libbpf is not available due to some missing dependency, skip its tests, telling the user the feature isn't available. # perf test 40: LLVM search and compile : Skip (not compiled in) 41: Session topology : Ok 42: BPF filter : Skip (not compiled in) Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf.c | 4 ++-- tools/perf/tests/llvm.c | 30 +++++++++++++++++++----------- 2 files changed, 21 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index cd77e334e577..d880c588a951 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -9,12 +9,10 @@ #include #include #include -#include #include #include #include #include -#include #include #include "tests.h" #include "llvm.h" @@ -25,6 +23,8 @@ #define PERF_TEST_BPF_PATH "/sys/fs/bpf/perf_test" #ifdef HAVE_LIBBPF_SUPPORT +#include +#include static int epoll_pwait_loop(void) { diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c index ae6cda81c209..98da8a8757ab 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ -2,13 +2,13 @@ #include #include #include -#include -#include -#include "llvm.h" #include "tests.h" #include "debug.h" #ifdef HAVE_LIBBPF_SUPPORT +#include +#include +#include "llvm.h" static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) { struct bpf_object *obj; @@ -19,14 +19,6 @@ static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) bpf_object__close(obj); return TEST_OK; } -#else -static int test__bpf_parsing(void *obj_buf __maybe_unused, - size_t obj_buf_sz __maybe_unused) -{ - pr_debug("Skip bpf parsing\n"); - return TEST_OK; -} -#endif static struct { const char *source; @@ -170,3 +162,19 @@ const char *test__llvm_subtest_get_desc(int subtest) return bpf_source_table[subtest].desc; } +#else //HAVE_LIBBPF_SUPPORT +int test__llvm(struct test *test __maybe_unused, int subtest __maybe_unused) +{ + return TEST_SKIP; +} + +int test__llvm_subtest_get_nr(void) +{ + return 0; +} + +const char *test__llvm_subtest_get_desc(int subtest __maybe_unused) +{ + return NULL; +} +#endif // HAVE_LIBBPF_SUPPORT -- cgit v1.3.1 From 20e88c6076fc50ebf0560e730349000ff2da94fd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 20 Oct 2020 15:48:23 -0300 Subject: perf annotate: Move bpf header inclusion to inside HAVE_LIBBPF_SUPPORT No need to include it otherwise. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e52053a6ad42..ce8c07bc8c56 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -10,10 +10,6 @@ #include #include #include -#include -#include -#include -#include #include "util.h" // hex_width() #include "ui/ui.h" #include "sort.h" @@ -1684,6 +1680,10 @@ fallback: #define PACKAGE "perf" #include #include +#include +#include +#include +#include static int symbol__disassemble_bpf(struct symbol *sym, struct annotate_args *args) -- cgit v1.3.1 From ef0580ecd8b0306acf09b7a7508d72cafc67896d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 20 Oct 2020 15:57:21 -0300 Subject: perf env: Conditionally compile BPF support code on having HAVE_LIBBPF_SUPPORT If libbpf isn't selected, no need for a bunch of related code, that were not even being used, as code using these perf_env methods was also enclosed in HAVE_LIBBPF_SUPPORT. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 14 ++++++++++---- tools/perf/util/env.c | 15 ++++++++++++--- tools/perf/util/env.h | 4 ++-- tools/perf/util/header.c | 21 ++++++++------------- 4 files changed, 32 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 55c11e854fe4..89b5fd2b5de3 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -11,8 +11,10 @@ #include #include #include +#ifdef HAVE_LIBBPF_SUPPORT #include #include "bpf-event.h" +#endif #include "compress.h" #include "env.h" #include "namespaces.h" @@ -728,6 +730,7 @@ bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by) return false; } +#ifdef HAVE_LIBBPF_SUPPORT static ssize_t bpf_read(struct dso *dso, u64 offset, char *data) { struct bpf_prog_info_node *node; @@ -765,6 +768,7 @@ static int bpf_size(struct dso *dso) dso->data.file_size = node->info_linear->info.jited_prog_len; return 0; } +#endif // HAVE_LIBBPF_SUPPORT static void dso_cache__free(struct dso *dso) @@ -894,10 +898,12 @@ static struct dso_cache *dso_cache__populate(struct dso *dso, *ret = -ENOMEM; return NULL; } - +#ifdef HAVE_LIBBPF_SUPPORT if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) *ret = bpf_read(dso, cache_offset, cache->data); - else if (dso->binary_type == DSO_BINARY_TYPE__OOL) + else +#endif + if (dso->binary_type == DSO_BINARY_TYPE__OOL) *ret = DSO__DATA_CACHE_SIZE; else *ret = file_read(dso, machine, cache_offset, cache->data); @@ -1018,10 +1024,10 @@ int dso__data_file_size(struct dso *dso, struct machine *machine) if (dso->data.status == DSO_DATA_STATUS_ERROR) return -1; - +#ifdef HAVE_LIBBPF_SUPPORT if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) return bpf_size(dso); - +#endif return file_size(dso, machine); } diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index fadc59708ece..9130f6fad8d5 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -5,16 +5,18 @@ #include "util/header.h" #include #include -#include "bpf-event.h" #include "cgroup.h" #include #include -#include #include #include struct perf_env perf_env; +#ifdef HAVE_LIBBPF_SUPPORT +#include "bpf-event.h" +#include + void perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node) { @@ -163,6 +165,11 @@ static void perf_env__purge_bpf(struct perf_env *env) up_write(&env->bpf_progs.lock); } +#else // HAVE_LIBBPF_SUPPORT +static void perf_env__purge_bpf(struct perf_env *env __maybe_unused) +{ +} +#endif // HAVE_LIBBPF_SUPPORT void perf_env__exit(struct perf_env *env) { @@ -197,11 +204,13 @@ void perf_env__exit(struct perf_env *env) zfree(&env->memory_nodes); } -void perf_env__init(struct perf_env *env) +void perf_env__init(struct perf_env *env __maybe_unused) { +#ifdef HAVE_LIBBPF_SUPPORT env->bpf_progs.infos = RB_ROOT; env->bpf_progs.btfs = RB_ROOT; init_rwsem(&env->bpf_progs.lock); +#endif } int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]) diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index a12972652006..ca249bf5e984 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -77,7 +77,7 @@ struct perf_env { struct numa_node *numa_nodes; struct memory_node *memory_nodes; unsigned long long memory_bsize; - +#ifdef HAVE_LIBBPF_SUPPORT /* * bpf_info_lock protects bpf rbtrees. This is needed because the * trees are accessed by different threads in perf-top @@ -89,7 +89,7 @@ struct perf_env { struct rb_root btfs; u32 btfs_cnt; } bpf_progs; - +#endif // HAVE_LIBBPF_SUPPORT /* same reason as above (for perf-top) */ struct { struct rw_semaphore lock; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index be850e9f8852..598285a21dad 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -19,7 +19,9 @@ #include #include #include +#ifdef HAVE_LIBBPF_SUPPORT #include +#endif #include #include "dso.h" @@ -987,13 +989,6 @@ out: up_read(&env->bpf_progs.lock); return ret; } -#else // HAVE_LIBBPF_SUPPORT -static int write_bpf_prog_info(struct feat_fd *ff __maybe_unused, - struct evlist *evlist __maybe_unused) -{ - return 0; -} -#endif // HAVE_LIBBPF_SUPPORT static int write_bpf_btf(struct feat_fd *ff, struct evlist *evlist __maybe_unused) @@ -1027,6 +1022,7 @@ out: up_read(&env->bpf_progs.lock); return ret; } +#endif // HAVE_LIBBPF_SUPPORT static int cpu_cache_level__sort(const void *a, const void *b) { @@ -1638,6 +1634,7 @@ static void print_dir_format(struct feat_fd *ff, FILE *fp) fprintf(fp, "# directory data version : %"PRIu64"\n", data->dir.version); } +#ifdef HAVE_LIBBPF_SUPPORT static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp) { struct perf_env *env = &ff->ph->env; @@ -1683,6 +1680,7 @@ static void print_bpf_btf(struct feat_fd *ff, FILE *fp) up_read(&env->bpf_progs.lock); } +#endif // HAVE_LIBBPF_SUPPORT static void free_event_desc(struct evsel *events) { @@ -2938,12 +2936,6 @@ out: up_write(&env->bpf_progs.lock); return err; } -#else // HAVE_LIBBPF_SUPPORT -static int process_bpf_prog_info(struct feat_fd *ff __maybe_unused, void *data __maybe_unused) -{ - return 0; -} -#endif // HAVE_LIBBPF_SUPPORT static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) { @@ -2990,6 +2982,7 @@ out: free(node); return err; } +#endif // HAVE_LIBBPF_SUPPORT static int process_compressed(struct feat_fd *ff, void *data __maybe_unused) @@ -3120,8 +3113,10 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPR(MEM_TOPOLOGY, mem_topology, true), FEAT_OPR(CLOCKID, clockid, false), FEAT_OPN(DIR_FORMAT, dir_format, false), +#ifdef HAVE_LIBBPF_SUPPORT FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false), FEAT_OPR(BPF_BTF, bpf_btf, false), +#endif FEAT_OPR(COMPRESSED, compressed, false), FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false), FEAT_OPR(CLOCK_DATA, clock_data, false), -- cgit v1.3.1 From 1218838d68f5e9cc195685f17375be96a54832c7 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 27 Oct 2020 15:24:21 +0900 Subject: perf kvm: Add kvm-stat for arm64 Add support for 'perf kvm stat' on arm64 platform. Example: # perf kvm stat report Analyze events for all VMs, all VCPUs: VM-EXIT Samples Samples% Time% Min Time Max Time Avg time DABT_LOW 661867 98.91% 40.45% 2.19us 3364.65us 6.24us ( +- 0.34% ) IRQ 4598 0.69% 57.44% 2.89us 3397.59us 1276.27us ( +- 1.61% ) WFx 1475 0.22% 1.71% 2.22us 3388.63us 118.31us ( +- 8.69% ) IABT_LOW 1018 0.15% 0.38% 2.22us 2742.07us 38.29us ( +- 12.55% ) SYS64 180 0.03% 0.01% 2.07us 112.91us 6.57us ( +- 14.95% ) HVC64 17 0.00% 0.01% 2.19us 322.35us 42.95us ( +- 58.98% ) Total Samples:669155, Total events handled time:10216387.86us. Signed-off-by: Sergey Senozhatsky Reviewed-by: Leo Yan Tested-by: Leo Yan Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: Suleiman Souhlal Link: http://lore.kernel.org/lkml/20201027062421.463355-1-sergey.senozhatsky@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/Makefile | 1 + tools/perf/arch/arm64/util/Build | 1 + tools/perf/arch/arm64/util/arm64_exception_types.h | 92 ++++++++++++++++++++++ tools/perf/arch/arm64/util/kvm-stat.c | 85 ++++++++++++++++++++ 4 files changed, 179 insertions(+) create mode 100644 tools/perf/arch/arm64/util/arm64_exception_types.h create mode 100644 tools/perf/arch/arm64/util/kvm-stat.c (limited to 'tools') diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile index dbef716a1913..fab3095fb5d0 100644 --- a/tools/perf/arch/arm64/Makefile +++ b/tools/perf/arch/arm64/Makefile @@ -4,6 +4,7 @@ PERF_HAVE_DWARF_REGS := 1 endif PERF_HAVE_JITDUMP := 1 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 +HAVE_KVM_STAT_SUPPORT := 1 # # Syscall table generation for perf diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index b53294d74b01..8d2b9bcfffca 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -2,6 +2,7 @@ perf-y += header.o perf-y += machine.o perf-y += perf_regs.o perf-y += tsc.o +perf-y += kvm-stat.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm64/util/arm64_exception_types.h b/tools/perf/arch/arm64/util/arm64_exception_types.h new file mode 100644 index 000000000000..27c981ebe401 --- /dev/null +++ b/tools/perf/arch/arm64/util/arm64_exception_types.h @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef ARCH_PERF_ARM64_EXCEPTION_TYPES_H +#define ARCH_PERF_ARM64_EXCEPTION_TYPES_H + +/* Per asm/virt.h */ +#define HVC_STUB_ERR 0xbadca11 + +/* Per asm/kvm_asm.h */ +#define ARM_EXCEPTION_IRQ 0 +#define ARM_EXCEPTION_EL1_SERROR 1 +#define ARM_EXCEPTION_TRAP 2 +#define ARM_EXCEPTION_IL 3 +/* The hyp-stub will return this for any kvm_call_hyp() call */ +#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR + +#define kvm_arm_exception_type \ + {ARM_EXCEPTION_IRQ, "IRQ" }, \ + {ARM_EXCEPTION_EL1_SERROR, "SERROR" }, \ + {ARM_EXCEPTION_TRAP, "TRAP" }, \ + {ARM_EXCEPTION_IL, "ILLEGAL" }, \ + {ARM_EXCEPTION_HYP_GONE, "HYP_GONE" } + +/* Per asm/esr.h */ +#define ESR_ELx_EC_UNKNOWN (0x00) +#define ESR_ELx_EC_WFx (0x01) +/* Unallocated EC: 0x02 */ +#define ESR_ELx_EC_CP15_32 (0x03) +#define ESR_ELx_EC_CP15_64 (0x04) +#define ESR_ELx_EC_CP14_MR (0x05) +#define ESR_ELx_EC_CP14_LS (0x06) +#define ESR_ELx_EC_FP_ASIMD (0x07) +#define ESR_ELx_EC_CP10_ID (0x08) /* EL2 only */ +#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */ +/* Unallocated EC: 0x0A - 0x0B */ +#define ESR_ELx_EC_CP14_64 (0x0C) +/* Unallocated EC: 0x0d */ +#define ESR_ELx_EC_ILL (0x0E) +/* Unallocated EC: 0x0F - 0x10 */ +#define ESR_ELx_EC_SVC32 (0x11) +#define ESR_ELx_EC_HVC32 (0x12) /* EL2 only */ +#define ESR_ELx_EC_SMC32 (0x13) /* EL2 and above */ +/* Unallocated EC: 0x14 */ +#define ESR_ELx_EC_SVC64 (0x15) +#define ESR_ELx_EC_HVC64 (0x16) /* EL2 and above */ +#define ESR_ELx_EC_SMC64 (0x17) /* EL2 and above */ +#define ESR_ELx_EC_SYS64 (0x18) +#define ESR_ELx_EC_SVE (0x19) +#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */ +/* Unallocated EC: 0x1b - 0x1E */ +#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */ +#define ESR_ELx_EC_IABT_LOW (0x20) +#define ESR_ELx_EC_IABT_CUR (0x21) +#define ESR_ELx_EC_PC_ALIGN (0x22) +/* Unallocated EC: 0x23 */ +#define ESR_ELx_EC_DABT_LOW (0x24) +#define ESR_ELx_EC_DABT_CUR (0x25) +#define ESR_ELx_EC_SP_ALIGN (0x26) +/* Unallocated EC: 0x27 */ +#define ESR_ELx_EC_FP_EXC32 (0x28) +/* Unallocated EC: 0x29 - 0x2B */ +#define ESR_ELx_EC_FP_EXC64 (0x2C) +/* Unallocated EC: 0x2D - 0x2E */ +#define ESR_ELx_EC_SERROR (0x2F) +#define ESR_ELx_EC_BREAKPT_LOW (0x30) +#define ESR_ELx_EC_BREAKPT_CUR (0x31) +#define ESR_ELx_EC_SOFTSTP_LOW (0x32) +#define ESR_ELx_EC_SOFTSTP_CUR (0x33) +#define ESR_ELx_EC_WATCHPT_LOW (0x34) +#define ESR_ELx_EC_WATCHPT_CUR (0x35) +/* Unallocated EC: 0x36 - 0x37 */ +#define ESR_ELx_EC_BKPT32 (0x38) +/* Unallocated EC: 0x39 */ +#define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */ +/* Unallocated EC: 0x3B */ +#define ESR_ELx_EC_BRK64 (0x3C) +/* Unallocated EC: 0x3D - 0x3F */ +#define ESR_ELx_EC_MAX (0x3F) + +#define ECN(x) { ESR_ELx_EC_##x, #x } + +#define kvm_arm_exception_class \ + ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \ + ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(PAC), ECN(CP14_64), \ + ECN(SVC64), ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(SVE), \ + ECN(IMP_DEF), ECN(IABT_LOW), ECN(IABT_CUR), \ + ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \ + ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \ + ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \ + ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \ + ECN(BKPT32), ECN(VECTOR32), ECN(BRK64) + +#endif /* ARCH_PERF_ARM64_EXCEPTION_TYPES_H */ diff --git a/tools/perf/arch/arm64/util/kvm-stat.c b/tools/perf/arch/arm64/util/kvm-stat.c new file mode 100644 index 000000000000..50376b9062c1 --- /dev/null +++ b/tools/perf/arch/arm64/util/kvm-stat.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include "../../util/evsel.h" +#include "../../util/kvm-stat.h" +#include "arm64_exception_types.h" +#include "debug.h" + +define_exit_reasons_table(arm64_exit_reasons, kvm_arm_exception_type); +define_exit_reasons_table(arm64_trap_exit_reasons, kvm_arm_exception_class); + +const char *kvm_trap_exit_reason = "esr_ec"; +const char *vcpu_id_str = "id"; +const int decode_str_len = 20; +const char *kvm_exit_reason = "ret"; +const char *kvm_entry_trace = "kvm:kvm_entry"; +const char *kvm_exit_trace = "kvm:kvm_exit"; + +const char *kvm_events_tp[] = { + "kvm:kvm_entry", + "kvm:kvm_exit", + NULL, +}; + +static void event_get_key(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + key->info = 0; + key->key = evsel__intval(evsel, sample, kvm_exit_reason); + key->exit_reasons = arm64_exit_reasons; + + /* + * TRAP exceptions carry exception class info in esr_ec field + * and, hence, we need to use a different exit_reasons table to + * properly decode event's est_ec. + */ + if (key->key == ARM_EXCEPTION_TRAP) { + key->key = evsel__intval(evsel, sample, kvm_trap_exit_reason); + key->exit_reasons = arm64_trap_exit_reasons; + } +} + +static bool event_begin(struct evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + return !strcmp(evsel->name, kvm_entry_trace); +} + +static bool event_end(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + if (!strcmp(evsel->name, kvm_exit_trace)) { + event_get_key(evsel, sample, key); + return true; + } + return false; +} + +static struct kvm_events_ops exit_events = { + .is_begin_event = event_begin, + .is_end_event = event_end, + .decode_key = exit_event_decode_key, + .name = "VM-EXIT" +}; + +struct kvm_reg_events_ops kvm_reg_events_ops[] = { + { + .name = "vmexit", + .ops = &exit_events, + }, + { NULL }, +}; + +const char * const kvm_skip_events[] = { + NULL, +}; + +int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) +{ + kvm->exit_reasons_isa = "arm64"; + return 0; +} -- cgit v1.3.1 From 9b0a7836359443227c9af101f7aea8412e739458 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 27 Oct 2020 16:28:54 +0900 Subject: perf test: Use generic event for expand_libpfm_events() I found that the UNHALTED_CORE_CYCLES event is only available in the Intel machines and it makes other vendors/archs fail on the test. As libpfm4 can parse the generic events like cycles, let's use them. Fixes: 40b74c30ffb9 ("perf test: Add expand cgroup event test") Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201027072855.655449-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expand-cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c index d5771e4d094f..4c59f3ae438f 100644 --- a/tools/perf/tests/expand-cgroup.c +++ b/tools/perf/tests/expand-cgroup.c @@ -145,7 +145,7 @@ static int expand_libpfm_events(void) int ret; struct evlist *evlist; struct rblist metric_events; - const char event_str[] = "UNHALTED_CORE_CYCLES"; + const char event_str[] = "CYCLES"; struct option opt = { .value = &evlist, }; -- cgit v1.3.1 From bb1c15b60b981d1065d7766ccf9de6c32beedfa3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 27 Oct 2020 16:28:55 +0900 Subject: perf stat: Support regex pattern in --for-each-cgroup To make the command line even more compact with cgroups, support regex pattern matching in cgroup names. $ perf stat -a -e cpu-clock,cycles --for-each-cgroup ^foo sleep 1 3,000.73 msec cpu-clock foo # 2.998 CPUs utilized 12,530,992,699 cycles foo # 7.517 GHz (100.00%) 1,000.61 msec cpu-clock foo/bar # 1.000 CPUs utilized 4,178,529,579 cycles foo/bar # 2.506 GHz (100.00%) 1,000.03 msec cpu-clock foo/baz # 0.999 CPUs utilized 4,176,104,315 cycles foo/baz # 2.505 GHz (100.00%) 1.000892614 seconds time elapsed Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201027072855.655449-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 5 +- tools/perf/builtin-stat.c | 5 +- tools/perf/util/cgroup.c | 198 +++++++++++++++++++++++++++++---- 3 files changed, 182 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 9f9f29025e49..2b44c08b3b23 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -168,8 +168,9 @@ command line can be used: 'perf stat -e cycles -G cgroup_name -a -e cycles'. --for-each-cgroup name:: Expand event list for each cgroup in "name" (allow multiple cgroups separated -by comma). This has same effect that repeating -e option and -G option for -each event x name. This option cannot be used with -G/--cgroup option. +by comma). It also support regex patterns to match multiple groups. This has same +effect that repeating -e option and -G option for each event x name. This option +cannot be used with -G/--cgroup option. -o file:: --output file:: diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index b01af171d94f..6709578128c9 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2235,8 +2235,11 @@ int cmd_stat(int argc, const char **argv) } if (evlist__expand_cgroup(evsel_list, stat_config.cgroup_list, - &stat_config.metric_events, true) < 0) + &stat_config.metric_events, true) < 0) { + parse_options_usage(stat_usage, stat_options, + "for-each-cgroup", 0); goto out; + } } target__validate(&target); diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index b81324a13a2b..704333748549 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -13,9 +13,19 @@ #include #include #include +#include +#include int nr_cgroups; +/* used to match cgroup name with patterns */ +struct cgroup_name { + struct list_head list; + bool used; + char name[]; +}; +static LIST_HEAD(cgroup_list); + static int open_cgroup(const char *name) { char path[PATH_MAX + 1]; @@ -149,6 +159,137 @@ void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup) evsel__set_default_cgroup(evsel, cgroup); } +/* helper function for ftw() in match_cgroups and list_cgroups */ +static int add_cgroup_name(const char *fpath, const struct stat *sb __maybe_unused, + int typeflag) +{ + struct cgroup_name *cn; + + if (typeflag != FTW_D) + return 0; + + cn = malloc(sizeof(*cn) + strlen(fpath) + 1); + if (cn == NULL) + return -1; + + cn->used = false; + strcpy(cn->name, fpath); + + list_add_tail(&cn->list, &cgroup_list); + return 0; +} + +static void release_cgroup_list(void) +{ + struct cgroup_name *cn; + + while (!list_empty(&cgroup_list)) { + cn = list_first_entry(&cgroup_list, struct cgroup_name, list); + list_del(&cn->list); + free(cn); + } +} + +/* collect given cgroups only */ +static int list_cgroups(const char *str) +{ + const char *p, *e, *eos = str + strlen(str); + struct cgroup_name *cn; + char *s; + + /* use given name as is - for testing purpose */ + for (;;) { + p = strchr(str, ','); + e = p ? p : eos; + + if (e - str) { + int ret; + + s = strndup(str, e - str); + if (!s) + return -1; + /* pretend if it's added by ftw() */ + ret = add_cgroup_name(s, NULL, FTW_D); + free(s); + if (ret) + return -1; + } else { + if (add_cgroup_name("", NULL, FTW_D) < 0) + return -1; + } + + if (!p) + break; + str = p+1; + } + + /* these groups will be used */ + list_for_each_entry(cn, &cgroup_list, list) + cn->used = true; + + return 0; +} + +/* collect all cgroups first and then match with the pattern */ +static int match_cgroups(const char *str) +{ + char mnt[PATH_MAX]; + const char *p, *e, *eos = str + strlen(str); + struct cgroup_name *cn; + regex_t reg; + int prefix_len; + char *s; + + if (cgroupfs_find_mountpoint(mnt, sizeof(mnt), "perf_event")) + return -1; + + /* cgroup_name will have a full path, skip the root directory */ + prefix_len = strlen(mnt); + + /* collect all cgroups in the cgroup_list */ + if (ftw(mnt, add_cgroup_name, 20) < 0) + return -1; + + for (;;) { + p = strchr(str, ','); + e = p ? p : eos; + + /* allow empty cgroups, i.e., skip */ + if (e - str) { + /* termination added */ + s = strndup(str, e - str); + if (!s) + return -1; + if (regcomp(®, s, REG_NOSUB)) { + free(s); + return -1; + } + + /* check cgroup name with the pattern */ + list_for_each_entry(cn, &cgroup_list, list) { + char *name = cn->name + prefix_len; + + if (name[0] == '/' && name[1]) + name++; + if (!regexec(®, name, 0, NULL, 0)) + cn->used = true; + } + regfree(®); + free(s); + } else { + /* first entry to root cgroup */ + cn = list_first_entry(&cgroup_list, struct cgroup_name, + list); + cn->used = true; + } + + if (!p) + break; + str = p+1; + } + return prefix_len; +} + int parse_cgroups(const struct option *opt, const char *str, int unset __maybe_unused) { @@ -201,6 +342,11 @@ int parse_cgroups(const struct option *opt, const char *str, return 0; } +static bool has_pattern_string(const char *str) +{ + return !!strpbrk(str, "{}[]()|*+?^$"); +} + int evlist__expand_cgroup(struct evlist *evlist, const char *str, struct rblist *metric_events, bool open_cgroup) { @@ -208,8 +354,9 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, struct evsel *pos, *evsel, *leader; struct rblist orig_metric_events; struct cgroup *cgrp = NULL; - const char *p, *e, *eos = str + strlen(str); + struct cgroup_name *cn; int ret = -1; + int prefix_len; if (evlist->core.nr_entries == 0) { fprintf(stderr, "must define events before cgroups\n"); @@ -234,24 +381,27 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, rblist__init(&orig_metric_events); } - for (;;) { - p = strchr(str, ','); - e = p ? p : eos; + if (has_pattern_string(str)) + prefix_len = match_cgroups(str); + else + prefix_len = list_cgroups(str); - /* allow empty cgroups, i.e., skip */ - if (e - str) { - /* termination added */ - char *name = strndup(str, e - str); - if (!name) - goto out_err; + if (prefix_len < 0) + goto out_err; - cgrp = cgroup__new(name, open_cgroup); - free(name); - if (cgrp == NULL) - goto out_err; - } else { - cgrp = NULL; - } + list_for_each_entry(cn, &cgroup_list, list) { + char *name; + + if (!cn->used) + continue; + + /* cgroup_name might have a full path, skip the prefix */ + name = cn->name + prefix_len; + if (name[0] == '/' && name[1]) + name++; + cgrp = cgroup__new(name, open_cgroup); + if (cgrp == NULL) + goto out_err; leader = NULL; evlist__for_each_entry(orig_list, pos) { @@ -277,23 +427,25 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, if (metricgroup__copy_metric_events(tmp_list, cgrp, metric_events, &orig_metric_events) < 0) - break; + goto out_err; } perf_evlist__splice_list_tail(evlist, &tmp_list->core.entries); tmp_list->core.nr_entries = 0; + } - if (!p) { - ret = 0; - break; - } - str = p+1; + if (list_empty(&evlist->core.entries)) { + fprintf(stderr, "no cgroup matched: %s\n", str); + goto out_err; } + ret = 0; + out_err: evlist__delete(orig_list); evlist__delete(tmp_list); rblist__exit(&orig_metric_events); + release_cgroup_list(); return ret; } -- cgit v1.3.1 From 55a4de94c64bacffbcd802c954764e0de2ab217f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 26 Oct 2020 17:27:36 -0700 Subject: perf stat: Add --quiet option Add a new --quiet option to 'perf stat'. This is useful with 'perf stat record' to write the data only to the perf.data file, which can lower measurement overhead because the data doesn't need to be formatted. On my 4C desktop: % time ./perf stat record -e $(python -c 'print ",".join(["cycles"]*1000)') -a -I 1000 sleep 5 ... real 0m5.377s user 0m0.238s sys 0m0.452s % time ./perf stat record --quiet -e $(python -c 'print ",".join(["cycles"]*1000)') -a -I 1000 sleep 5 real 0m5.452s user 0m0.183s sys 0m0.423s In this example it cuts the user time by 20%. On systems with more cores the savings are higher. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Cc: Alexey Budankov Link: http://lore.kernel.org/lkml/20201027002737.30942-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 4 ++++ tools/perf/builtin-stat.c | 6 +++++- tools/perf/util/stat.h | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 2b44c08b3b23..5d4a673d7621 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -317,6 +317,10 @@ small group that need not have multiplexing is lowered. This option forbids the event merging logic from sharing events between groups and may be used to increase accuracy in this case. +--quiet:: +Don't print output. This is useful with perf stat record below to only +write data to the perf.data file. + STAT RECORD ----------- Stores stat data into perf data file. diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 6709578128c9..f15b2f8aa14d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -972,6 +972,8 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) /* Do not print anything if we record to the pipe. */ if (STAT_RECORD && perf_stat.data.is_pipe) return; + if (stat_config.quiet) + return; perf_evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv); @@ -1171,6 +1173,8 @@ static struct option stat_options[] = { "threads of same physical core"), OPT_BOOLEAN(0, "summary", &stat_config.summary, "print summary for interval mode"), + OPT_BOOLEAN(0, "quiet", &stat_config.quiet, + "don't print output (useful with record)"), #ifdef HAVE_LIBPFM OPT_CALLBACK(0, "pfm-events", &evsel_list, "event", "libpfm4 event selector. use 'perf list' to list available events", @@ -2132,7 +2136,7 @@ int cmd_stat(int argc, const char **argv) goto out; } - if (!output) { + if (!output && !stat_config.quiet) { struct timespec tm; mode = append_file ? "a" : "w"; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 487010c624be..05adf8165025 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -122,6 +122,7 @@ struct perf_stat_config { bool metric_no_group; bool metric_no_merge; bool stop_read_counter; + bool quiet; FILE *output; unsigned int interval; unsigned int timeout; -- cgit v1.3.1 From c5e6bc23355a3b33ffc170f92e315102f1e6a59c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 23 Oct 2020 11:06:28 +0900 Subject: perf trace beauty: Allow header files in a different path Current script to generate mmap flags and prot checks headers from the uapi/asm-generic directory but it might come from a different directory in some environment. So change the pattern to accept it. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201023020628.346257-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/mmap_flags.sh | 4 ++-- tools/perf/trace/beauty/mmap_prot.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh index 39eb2595983b..76825710c725 100755 --- a/tools/perf/trace/beauty/mmap_flags.sh +++ b/tools/perf/trace/beauty/mmap_flags.sh @@ -28,12 +28,12 @@ egrep -q $regex ${linux_mman} && \ egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \ sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n") -([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+.*' ${arch_mman}) && +([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+.*uapi/asm-generic/mman.h>.*' ${arch_mman}) && (egrep $regex ${header_dir}/mman.h | \ sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n") diff --git a/tools/perf/trace/beauty/mmap_prot.sh b/tools/perf/trace/beauty/mmap_prot.sh index 28f638f8d216..664d8d534a50 100755 --- a/tools/perf/trace/beauty/mmap_prot.sh +++ b/tools/perf/trace/beauty/mmap_prot.sh @@ -17,7 +17,7 @@ prefix="PROT" printf "static const char *mmap_prot[] = {\n" regex=`printf '^[[:space:]]*#[[:space:]]*define[[:space:]]+%s_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' ${prefix}` -([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+ Date: Thu, 22 Oct 2020 19:02:26 +0800 Subject: perf jevents: Tidy error handling There is much duplication in the error handling for directory transvering for prcessing JSONs. Factor out the common code to tidy a bit. Signed-off-by: John Garry Reviewed-By: Kajol Jain Link: https://lore.kernel.org/r/1603364547-197086-2-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 83 +++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 48 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index e47644cab3fa..7326c14c4623 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -1100,12 +1100,13 @@ static int process_one_file(const char *fpath, const struct stat *sb, */ int main(int argc, char *argv[]) { - int rc, ret = 0; + int rc, ret = 0, empty_map = 0; int maxfds; char ldirname[PATH_MAX]; const char *arch; const char *output_file; const char *start_dirname; + char *err_string_ext = ""; struct stat stbuf; prog = basename(argv[0]); @@ -1133,7 +1134,8 @@ int main(int argc, char *argv[]) /* If architecture does not have any event lists, bail out */ if (stat(ldirname, &stbuf) < 0) { pr_info("%s: Arch %s has no PMU event lists\n", prog, arch); - goto empty_map; + empty_map = 1; + goto err_close_eventsfp; } /* Include pmu-events.h first */ @@ -1150,75 +1152,60 @@ int main(int argc, char *argv[]) */ maxfds = get_maxfds(); - mapfile = NULL; rc = nftw(ldirname, preprocess_arch_std_files, maxfds, 0); - if (rc && verbose) { - pr_info("%s: Error preprocessing arch standard files %s\n", - prog, ldirname); - goto empty_map; - } else if (rc < 0) { - /* Make build fail */ - fclose(eventsfp); - free_arch_std_events(); - return 1; - } else if (rc) { - goto empty_map; - } + if (rc) + goto err_processing_std_arch_event_dir; rc = nftw(ldirname, process_one_file, maxfds, 0); - if (rc && verbose) { - pr_info("%s: Error walking file tree %s\n", prog, ldirname); - goto empty_map; - } else if (rc < 0) { - /* Make build fail */ - fclose(eventsfp); - free_arch_std_events(); - ret = 1; - goto out_free_mapfile; - } else if (rc) { - goto empty_map; - } + if (rc) + goto err_processing_dir; sprintf(ldirname, "%s/test", start_dirname); rc = nftw(ldirname, process_one_file, maxfds, 0); - if (rc && verbose) { - pr_info("%s: Error walking file tree %s rc=%d for test\n", - prog, ldirname, rc); - goto empty_map; - } else if (rc < 0) { - /* Make build fail */ - free_arch_std_events(); - ret = 1; - goto out_free_mapfile; - } else if (rc) { - goto empty_map; - } + if (rc) + goto err_processing_dir; if (close_table) print_events_table_suffix(eventsfp); if (!mapfile) { pr_info("%s: No CPU->JSON mapping?\n", prog); - goto empty_map; + empty_map = 1; + goto err_close_eventsfp; } - if (process_mapfile(eventsfp, mapfile)) { + rc = process_mapfile(eventsfp, mapfile); + fclose(eventsfp); + if (rc) { pr_info("%s: Error processing mapfile %s\n", prog, mapfile); /* Make build fail */ - fclose(eventsfp); - free_arch_std_events(); ret = 1; + goto err_out; } + free_arch_std_events(); + free(mapfile); + return 0; - goto out_free_mapfile; - -empty_map: +err_processing_std_arch_event_dir: + err_string_ext = " for std arch event"; +err_processing_dir: + if (verbose) { + pr_info("%s: Error walking file tree %s%s\n", prog, ldirname, + err_string_ext); + empty_map = 1; + } else if (rc < 0) { + ret = 1; + } else { + empty_map = 1; + } +err_close_eventsfp: fclose(eventsfp); - create_empty_mapping(output_file); + if (empty_map) + create_empty_mapping(output_file); +err_out: free_arch_std_events(); -out_free_mapfile: free(mapfile); return ret; } -- cgit v1.3.1 From 644bf4b0f7acde641d3db200b4db66977e96c3bd Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 22 Oct 2020 19:02:27 +0800 Subject: perf jevents: Add test for arch std events Recently there was an undetected breakage for std arch event support. Add support in "PMU events" testcase to detect such breakages. For this, the "test" arch needs has support added to process std arch events. And a test event is added for the test, ifself. Also add a few code comments to help understand the code a bit better. Committer testing: Before: # perf test -vv pmu |& grep l3_cache_rd # After: # perf test -vv pmu |& grep l3_cache_rd testing event table l3_cache_rd: pass testing aliases PMU cpu: matched event l3_cache_rd # Signed-off-by: John Garry Reviewed-By: Kajol Jain Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/1603364547-197086-3-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/test/arch-std-events.json | 8 ++++++++ tools/perf/pmu-events/arch/test/test_cpu/cache.json | 5 +++++ tools/perf/pmu-events/jevents.c | 4 ++++ tools/perf/tests/pmu-events.c | 14 ++++++++++++++ 4 files changed, 31 insertions(+) create mode 100644 tools/perf/pmu-events/arch/test/arch-std-events.json create mode 100644 tools/perf/pmu-events/arch/test/test_cpu/cache.json (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/test/arch-std-events.json b/tools/perf/pmu-events/arch/test/arch-std-events.json new file mode 100644 index 000000000000..43f6f729d6ae --- /dev/null +++ b/tools/perf/pmu-events/arch/test/arch-std-events.json @@ -0,0 +1,8 @@ +[ + { + "PublicDescription": "Attributable Level 3 cache access, read", + "EventCode": "0x40", + "EventName": "L3_CACHE_RD", + "BriefDescription": "L3 cache access, read" + } +] diff --git a/tools/perf/pmu-events/arch/test/test_cpu/cache.json b/tools/perf/pmu-events/arch/test/test_cpu/cache.json new file mode 100644 index 000000000000..036d0efdb2bb --- /dev/null +++ b/tools/perf/pmu-events/arch/test/test_cpu/cache.json @@ -0,0 +1,5 @@ +[ + { + "ArchStdEvent": "L3_CACHE_RD" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 7326c14c4623..72cfa3b5046d 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -1162,6 +1162,10 @@ int main(int argc, char *argv[]) sprintf(ldirname, "%s/test", start_dirname); + rc = nftw(ldirname, preprocess_arch_std_files, maxfds, 0); + if (rc) + goto err_processing_std_arch_event_dir; + rc = nftw(ldirname, process_one_file, maxfds, 0); if (rc) goto err_processing_dir; diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index d3517a74d95e..ad2b21591275 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -14,8 +14,10 @@ #include "util/parse-events.h" struct perf_pmu_test_event { + /* used for matching against events from generated pmu-events.c */ struct pmu_event event; + /* used for matching against event aliases */ /* extra events for aliases */ const char *alias_str; @@ -78,6 +80,17 @@ static struct perf_pmu_test_event test_cpu_events[] = { .alias_str = "umask=0,(null)=0x30d40,event=0x3a", .alias_long_desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions", }, + { + .event = { + .name = "l3_cache_rd", + .event = "event=0x40", + .desc = "L3 cache access, read", + .long_desc = "Attributable Level 3 cache access, read", + .topic = "cache", + }, + .alias_str = "event=0x40", + .alias_long_desc = "Attributable Level 3 cache access, read", + }, { /* sentinel */ .event = { .name = NULL, @@ -357,6 +370,7 @@ static int __test__pmu_event_aliases(char *pmu_name, int *count) } +/* Test that aliases generated are as expected */ static int test_aliases(void) { struct perf_pmu *pmu = NULL; -- cgit v1.3.1 From f78331f74cacb33d87cd60376dacc5bd397959e2 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 3 Nov 2020 10:41:29 +0100 Subject: libbpf: Fix null dereference in xsk_socket__delete Fix a possible null pointer dereference in xsk_socket__delete that will occur if a null pointer is fed into the function. Fixes: 2f6324a3937f ("libbpf: Support shared umems between queues and devices") Reported-by: Andrii Nakryiko Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1604396490-12129-2-git-send-email-magnus.karlsson@gmail.com --- tools/lib/bpf/xsk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index e3c98c007825..504b7a85d445 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -891,13 +891,14 @@ int xsk_umem__delete(struct xsk_umem *umem) void xsk_socket__delete(struct xsk_socket *xsk) { size_t desc_sz = sizeof(struct xdp_desc); - struct xsk_ctx *ctx = xsk->ctx; struct xdp_mmap_offsets off; + struct xsk_ctx *ctx; int err; if (!xsk) return; + ctx = xsk->ctx; if (ctx->prog_fd != -1) { xsk_delete_bpf_maps(xsk); close(ctx->prog_fd); -- cgit v1.3.1 From 25cf73b9ff88fd4608699a0313f820758b4c252d Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 3 Nov 2020 10:41:30 +0100 Subject: libbpf: Fix possible use after free in xsk_socket__delete Fix a possible use after free in xsk_socket__delete that will happen if xsk_put_ctx() frees the ctx. To fix, save the umem reference taken from the context and just use that instead. Fixes: 2f6324a3937f ("libbpf: Support shared umems between queues and devices") Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1604396490-12129-3-git-send-email-magnus.karlsson@gmail.com --- tools/lib/bpf/xsk.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 504b7a85d445..9bc537d0b92d 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -892,6 +892,7 @@ void xsk_socket__delete(struct xsk_socket *xsk) { size_t desc_sz = sizeof(struct xdp_desc); struct xdp_mmap_offsets off; + struct xsk_umem *umem; struct xsk_ctx *ctx; int err; @@ -899,6 +900,7 @@ void xsk_socket__delete(struct xsk_socket *xsk) return; ctx = xsk->ctx; + umem = ctx->umem; if (ctx->prog_fd != -1) { xsk_delete_bpf_maps(xsk); close(ctx->prog_fd); @@ -918,11 +920,11 @@ void xsk_socket__delete(struct xsk_socket *xsk) xsk_put_ctx(ctx); - ctx->umem->refcount--; + umem->refcount--; /* Do not close an fd that also has an associated umem connected * to it. */ - if (xsk->fd != ctx->umem->fd) + if (xsk->fd != umem->fd) close(xsk->fd); free(xsk); } -- cgit v1.3.1 From 537e48259eacbd92f3463900c20cc3acd9dd2072 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 3 Nov 2020 19:23:57 +0200 Subject: selftests: net: bridge: factor out mcast_packet_test Factor out mcast_packet_test into lib.sh so it can be later extended and reused by MLDv2 tests. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/bridge_igmp.sh | 32 ---------------------- tools/testing/selftests/net/forwarding/lib.sh | 32 ++++++++++++++++++++++ 2 files changed, 32 insertions(+), 32 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 0e71abdd7a03..50a48ce16ba1 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -105,38 +105,6 @@ cleanup() vrf_cleanup } -# return 0 if the packet wasn't seen on host2_if or 1 if it was -mcast_packet_test() -{ - local mac=$1 - local src_ip=$2 - local ip=$3 - local host1_if=$4 - local host2_if=$5 - local seen=0 - - # Add an ACL on `host2_if` which will tell us whether the packet - # was received by it or not. - tc qdisc add dev $host2_if ingress - tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \ - flower ip_proto udp dst_mac $mac action drop - - $MZ $host1_if -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q - sleep 1 - - tc -j -s filter show dev $host2_if ingress \ - | jq -e ".[] | select(.options.handle == 101) \ - | select(.options.actions[0].stats.packets == 1)" &> /dev/null - if [[ $? -eq 0 ]]; then - seen=1 - fi - - tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower - tc qdisc del dev $host2_if ingress - - return $seen -} - v2reportleave_test() { RET=0 diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 927f9ba49e08..bb3ccc6d2165 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1270,3 +1270,35 @@ tcpdump_show() { tcpdump -e -n -r $capfile 2>&1 } + +# return 0 if the packet wasn't seen on host2_if or 1 if it was +mcast_packet_test() +{ + local mac=$1 + local src_ip=$2 + local ip=$3 + local host1_if=$4 + local host2_if=$5 + local seen=0 + + # Add an ACL on `host2_if` which will tell us whether the packet + # was received by it or not. + tc qdisc add dev $host2_if ingress + tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \ + flower ip_proto udp dst_mac $mac action drop + + $MZ $host1_if -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q + sleep 1 + + tc -j -s filter show dev $host2_if ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + if [[ $? -eq 0 ]]; then + seen=1 + fi + + tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower + tc qdisc del dev $host2_if ingress + + return $seen +} -- cgit v1.3.1 From 450b0b84c6609e7ec1fb0276c8a7e4efa9e78a4c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 3 Nov 2020 19:23:58 +0200 Subject: selftests: net: lib: add support for IPv6 mcast packet test In order to test an IPv6 multicast packet we need to pass different tc and mausezahn protocols only, so add a simple check for the destination address which decides if we should generate an IPv4 or IPv6 mcast packet. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/lib.sh | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index bb3ccc6d2165..0a427b8a039d 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1280,14 +1280,22 @@ mcast_packet_test() local host1_if=$4 local host2_if=$5 local seen=0 + local tc_proto="ip" + local mz_v6arg="" + + # basic check to see if we were passed an IPv4 address, if not assume IPv6 + if [[ ! $ip =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then + tc_proto="ipv6" + mz_v6arg="-6" + fi # Add an ACL on `host2_if` which will tell us whether the packet # was received by it or not. tc qdisc add dev $host2_if ingress - tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \ + tc filter add dev $host2_if ingress protocol $tc_proto pref 1 handle 101 \ flower ip_proto udp dst_mac $mac action drop - $MZ $host1_if -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q + $MZ $host1_if $mz_v6arg -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q sleep 1 tc -j -s filter show dev $host2_if ingress \ @@ -1297,7 +1305,7 @@ mcast_packet_test() seen=1 fi - tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower + tc filter del dev $host2_if ingress protocol $tc_proto pref 1 handle 101 flower tc qdisc del dev $host2_if ingress return $seen -- cgit v1.3.1 From 95e6f430ebfee51ac174e234388e7c6e8216ff2c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 3 Nov 2020 19:23:59 +0200 Subject: selftests: net: bridge: factor out and rename sg state functions Factor out S,G entry state checking functions for existence, forwarding, blocking and timer to lib.sh so they can be later used by MLDv2 tests. Add brmcast_ suffix to their name to make the relation to the bridge explicit. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/bridge_igmp.sh | 179 +++++++-------------- tools/testing/selftests/net/forwarding/lib.sh | 67 ++++++++ 2 files changed, 123 insertions(+), 123 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 50a48ce16ba1..675eff45b037 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -137,73 +137,6 @@ v2reportleave_test() log_test "IGMPv2 leave $TEST_GROUP" } -check_sg_entries() -{ - local report=$1; shift - local slist=("$@") - local sarg="" - - for src in "${slist[@]}"; do - sarg="${sarg} and .source_list[].address == \"$src\"" - done - bridge -j -d -s mdb show dev br0 \ - | jq -e ".[].mdb[] | \ - select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null - check_err $? "Wrong *,G entry source list after $report report" - - for sgent in "${slist[@]}"; do - bridge -j -d -s mdb show dev br0 \ - | jq -e ".[].mdb[] | \ - select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null - check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)" - done -} - -check_sg_fwding() -{ - local should_fwd=$1; shift - local sources=("$@") - - for src in "${sources[@]}"; do - local retval=0 - - mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1 - retval=$? - if [ $should_fwd -eq 1 ]; then - check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)" - else - check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)" - fi - done -} - -check_sg_state() -{ - local is_blocked=$1; shift - local sources=("$@") - local should_fail=1 - - if [ $is_blocked -eq 1 ]; then - should_fail=0 - fi - - for src in "${sources[@]}"; do - bridge -j -d -s mdb show dev br0 \ - | jq -e ".[].mdb[] | \ - select(.grp == \"$TEST_GROUP\" and .source_list != null) | - .source_list[] | - select(.address == \"$src\") | - select(.timer == \"0.00\")" &>/dev/null - check_err_fail $should_fail $? "Entry $src has zero timer" - - bridge -j -d -s mdb show dev br0 \ - | jq -e ".[].mdb[] | \ - select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \ - .flags[] == \"blocked\")" &>/dev/null - check_err_fail $should_fail $? "Entry $src has blocked flag" - done -} - v3include_prepare() { local host1_if=$1 @@ -225,7 +158,7 @@ v3include_prepare() select(.grp == \"$TEST_GROUP\" and \ .source_list != null and .filter_mode == \"include\")" &>/dev/null check_err $? "Wrong *,G entry filter mode" - check_sg_entries "is_include" "${X[@]}" + brmcast_check_sg_entries "is_include" "${X[@]}" } v3exclude_prepare() @@ -247,10 +180,10 @@ v3exclude_prepare() .source_list != null and .filter_mode == \"exclude\")" &>/dev/null check_err $? "Wrong *,G entry filter mode" - check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" + brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" - check_sg_state 0 "${X[@]}" - check_sg_state 1 "${Y[@]}" + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" bridge -j -d -s mdb show dev br0 \ | jq -e ".[].mdb[] | \ @@ -276,10 +209,10 @@ v3include_test() v3include_prepare $h1 $ALL_MAC $ALL_GROUP - check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 0 "${X[@]}" - check_sg_fwding 1 "${X[@]}" - check_sg_fwding 0 "192.0.2.100" + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "192.0.2.100" log_test "IGMPv3 report $TEST_GROUP is_include" @@ -295,12 +228,12 @@ v3inc_allow_test() $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW" -q sleep 1 - check_sg_entries "allow" "${X[@]}" + brmcast_check_sg_entries "allow" "${X[@]}" - check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 0 "${X[@]}" - check_sg_fwding 1 "${X[@]}" - check_sg_fwding 0 "192.0.2.100" + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "192.0.2.100" log_test "IGMPv3 report $TEST_GROUP include -> allow" @@ -316,12 +249,12 @@ v3inc_is_include_test() $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC2" -q sleep 1 - check_sg_entries "is_include" "${X[@]}" + brmcast_check_sg_entries "is_include" "${X[@]}" - check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 0 "${X[@]}" - check_sg_fwding 1 "${X[@]}" - check_sg_fwding 0 "192.0.2.100" + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "192.0.2.100" log_test "IGMPv3 report $TEST_GROUP include -> is_include" @@ -334,8 +267,8 @@ v3inc_is_exclude_test() v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP - check_sg_fwding 1 "${X[@]}" 192.0.2.100 - check_sg_fwding 0 "${Y[@]}" + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" log_test "IGMPv3 report $TEST_GROUP include -> is_exclude" @@ -361,10 +294,10 @@ v3inc_to_exclude_test() .source_list != null and .filter_mode == \"exclude\")" &>/dev/null check_err $? "Wrong *,G entry filter mode" - check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}" + brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}" - check_sg_state 0 "${X[@]}" - check_sg_state 1 "${Y[@]}" + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" bridge -j -d -s mdb show dev br0 \ | jq -e ".[].mdb[] | \ @@ -379,8 +312,8 @@ v3inc_to_exclude_test() .source_list[].address == \"192.0.2.21\")" &>/dev/null check_fail $? "Wrong *,G entry source list, 192.0.2.21 entry still exists" - check_sg_fwding 1 "${X[@]}" 192.0.2.100 - check_sg_fwding 0 "${Y[@]}" + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" log_test "IGMPv3 report $TEST_GROUP include -> to_exclude" @@ -399,13 +332,13 @@ v3exc_allow_test() $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q sleep 1 - check_sg_entries "allow" "${X[@]}" "${Y[@]}" + brmcast_check_sg_entries "allow" "${X[@]}" "${Y[@]}" - check_sg_state 0 "${X[@]}" - check_sg_state 1 "${Y[@]}" + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" - check_sg_fwding 1 "${X[@]}" 192.0.2.100 - check_sg_fwding 0 "${Y[@]}" + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" log_test "IGMPv3 report $TEST_GROUP exclude -> allow" @@ -422,13 +355,13 @@ v3exc_is_include_test() $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC3" -q sleep 1 - check_sg_entries "is_include" "${X[@]}" "${Y[@]}" + brmcast_check_sg_entries "is_include" "${X[@]}" "${Y[@]}" - check_sg_state 0 "${X[@]}" - check_sg_state 1 "${Y[@]}" + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" - check_sg_fwding 1 "${X[@]}" 192.0.2.100 - check_sg_fwding 0 "${Y[@]}" + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" log_test "IGMPv3 report $TEST_GROUP exclude -> is_include" @@ -445,13 +378,13 @@ v3exc_is_exclude_test() $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_EXC2" -q sleep 1 - check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" + brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" - check_sg_state 0 "${X[@]}" - check_sg_state 1 "${Y[@]}" + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" - check_sg_fwding 1 "${X[@]}" 192.0.2.100 - check_sg_fwding 0 "${Y[@]}" + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" log_test "IGMPv3 report $TEST_GROUP exclude -> is_exclude" @@ -471,13 +404,13 @@ v3exc_to_exclude_test() $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q sleep 1 - check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}" + brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}" - check_sg_state 0 "${X[@]}" - check_sg_state 1 "${Y[@]}" + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" - check_sg_fwding 1 "${X[@]}" 192.0.2.100 - check_sg_fwding 0 "${Y[@]}" + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" log_test "IGMPv3 report $TEST_GROUP exclude -> to_exclude" @@ -496,9 +429,9 @@ v3inc_block_test() $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q # make sure the lowered timers have expired (by default 2 seconds) sleep 3 - check_sg_entries "block" "${X[@]}" + brmcast_check_sg_entries "block" "${X[@]}" - check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 0 "${X[@]}" bridge -j -d -s mdb show dev br0 \ | jq -e ".[].mdb[] | \ @@ -507,8 +440,8 @@ v3inc_block_test() .source_list[].address == \"192.0.2.1\")" &>/dev/null check_fail $? "Wrong *,G entry source list, 192.0.2.1 entry still exists" - check_sg_fwding 1 "${X[@]}" - check_sg_fwding 0 "192.0.2.100" + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "192.0.2.100" log_test "IGMPv3 report $TEST_GROUP include -> block" @@ -528,13 +461,13 @@ v3exc_block_test() $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q sleep 1 - check_sg_entries "block" "${X[@]}" "${Y[@]}" + brmcast_check_sg_entries "block" "${X[@]}" "${Y[@]}" - check_sg_state 0 "${X[@]}" - check_sg_state 1 "${Y[@]}" + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" - check_sg_fwding 1 "${X[@]}" 192.0.2.100 - check_sg_fwding 0 "${Y[@]}" + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" log_test "IGMPv3 report $TEST_GROUP exclude -> block" @@ -574,12 +507,12 @@ v3exc_timeout_test() .source_list[].address == \"192.0.2.2\")" &>/dev/null check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists" - check_sg_entries "allow" "${X[@]}" + brmcast_check_sg_entries "allow" "${X[@]}" - check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 0 "${X[@]}" - check_sg_fwding 1 "${X[@]}" - check_sg_fwding 0 192.0.2.100 + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 192.0.2.100 log_test "IGMPv3 group $TEST_GROUP exclude timeout" @@ -610,7 +543,7 @@ v3star_ex_auto_add_test() .flags[] == \"added_by_star_ex\")" &>/dev/null check_err $? "Auto-added S,G entry doesn't have added_by_star_ex flag" - check_sg_fwding 1 192.0.2.3 + brmcast_check_sg_fwding 1 192.0.2.3 log_test "IGMPv3 S,G port entry automatic add to a *,G port" diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 0a427b8a039d..98ea37d26c44 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1310,3 +1310,70 @@ mcast_packet_test() return $seen } + +brmcast_check_sg_entries() +{ + local report=$1; shift + local slist=("$@") + local sarg="" + + for src in "${slist[@]}"; do + sarg="${sarg} and .source_list[].address == \"$src\"" + done + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null + check_err $? "Wrong *,G entry source list after $report report" + + for sgent in "${slist[@]}"; do + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null + check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)" + done +} + +brmcast_check_sg_fwding() +{ + local should_fwd=$1; shift + local sources=("$@") + + for src in "${sources[@]}"; do + local retval=0 + + mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1 + retval=$? + if [ $should_fwd -eq 1 ]; then + check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)" + else + check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)" + fi + done +} + +brmcast_check_sg_state() +{ + local is_blocked=$1; shift + local sources=("$@") + local should_fail=1 + + if [ $is_blocked -eq 1 ]; then + should_fail=0 + fi + + for src in "${sources[@]}"; do + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .source_list != null) | + .source_list[] | + select(.address == \"$src\") | + select(.timer == \"0.00\")" &>/dev/null + check_err_fail $should_fail $? "Entry $src has zero timer" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \ + .flags[] == \"blocked\")" &>/dev/null + check_err_fail $should_fail $? "Entry $src has blocked flag" + done +} -- cgit v1.3.1 From 444c897111b02b06599b92e597436e09bd969501 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 3 Nov 2020 19:24:00 +0200 Subject: selftests: net: bridge: add initial MLDv2 include test Add the initial setup for MLDv2 tests with the first test of a simple is_include report. For MLDv2 we need to setup the bridge properly and we also send the full precooked packets instead of relying on mausezahn to fill in some parts. For verification we use the generic S,G state checking functions from lib.sh. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/forwarding/bridge_mld.sh | 146 +++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/bridge_mld.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh new file mode 100755 index 000000000000..3d0d579e4e03 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -0,0 +1,146 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS="mldv2include_test" +NUM_NETIFS=4 +CHECK_TC="yes" +TEST_GROUP="ff02::cc" +TEST_GROUP_MAC="33:33:00:00:00:cc" + +# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::1,2001:db8:1::2,2001:db8:1::3 +MZPKT_IS_INC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:\ +00:00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:\ +00:05:02:00:00:00:00:8f:00:8e:d9:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:\ +00:00:00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:\ +00:00:00:00:00:00:02:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03" + +source lib.sh + +h1_create() +{ + simple_if_init $h1 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 2001:db8:1::2/64 +} + +h2_destroy() +{ + simple_if_fini $h2 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add dev br0 type bridge mcast_snooping 1 mcast_query_response_interval 100 \ + mcast_mld_version 2 mcast_startup_query_interval 300 \ + mcast_querier 1 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up + + # make sure a query has been generated + sleep 5 +} + +switch_destroy() +{ + ip link set dev $swp2 down + ip link set dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +mldv2include_prepare() +{ + local host1_if=$1 + local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::3") + + ip link set dev br0 type bridge mcast_mld_version 2 + check_err $? "Could not change bridge MLD version to 2" + + $MZ $host1_if $MZPKT_IS_INC -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .source_list != null)" &>/dev/null + check_err $? "Missing *,G entry with source list" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"include\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + brmcast_check_sg_entries "is_include" "${X[@]}" +} + +mldv2cleanup() +{ + local port=$1 + + bridge mdb del dev br0 port $port grp $TEST_GROUP + ip link set dev br0 type bridge mcast_mld_version 1 +} + +mldv2include_test() +{ + RET=0 + local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::3") + + mldv2include_prepare $h1 + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "2001:db8:1::100" + + log_test "MLDv2 report $TEST_GROUP is_include" + + mldv2cleanup $swp1 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS -- cgit v1.3.1 From 0ef10e60682ec2604e34ff8e6eff8fb39fee176c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 3 Nov 2020 19:24:01 +0200 Subject: selftests: net: bridge: add test for mldv2 inc -> allow report The test checks for the following case: Router State Report Received New Router State Actions INCLUDE (A) ALLOW (B) INCLUDE (A+B) (B)=MALI Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/forwarding/bridge_mld.sh | 29 +++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh index 3d0d579e4e03..accc4ec2dcce 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mld.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="mldv2include_test" +ALL_TESTS="mldv2include_test mldv2inc_allow_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="ff02::cc" @@ -13,6 +13,12 @@ MZPKT_IS_INC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01: 00:05:02:00:00:00:00:8f:00:8e:d9:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:\ 00:00:00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:\ 00:00:00:00:00:00:02:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03" +# MLDv2 allow report: grp ff02::cc allow 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12 +MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:\ +00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:\ +02:00:00:00:00:8f:00:8a:ac:00:00:00:01:05:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:\ +00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:00:\ +00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12" source lib.sh @@ -136,6 +142,27 @@ mldv2include_test() mldv2cleanup $swp1 } +mldv2inc_allow_test() +{ + RET=0 + local X=("2001:db8:1::10" "2001:db8:1::11" "2001:db8:1::12") + + mldv2include_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_ALLOW -q + sleep 1 + brmcast_check_sg_entries "allow" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "2001:db8:1::100" + + log_test "MLDv2 report $TEST_GROUP include -> allow" + + mldv2cleanup $swp1 +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From f44de2bc684da9d310c7703a077bd992ebdf71b1 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 3 Nov 2020 19:24:02 +0200 Subject: selftests: net: bridge: add test for mldv2 inc -> is_include report The test checks for the following case: Router State Report Received New Router State Actions INCLUDE (A) IS_IN (B) INCLUDE (A+B) (B)=MALI Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/forwarding/bridge_mld.sh | 29 +++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh index accc4ec2dcce..a93bf6fa6caa 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mld.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="mldv2include_test mldv2inc_allow_test" +ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="ff02::cc" @@ -13,6 +13,12 @@ MZPKT_IS_INC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01: 00:05:02:00:00:00:00:8f:00:8e:d9:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:\ 00:00:00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:\ 00:00:00:00:00:00:02:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03" +# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12 +MZPKT_IS_INC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:\ +00:00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:\ +05:02:00:00:00:00:8f:00:8e:ac:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:\ +00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:\ +00:00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12" # MLDv2 allow report: grp ff02::cc allow 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12 MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:\ 00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:\ @@ -163,6 +169,27 @@ mldv2inc_allow_test() mldv2cleanup $swp1 } +mldv2inc_is_include_test() +{ + RET=0 + local X=("2001:db8:1::10" "2001:db8:1::11" "2001:db8:1::12") + + mldv2include_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_IS_INC2 -q + sleep 1 + brmcast_check_sg_entries "is_include" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "2001:db8:1::100" + + log_test "MLDv2 report $TEST_GROUP include -> is_include" + + mldv2cleanup $swp1 +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From f9fcd55328a934a252b89b6cdde6c888a62207a7 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 3 Nov 2020 19:24:03 +0200 Subject: selftests: net: bridge: add test for mldv2 inc -> is_exclude report The test checks for the following case: Router State Report Received New Router State Actions INCLUDE (A) IS_EX (B) EXCLUDE (A*B, B-A) (B-A)=0 Delete (A-B) Filter Timer=MALI Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/forwarding/bridge_mld.sh | 54 +++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh index a93bf6fa6caa..ddef8699be7d 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mld.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test" +ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="ff02::cc" @@ -25,6 +25,12 @@ MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:f 02:00:00:00:00:8f:00:8a:ac:00:00:00:01:05:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:\ 00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:00:\ 00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12" +# MLDv2 is_ex report: grp ff02::cc is_exclude 2001:db8:1::1,2001:db8:1::2,2001:db8:1::20,2001:db8:1::21 +MZPKT_IS_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:64:00:01:fe:80:00:00:00:\ +00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\ +00:00:00:8f:00:5f:d0:00:00:00:01:02:00:00:04:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:21" source lib.sh @@ -123,6 +129,38 @@ mldv2include_prepare() brmcast_check_sg_entries "is_include" "${X[@]}" } +mldv2exclude_prepare() +{ + local host1_if=$1 + local mac=$2 + local group=$3 + local pkt=$4 + local X=("2001:db8:1::1" "2001:db8:1::2") + local Y=("2001:db8:1::20" "2001:db8:1::21") + + mldv2include_prepare $h1 + + $MZ $host1_if -c 1 $MZPKT_IS_EXC -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"exclude\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::3\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::3 entry still exists" +} + mldv2cleanup() { local port=$1 @@ -190,6 +228,20 @@ mldv2inc_is_include_test() mldv2cleanup $swp1 } +mldv2inc_is_exclude_test() +{ + RET=0 + + mldv2exclude_prepare $h1 + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP include -> is_exclude" + + mldv2cleanup $swp1 +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From 55852f1d6a337e63c38e9c247fffaeabb5faef16 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 3 Nov 2020 19:24:04 +0200 Subject: selftests: net: bridge: add test for mldv2 inc -> to_exclude report The test checks for the following case: Router State Report Received New Router State Actions INCLUDE (A) TO_EX (B) EXCLUDE (A*B,B-A) (B-A)=0 Delete (A-B) Send Q(MA,A*B) Filter Timer=MALI Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/forwarding/bridge_mld.sh | 56 +++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh index ddef8699be7d..571b01ef672c 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mld.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -1,7 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test" +ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \ + mldv2inc_to_exclude_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="ff02::cc" @@ -31,6 +32,12 @@ MZPKT_IS_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:64:00:01: 00:00:00:8f:00:5f:d0:00:00:00:01:02:00:00:04:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\ 01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02:20:\ 01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:21" +# MLDv2 to_ex report: grp ff02::cc to_exclude 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30 +MZPKT_TO_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:\ +00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\ +00:00:00:8f:00:8b:8e:00:00:00:01:04:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30" source lib.sh @@ -242,6 +249,53 @@ mldv2inc_is_exclude_test() mldv2cleanup $swp1 } +mldv2inc_to_exclude_test() +{ + RET=0 + local X=("2001:db8:1::1") + local Y=("2001:db8:1::20" "2001:db8:1::30") + + mldv2include_prepare $h1 + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 $MZPKT_TO_EXC -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"exclude\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::2\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::2 entry still exists" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::21\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::21 entry still exists" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP include -> to_exclude" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + mldv2cleanup $swp1 +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From 0e77581fdf302547f71749e2e1cd657562189375 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 3 Nov 2020 19:24:05 +0200 Subject: selftests: net: bridge: add test for mldv2 exc -> allow report The test checks for the following case: Router State Report Received New Router State Actions EXCLUDE (X,Y) ALLOW (A) EXCLUDE (X+A,Y-A) (A)=MALI Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/forwarding/bridge_mld.sh | 30 +++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh index 571b01ef672c..97882c13f278 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mld.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -2,7 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \ - mldv2inc_to_exclude_test" + mldv2inc_to_exclude_test mldv2exc_allow_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="ff02::cc" @@ -26,6 +26,11 @@ MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:f 02:00:00:00:00:8f:00:8a:ac:00:00:00:01:05:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:\ 00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:00:\ 00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12" +# MLDv2 allow report: grp ff02::cc allow 2001:db8:1::20,2001:db8:1::30 +MZPKT_ALLOW2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\ +00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\ +00:00:00:8f:00:b8:5a:00:00:00:01:05:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30" # MLDv2 is_ex report: grp ff02::cc is_exclude 2001:db8:1::1,2001:db8:1::2,2001:db8:1::20,2001:db8:1::21 MZPKT_IS_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:64:00:01:fe:80:00:00:00:\ 00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\ @@ -296,6 +301,29 @@ mldv2inc_to_exclude_test() mldv2cleanup $swp1 } +mldv2exc_allow_test() +{ + RET=0 + local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::20" "2001:db8:1::30") + local Y=("2001:db8:1::21") + + mldv2exclude_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_ALLOW2 -q + sleep 1 + brmcast_check_sg_entries "allow" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP exclude -> allow" + + mldv2cleanup $swp1 +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From 25ba7c03ef1ab77639b00cd2932e0de3b402bed7 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 3 Nov 2020 19:24:06 +0200 Subject: selftests: net: bridge: add test for mldv2 exc -> is_include report The test checks for the following case: Router State Report Received New Router State Actions EXCLUDE (X,Y) IS_IN (A) EXCLUDE (X+A, Y-A) (A)=MALI Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/forwarding/bridge_mld.sh | 30 +++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh index 97882c13f278..bae865b5bc8c 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mld.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -2,7 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \ - mldv2inc_to_exclude_test mldv2exc_allow_test" + mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="ff02::cc" @@ -20,6 +20,11 @@ MZPKT_IS_INC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01 05:02:00:00:00:00:8f:00:8e:ac:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:\ 00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:\ 00:00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12" +# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::20,2001:db8:1::30 +MZPKT_IS_INC3="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\ +00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\ +00:00:00:8f:00:bc:5a:00:00:00:01:01:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30" # MLDv2 allow report: grp ff02::cc allow 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12 MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:\ 00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:\ @@ -324,6 +329,29 @@ mldv2exc_allow_test() mldv2cleanup $swp1 } +mldv2exc_is_include_test() +{ + RET=0 + local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::20" "2001:db8:1::30") + local Y=("2001:db8:1::21") + + mldv2exclude_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_IS_INC3 -q + sleep 1 + brmcast_check_sg_entries "is_include" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP exclude -> is_include" + + mldv2cleanup $swp1 +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From d0b19dedd6c26a797455acb2f198fe946793f209 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 3 Nov 2020 19:24:07 +0200 Subject: selftests: net: bridge: add test for mldv2 exc -> is_exclude report The test checks for the following case: Router State Report Received New Router State Actions EXCLUDE (X,Y) IS_EX (A) EXCLUDE (A-Y, Y*A) (A-X-Y)=MALI Delete (X-A) Delete (Y-A) Filter Timer=MALI Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/forwarding/bridge_mld.sh | 31 +++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh index bae865b5bc8c..0f48c8da041b 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mld.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -2,7 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \ - mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test" + mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \ + mldv2exc_is_exclude_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="ff02::cc" @@ -42,6 +43,11 @@ MZPKT_IS_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:64:00:01: 00:00:00:8f:00:5f:d0:00:00:00:01:02:00:00:04:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\ 01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02:20:\ 01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:21" +# MLDv2 is_ex report: grp ff02::cc is_exclude 2001:db8:1::20,2001:db8:1::30 +MZPKT_IS_EXC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\ +00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\ +00:00:00:8f:00:bb:5a:00:00:00:01:02:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30" # MLDv2 to_ex report: grp ff02::cc to_exclude 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30 MZPKT_TO_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:\ 00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\ @@ -352,6 +358,29 @@ mldv2exc_is_include_test() mldv2cleanup $swp1 } +mldv2exc_is_exclude_test() +{ + RET=0 + local X=("2001:db8:1::30") + local Y=("2001:db8:1::20") + + mldv2exclude_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_IS_EXC2 -q + sleep 1 + brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP exclude -> is_exclude" + + mldv2cleanup $swp1 +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From 9eb4394db91c5ef9595872974224a38719781829 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 3 Nov 2020 19:24:08 +0200 Subject: selftests: net: bridge: add test for mldv2 exc -> to_exclude report The test checks for the following case: Router State Report Received New Router State Actions EXCLUDE (X,Y) TO_EX (A) EXCLUDE (A-Y,Y*A) (A-X-Y) = Filter Timer Delete (X-A) Delete (Y-A) Send Q(MA,A-Y) Filter Timer=MALI Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/forwarding/bridge_mld.sh | 30 +++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh index 0f48c8da041b..024fa22fa3c2 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mld.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -3,7 +3,7 @@ ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \ mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \ - mldv2exc_is_exclude_test" + mldv2exc_is_exclude_test mldv2exc_to_exclude_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="ff02::cc" @@ -381,6 +381,34 @@ mldv2exc_is_exclude_test() mldv2cleanup $swp1 } +mldv2exc_to_exclude_test() +{ + RET=0 + local X=("2001:db8:1::1" "2001:db8:1::30") + local Y=("2001:db8:1::20") + + mldv2exclude_prepare $h1 + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 $MZPKT_TO_EXC -q + sleep 1 + brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP exclude -> to_exclude" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + mldv2cleanup $swp1 +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From 57386215cc0b4ed483a3ebcb0d2a378ab0db14ba Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 3 Nov 2020 19:24:09 +0200 Subject: selftests: net: bridge: add test for mldv2 inc -> block report The test checks for the following case: Router State Report Received New Router State Actions INCLUDE (A) BLOCK (B) INCLUDE (A) Send Q(MA,A*B) Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/forwarding/bridge_mld.sh | 37 +++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh index 024fa22fa3c2..a3c405b2fd6f 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mld.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -3,7 +3,7 @@ ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \ mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \ - mldv2exc_is_exclude_test mldv2exc_to_exclude_test" + mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="ff02::cc" @@ -54,6 +54,12 @@ MZPKT_TO_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01: 00:00:00:8f:00:8b:8e:00:00:00:01:04:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\ 01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:\ 01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30" +# MLDv2 block report: grp ff02::cc block 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30 +MZPKT_BLOCK="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:00:\ +00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:00:\ +00:00:8f:00:89:8e:00:00:00:01:06:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:01:\ +0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:\ +0d:b8:00:01:00:00:00:00:00:00:00:00:00:30" source lib.sh @@ -409,6 +415,35 @@ mldv2exc_to_exclude_test() mldv2cleanup $swp1 } +mldv2inc_block_test() +{ + RET=0 + local X=("2001:db8:1::2" "2001:db8:1::3") + + mldv2include_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_BLOCK -q + # make sure the lowered timers have expired (by default 2 seconds) + sleep 3 + brmcast_check_sg_entries "block" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::1\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::1 entry still exists" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 2001:db8:1::100 + + log_test "MLDv2 report $TEST_GROUP include -> block" + + mldv2cleanup $swp1 +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From a2d667f0c1fb30e24ae71f6b8e832808bf18f117 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 3 Nov 2020 19:24:10 +0200 Subject: selftests: net: bridge: add test for mldv2 exc -> block report The test checks for the following case: Router State Report Received New Router State Actions EXCLUDE (X,Y) BLOCK (A) EXCLUDE (X+(A-Y),Y) (A-X-Y) = Filter Timer Send Q(MA,A-Y) Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/forwarding/bridge_mld.sh | 31 +++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh index a3c405b2fd6f..c498e51b8d2b 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mld.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -3,7 +3,8 @@ ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \ mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \ - mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test" + mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \ + mldv2exc_block_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="ff02::cc" @@ -444,6 +445,34 @@ mldv2inc_block_test() mldv2cleanup $swp1 } +mldv2exc_block_test() +{ + RET=0 + local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::30") + local Y=("2001:db8:1::20" "2001:db8:1::21") + + mldv2exclude_prepare $h1 + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 $MZPKT_BLOCK -q + sleep 1 + brmcast_check_sg_entries "block" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP exclude -> block" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + mldv2cleanup $swp1 +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From d598cc6a2d45321a2a662742f8c38b43021e36e0 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 3 Nov 2020 19:24:11 +0200 Subject: selftests: net: bridge: add test for mldv2 exclude timeout Test that when a group in exclude mode expires it changes mode to include and the blocked entries are deleted. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/forwarding/bridge_mld.sh | 48 +++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh index c498e51b8d2b..b34cf4c6ceba 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mld.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -4,7 +4,7 @@ ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \ mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \ mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \ - mldv2exc_block_test" + mldv2exc_block_test mldv2exc_timeout_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="ff02::cc" @@ -473,6 +473,52 @@ mldv2exc_block_test() mldv2cleanup $swp1 } +mldv2exc_timeout_test() +{ + RET=0 + local X=("2001:db8:1::20" "2001:db8:1::30") + + # GMI should be 3 seconds + ip link set dev br0 type bridge mcast_query_interval 100 mcast_query_response_interval 100 + + mldv2exclude_prepare $h1 + ip link set dev br0 type bridge mcast_query_interval 500 mcast_query_response_interval 500 + $MZ $h1 -c 1 $MZPKT_ALLOW2 -q + sleep 3 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"include\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::1\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::1 entry still exists" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::2\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::2 entry still exists" + + brmcast_check_sg_entries "allow" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 2001:db8:1::100 + + log_test "MLDv2 group $TEST_GROUP exclude timeout" + + ip link set dev br0 type bridge mcast_query_interval 12500 \ + mcast_query_response_interval 1000 + + mldv2cleanup $swp1 +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From 252b353c5bb30ee9cc0c3d5cef128cec372e6a2c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 3 Nov 2020 19:24:12 +0200 Subject: selftests: net: bridge: add test for mldv2 *,g auto-add When we have *,G ports in exclude mode and a new S,G,port is added the kernel has to automatically create an S,G entry for each exclude port to get proper forwarding. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/forwarding/bridge_mld.sh | 31 +++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh index b34cf4c6ceba..ffdcfa87ca2b 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mld.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -4,7 +4,7 @@ ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \ mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \ mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \ - mldv2exc_block_test mldv2exc_timeout_test" + mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="ff02::cc" @@ -519,6 +519,35 @@ mldv2exc_timeout_test() mldv2cleanup $swp1 } +mldv2star_ex_auto_add_test() +{ + RET=0 + + mldv2exclude_prepare $h1 + + $MZ $h2 -c 1 $MZPKT_IS_INC -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"2001:db8:1::3\" and \ + .port == \"$swp1\")" &>/dev/null + check_err $? "S,G entry for *,G port doesn't exist" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"2001:db8:1::3\" and \ + .port == \"$swp1\" and \ + .flags[] == \"added_by_star_ex\")" &>/dev/null + check_err $? "Auto-added S,G entry doesn't have added_by_star_ex flag" + + brmcast_check_sg_fwding 1 2001:db8:1::3 + + log_test "MLDv2 S,G port entry automatic add to a *,G port" + + mldv2cleanup $swp1 + mldv2cleanup $swp2 +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From 8d014eaa9254a9b8e0841df40dd36782b451579a Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 3 Nov 2020 11:05:09 -0800 Subject: selftests: mptcp: add ADD_ADDR timeout test case This patch added the test case for retransmitting ADD_ADDR when timeout occurs. It set NS1's add_addr_timeout to 1 second, and drop NS2's ADD_ADDR echo packets. Here we need to slow down the transfer process of all data to let the ADD_ADDR suboptions can be retransmitted three times. So we added a new parameter "speed" for do_transfer, it can be set with fast or slow. We also added three new optional parameters for run_tests, and dropped run_remove_tests function. Since we added the netfilter rules in this test case, we need to update the "config" file. Suggested-by: Matthieu Baerts Suggested-by: Paolo Abeni Acked-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/config | 10 +++ tools/testing/selftests/net/mptcp/mptcp_join.sh | 94 ++++++++++++++++++------- 2 files changed, 80 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 741a1c4f4ae8..0faaccd21447 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -5,3 +5,13 @@ CONFIG_INET_DIAG=m CONFIG_INET_MPTCP_DIAG=m CONFIG_VETH=y CONFIG_NET_SCH_NETEM=m +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NETFILTER_NETLINK=m +CONFIG_NF_TABLES=m +CONFIG_NFT_COUNTER=m +CONFIG_NFT_COMPAT=m +CONFIG_NETFILTER_XTABLES=m +CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NF_TABLES_IPV4=y +CONFIG_NF_TABLES_IPV6=y diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 08f53d86dedc..0d93b243695f 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -13,6 +13,24 @@ capture=0 TEST_COUNT=0 +# generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) || +# (ip6 && (ip6[74] & 0xf0) == 0x30)'" +CBPF_MPTCP_SUBOPTION_ADD_ADDR="14, + 48 0 0 0, + 84 0 0 240, + 21 0 3 64, + 48 0 0 54, + 84 0 0 240, + 21 6 7 48, + 48 0 0 0, + 84 0 0 240, + 21 0 4 96, + 48 0 0 74, + 84 0 0 240, + 21 0 1 48, + 6 0 0 65535, + 6 0 0 0" + init() { capout=$(mktemp) @@ -82,6 +100,26 @@ reset_with_cookies() done } +reset_with_add_addr_timeout() +{ + local ip="${1:-4}" + local tables + + tables="iptables" + if [ $ip -eq 6 ]; then + tables="ip6tables" + fi + + reset + + ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 + ip netns exec $ns2 $tables -A OUTPUT -p tcp \ + -m tcp --tcp-option 30 \ + -m bpf --bytecode \ + "$CBPF_MPTCP_SUBOPTION_ADD_ADDR" \ + -j DROP +} + for arg in "$@"; do if [ "$arg" = "-c" ]; then capture=1 @@ -94,6 +132,17 @@ if [ $? -ne 0 ];then exit $ksft_skip fi +iptables -V > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run all tests without iptables tool" + exit $ksft_skip +fi + +ip6tables -V > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run all tests without ip6tables tool" + exit $ksft_skip +fi check_transfer() { @@ -135,6 +184,7 @@ do_transfer() connect_addr="$5" rm_nr_ns1="$6" rm_nr_ns2="$7" + speed="$8" port=$((10000+$TEST_COUNT)) TEST_COUNT=$((TEST_COUNT+1)) @@ -159,7 +209,7 @@ do_transfer() sleep 1 fi - if [[ $rm_nr_ns1 -eq 0 && $rm_nr_ns2 -eq 0 ]]; then + if [ $speed = "fast" ]; then mptcp_connect="./mptcp_connect -j" else mptcp_connect="./mptcp_connect -r" @@ -250,26 +300,13 @@ run_tests() listener_ns="$1" connector_ns="$2" connect_addr="$3" + rm_nr_ns1="${4:-0}" + rm_nr_ns2="${5:-0}" + speed="${6:-fast}" lret=0 - do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} 0 0 - lret=$? - if [ $lret -ne 0 ]; then - ret=$lret - return - fi -} - -run_remove_tests() -{ - listener_ns="$1" - connector_ns="$2" - connect_addr="$3" - rm_nr_ns1="$4" - rm_nr_ns2="$5" - lret=0 - - do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${rm_nr_ns1} ${rm_nr_ns2} + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \ + ${rm_nr_ns1} ${rm_nr_ns2} ${speed} lret=$? if [ $lret -ne 0 ]; then ret=$lret @@ -491,12 +528,21 @@ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows and signal" 3 3 3 chk_add_nr 1 1 +# add_addr timeout +reset_with_add_addr_timeout +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +run_tests $ns1 $ns2 10.0.1.1 0 0 slow +chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1 +chk_add_nr 4 0 + # single subflow, remove reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_remove_tests $ns1 $ns2 10.0.1.1 0 1 +run_tests $ns1 $ns2 10.0.1.1 0 1 slow chk_join_nr "remove single subflow" 1 1 1 chk_rm_nr 1 1 @@ -506,7 +552,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2 ip netns exec $ns2 ./pm_nl_ctl limits 0 2 ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_remove_tests $ns1 $ns2 10.0.1.1 0 2 +run_tests $ns1 $ns2 10.0.1.1 0 2 slow chk_join_nr "remove multiple subflows" 2 2 2 chk_rm_nr 2 2 @@ -515,7 +561,7 @@ reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -run_remove_tests $ns1 $ns2 10.0.1.1 1 0 +run_tests $ns1 $ns2 10.0.1.1 1 0 slow chk_join_nr "remove single address" 1 1 1 chk_add_nr 1 1 chk_rm_nr 0 0 @@ -526,7 +572,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 2 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_remove_tests $ns1 $ns2 10.0.1.1 1 1 +run_tests $ns1 $ns2 10.0.1.1 1 1 slow chk_join_nr "remove subflow and signal" 2 2 2 chk_add_nr 1 1 chk_rm_nr 1 1 @@ -538,7 +584,7 @@ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 3 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_remove_tests $ns1 $ns2 10.0.1.1 1 2 +run_tests $ns1 $ns2 10.0.1.1 1 2 slow chk_join_nr "remove subflows and signal" 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 -- cgit v1.3.1 From f3ae6c6e8a3ea49076d826c64e63ea78fbf9db43 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:30 +0300 Subject: selftests: proc: fix warning: _GNU_SOURCE redefined Makefile already contains -D_GNU_SOURCE, so we can remove it from the *.c files. Signed-off-by: Tommi Rantala Signed-off-by: Shuah Khan --- tools/testing/selftests/proc/proc-loadavg-001.c | 1 - tools/testing/selftests/proc/proc-self-syscall.c | 1 - tools/testing/selftests/proc/proc-uptime-002.c | 1 - 3 files changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c index 471e2aa28077..fb4fe9188806 100644 --- a/tools/testing/selftests/proc/proc-loadavg-001.c +++ b/tools/testing/selftests/proc/proc-loadavg-001.c @@ -14,7 +14,6 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* Test that /proc/loadavg correctly reports last pid in pid namespace. */ -#define _GNU_SOURCE #include #include #include diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c index 9f6d000c0245..8511dcfe67c7 100644 --- a/tools/testing/selftests/proc/proc-self-syscall.c +++ b/tools/testing/selftests/proc/proc-self-syscall.c @@ -13,7 +13,6 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#define _GNU_SOURCE #include #include #include diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c index 30e2b7849089..e7ceabed7f51 100644 --- a/tools/testing/selftests/proc/proc-uptime-002.c +++ b/tools/testing/selftests/proc/proc-uptime-002.c @@ -15,7 +15,6 @@ */ // Test that values in /proc/uptime increment monotonically // while shifting across CPUs. -#define _GNU_SOURCE #undef NDEBUG #include #include -- cgit v1.3.1 From 1d44d0dd61b6121b49f25b731f2f7f605cb3c896 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:31 +0300 Subject: selftests: core: use SKIP instead of XFAIL in close_range_test.c XFAIL is gone since commit 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP"), use SKIP instead. Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP") Signed-off-by: Tommi Rantala Reviewed-by: Kees Cook Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/core/close_range_test.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index c99b98b0d461..575b391ddc78 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -44,7 +44,7 @@ TEST(close_range) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; @@ -52,7 +52,7 @@ TEST(close_range) EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) { if (errno == ENOSYS) - XFAIL(return, "close_range() syscall not supported"); + SKIP(return, "close_range() syscall not supported"); } EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0)); @@ -108,7 +108,7 @@ TEST(close_range_unshare) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; @@ -197,7 +197,7 @@ TEST(close_range_unshare_capped) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; -- cgit v1.3.1 From afba8b0a2cc532b54eaf4254092f57bba5d7eb65 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:32 +0300 Subject: selftests: clone3: use SKIP instead of XFAIL XFAIL is gone since commit 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP"), use SKIP instead. Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP") Signed-off-by: Tommi Rantala Reviewed-by: Kees Cook Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c index 55bd387ce7ec..52d3f0364bda 100644 --- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c +++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c @@ -145,7 +145,7 @@ TEST(clone3_cap_checkpoint_restore) test_clone3_supported(); EXPECT_EQ(getuid(), 0) - XFAIL(return, "Skipping all tests as non-root\n"); + SKIP(return, "Skipping all tests as non-root"); memset(&set_tid, 0, sizeof(set_tid)); -- cgit v1.3.1 From 7d764b685ee1bc73a9fa2b6cb4d42fa72b943145 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:33 +0300 Subject: selftests: binderfs: use SKIP instead of XFAIL XFAIL is gone since commit 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP"), use SKIP instead. Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP") Signed-off-by: Tommi Rantala Reviewed-by: Kees Cook Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/filesystems/binderfs/binderfs_test.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c index 1d27f52c61e6..477cbb042f5b 100644 --- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -74,7 +74,7 @@ static int __do_binderfs_test(struct __test_metadata *_metadata) ret = mount(NULL, binderfs_mntpt, "binder", 0, 0); EXPECT_EQ(ret, 0) { if (errno == ENODEV) - XFAIL(goto out, "binderfs missing"); + SKIP(goto out, "binderfs missing"); TH_LOG("%s - Failed to mount binderfs", strerror(errno)); goto rmdir; } @@ -475,10 +475,10 @@ TEST(binderfs_stress) TEST(binderfs_test_privileged) { if (geteuid() != 0) - XFAIL(return, "Tests are not run as root. Skipping privileged tests"); + SKIP(return, "Tests are not run as root. Skipping privileged tests"); if (__do_binderfs_test(_metadata)) - XFAIL(return, "The Android binderfs filesystem is not available"); + SKIP(return, "The Android binderfs filesystem is not available"); } TEST(binderfs_test_unprivileged) @@ -511,7 +511,7 @@ TEST(binderfs_test_unprivileged) ret = wait_for_pid(pid); if (ret) { if (ret == 2) - XFAIL(return, "The Android binderfs filesystem is not available"); + SKIP(return, "The Android binderfs filesystem is not available"); ASSERT_EQ(ret, 0) { TH_LOG("wait_for_pid() failed"); } -- cgit v1.3.1 From f9b7ff0d7f7a466a920424246e7ddc2b84c87e52 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 5 Nov 2020 11:52:30 +0000 Subject: tools/bpftool: Fix attaching flow dissector My earlier patch to reject non-zero arguments to flow dissector attach broke attaching via bpftool. Instead of 0 it uses -1 for target_fd. Fix this by passing a zero argument when attaching the flow dissector. Fixes: 1b514239e859 ("bpf: flow_dissector: Check value of unused flags to BPF_PROG_ATTACH") Reported-by: Jiri Benc Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201105115230.296657-1-lmb@cloudflare.com --- tools/bpf/bpftool/prog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index d942c1e3372c..acdb2c245f0a 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -940,7 +940,7 @@ static int parse_attach_detach_args(int argc, char **argv, int *progfd, } if (*attach_type == BPF_FLOW_DISSECTOR) { - *mapfd = -1; + *mapfd = 0; return 0; } -- cgit v1.3.1 From c81ed6d81e0560713ceb94917ff1981848d0614e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 4 Nov 2020 20:33:51 -0800 Subject: libbpf: Factor out common operations in BTF writing APIs Factor out commiting of appended type data. Also extract fetching the very last type in the BTF (to append members to). These two operations are common across many APIs and will be easier to refactor with split BTF, if they are extracted into a single place. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201105043402.2530976-2-andrii@kernel.org --- tools/lib/bpf/btf.c | 123 ++++++++++++++++++---------------------------------- 1 file changed, 43 insertions(+), 80 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 231b07203e3d..89fecfe5cb2b 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1560,6 +1560,20 @@ static void btf_type_inc_vlen(struct btf_type *t) t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t)); } +static int btf_commit_type(struct btf *btf, int data_sz) +{ + int err; + + err = btf_add_type_idx_entry(btf, btf->hdr->type_len); + if (err) + return err; + + btf->hdr->type_len += data_sz; + btf->hdr->str_off += data_sz; + btf->nr_types++; + return btf->nr_types; +} + /* * Append new BTF_KIND_INT type with: * - *name* - non-empty, non-NULL type name; @@ -1572,7 +1586,7 @@ static void btf_type_inc_vlen(struct btf_type *t) int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding) { struct btf_type *t; - int sz, err, name_off; + int sz, name_off; /* non-empty name */ if (!name || !name[0]) @@ -1606,14 +1620,7 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding /* set INT info, we don't allow setting legacy bit offset/size */ *(__u32 *)(t + 1) = (encoding << 24) | (byte_sz * 8); - err = btf_add_type_idx_entry(btf, btf->hdr->type_len); - if (err) - return err; - - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; - btf->nr_types++; - return btf->nr_types; + return btf_commit_type(btf, sz); } /* it's completely legal to append BTF types with type IDs pointing forward to @@ -1631,7 +1638,7 @@ static int validate_type_id(int id) static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id) { struct btf_type *t; - int sz, name_off = 0, err; + int sz, name_off = 0; if (validate_type_id(ref_type_id)) return -EINVAL; @@ -1654,14 +1661,7 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref t->info = btf_type_info(kind, 0, 0); t->type = ref_type_id; - err = btf_add_type_idx_entry(btf, btf->hdr->type_len); - if (err) - return err; - - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; - btf->nr_types++; - return btf->nr_types; + return btf_commit_type(btf, sz); } /* @@ -1689,7 +1689,7 @@ int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 n { struct btf_type *t; struct btf_array *a; - int sz, err; + int sz; if (validate_type_id(index_type_id) || validate_type_id(elem_type_id)) return -EINVAL; @@ -1711,21 +1711,14 @@ int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 n a->index_type = index_type_id; a->nelems = nr_elems; - err = btf_add_type_idx_entry(btf, btf->hdr->type_len); - if (err) - return err; - - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; - btf->nr_types++; - return btf->nr_types; + return btf_commit_type(btf, sz); } /* generic STRUCT/UNION append function */ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32 bytes_sz) { struct btf_type *t; - int sz, err, name_off = 0; + int sz, name_off = 0; if (btf_ensure_modifiable(btf)) return -ENOMEM; @@ -1748,14 +1741,7 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32 t->info = btf_type_info(kind, 0, 0); t->size = bytes_sz; - err = btf_add_type_idx_entry(btf, btf->hdr->type_len); - if (err) - return err; - - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; - btf->nr_types++; - return btf->nr_types; + return btf_commit_type(btf, sz); } /* @@ -1793,6 +1779,11 @@ int btf__add_union(struct btf *btf, const char *name, __u32 byte_sz) return btf_add_composite(btf, BTF_KIND_UNION, name, byte_sz); } +static struct btf_type *btf_last_type(struct btf *btf) +{ + return btf_type_by_id(btf, btf__get_nr_types(btf)); +} + /* * Append new field for the current STRUCT/UNION type with: * - *name* - name of the field, can be NULL or empty for anonymous field; @@ -1814,7 +1805,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id, /* last type should be union/struct */ if (btf->nr_types == 0) return -EINVAL; - t = btf_type_by_id(btf, btf->nr_types); + t = btf_last_type(btf); if (!btf_is_composite(t)) return -EINVAL; @@ -1849,7 +1840,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id, m->offset = bit_offset | (bit_size << 24); /* btf_add_type_mem can invalidate t pointer */ - t = btf_type_by_id(btf, btf->nr_types); + t = btf_last_type(btf); /* update parent type's vlen and kflag */ t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, is_bitfield || btf_kflag(t)); @@ -1874,7 +1865,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id, int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz) { struct btf_type *t; - int sz, err, name_off = 0; + int sz, name_off = 0; /* byte_sz must be power of 2 */ if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8) @@ -1899,14 +1890,7 @@ int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz) t->info = btf_type_info(BTF_KIND_ENUM, 0, 0); t->size = byte_sz; - err = btf_add_type_idx_entry(btf, btf->hdr->type_len); - if (err) - return err; - - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; - btf->nr_types++; - return btf->nr_types; + return btf_commit_type(btf, sz); } /* @@ -1926,7 +1910,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value) /* last type should be BTF_KIND_ENUM */ if (btf->nr_types == 0) return -EINVAL; - t = btf_type_by_id(btf, btf->nr_types); + t = btf_last_type(btf); if (!btf_is_enum(t)) return -EINVAL; @@ -1953,7 +1937,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value) v->val = value; /* update parent type's vlen */ - t = btf_type_by_id(btf, btf->nr_types); + t = btf_last_type(btf); btf_type_inc_vlen(t); btf->hdr->type_len += sz; @@ -2093,7 +2077,7 @@ int btf__add_func(struct btf *btf, const char *name, int btf__add_func_proto(struct btf *btf, int ret_type_id) { struct btf_type *t; - int sz, err; + int sz; if (validate_type_id(ret_type_id)) return -EINVAL; @@ -2113,14 +2097,7 @@ int btf__add_func_proto(struct btf *btf, int ret_type_id) t->info = btf_type_info(BTF_KIND_FUNC_PROTO, 0, 0); t->type = ret_type_id; - err = btf_add_type_idx_entry(btf, btf->hdr->type_len); - if (err) - return err; - - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; - btf->nr_types++; - return btf->nr_types; + return btf_commit_type(btf, sz); } /* @@ -2143,7 +2120,7 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id) /* last type should be BTF_KIND_FUNC_PROTO */ if (btf->nr_types == 0) return -EINVAL; - t = btf_type_by_id(btf, btf->nr_types); + t = btf_last_type(btf); if (!btf_is_func_proto(t)) return -EINVAL; @@ -2166,7 +2143,7 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id) p->type = type_id; /* update parent type's vlen */ - t = btf_type_by_id(btf, btf->nr_types); + t = btf_last_type(btf); btf_type_inc_vlen(t); btf->hdr->type_len += sz; @@ -2188,7 +2165,7 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id) { struct btf_type *t; struct btf_var *v; - int sz, err, name_off; + int sz, name_off; /* non-empty name */ if (!name || !name[0]) @@ -2219,14 +2196,7 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id) v = btf_var(t); v->linkage = linkage; - err = btf_add_type_idx_entry(btf, btf->hdr->type_len); - if (err) - return err; - - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; - btf->nr_types++; - return btf->nr_types; + return btf_commit_type(btf, sz); } /* @@ -2244,7 +2214,7 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id) int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz) { struct btf_type *t; - int sz, err, name_off; + int sz, name_off; /* non-empty name */ if (!name || !name[0]) @@ -2267,14 +2237,7 @@ int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz) t->info = btf_type_info(BTF_KIND_DATASEC, 0, 0); t->size = byte_sz; - err = btf_add_type_idx_entry(btf, btf->hdr->type_len); - if (err) - return err; - - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; - btf->nr_types++; - return btf->nr_types; + return btf_commit_type(btf, sz); } /* @@ -2296,7 +2259,7 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ /* last type should be BTF_KIND_DATASEC */ if (btf->nr_types == 0) return -EINVAL; - t = btf_type_by_id(btf, btf->nr_types); + t = btf_last_type(btf); if (!btf_is_datasec(t)) return -EINVAL; @@ -2317,7 +2280,7 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ v->size = byte_sz; /* update parent type's vlen */ - t = btf_type_by_id(btf, btf->nr_types); + t = btf_last_type(btf); btf_type_inc_vlen(t); btf->hdr->type_len += sz; -- cgit v1.3.1 From d9448f94962bd28554df7d9a342d37c7f13d6232 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 4 Nov 2020 20:33:52 -0800 Subject: selftest/bpf: Relax btf_dedup test checks Remove the requirement of a strictly exact string section contents. This used to be true when string deduplication was done through sorting, but with string dedup done through hash table, it's no longer true. So relax test harness to relax strings checks and, consequently, type checks, which now don't have to have exactly the same string offsets. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201105043402.2530976-3-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/btf.c | 40 +++++++++++++++++----------- 1 file changed, 25 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 93162484c2ca..8ae97e2a4b9d 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -6652,7 +6652,7 @@ static void do_test_dedup(unsigned int test_num) const void *test_btf_data, *expect_btf_data; const char *ret_test_next_str, *ret_expect_next_str; const char *test_strs, *expect_strs; - const char *test_str_cur, *test_str_end; + const char *test_str_cur; const char *expect_str_cur, *expect_str_end; unsigned int raw_btf_size; void *raw_btf; @@ -6719,12 +6719,18 @@ static void do_test_dedup(unsigned int test_num) goto done; } - test_str_cur = test_strs; - test_str_end = test_strs + test_hdr->str_len; expect_str_cur = expect_strs; expect_str_end = expect_strs + expect_hdr->str_len; - while (test_str_cur < test_str_end && expect_str_cur < expect_str_end) { + while (expect_str_cur < expect_str_end) { size_t test_len, expect_len; + int off; + + off = btf__find_str(test_btf, expect_str_cur); + if (CHECK(off < 0, "exp str '%s' not found: %d\n", expect_str_cur, off)) { + err = -1; + goto done; + } + test_str_cur = btf__str_by_offset(test_btf, off); test_len = strlen(test_str_cur); expect_len = strlen(expect_str_cur); @@ -6741,15 +6747,8 @@ static void do_test_dedup(unsigned int test_num) err = -1; goto done; } - test_str_cur += test_len + 1; expect_str_cur += expect_len + 1; } - if (CHECK(test_str_cur != test_str_end, - "test_str_cur:%p != test_str_end:%p", - test_str_cur, test_str_end)) { - err = -1; - goto done; - } test_nr_types = btf__get_nr_types(test_btf); expect_nr_types = btf__get_nr_types(expect_btf); @@ -6775,10 +6774,21 @@ static void do_test_dedup(unsigned int test_num) err = -1; goto done; } - if (CHECK(memcmp((void *)test_type, - (void *)expect_type, - test_size), - "type #%d: contents differ", i)) { + if (CHECK(btf_kind(test_type) != btf_kind(expect_type), + "type %d kind: exp %d != got %u\n", + i, btf_kind(expect_type), btf_kind(test_type))) { + err = -1; + goto done; + } + if (CHECK(test_type->info != expect_type->info, + "type %d info: exp %d != got %u\n", + i, expect_type->info, test_type->info)) { + err = -1; + goto done; + } + if (CHECK(test_type->size != expect_type->size, + "type %d size/type: exp %d != got %u\n", + i, expect_type->size, test_type->size)) { err = -1; goto done; } -- cgit v1.3.1 From 88a82c2a9ab5b2ce533c3de3f4517853c2f67f53 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 4 Nov 2020 20:33:53 -0800 Subject: libbpf: Unify and speed up BTF string deduplication Revamp BTF dedup's string deduplication to match the approach of writable BTF string management. This allows to transfer deduplicated strings index back to BTF object after deduplication without expensive extra memory copying and hash map re-construction. It also simplifies the code and speeds it up, because hashmap-based string deduplication is faster than sort + unique approach. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201105043402.2530976-4-andrii@kernel.org --- tools/lib/bpf/btf.c | 263 ++++++++++++++++++++-------------------------------- 1 file changed, 98 insertions(+), 165 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 89fecfe5cb2b..fa1b147c63c6 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -90,6 +90,14 @@ struct btf { struct hashmap *strs_hash; /* whether strings are already deduplicated */ bool strs_deduped; + /* extra indirection layer to make strings hashmap work with stable + * string offsets and ability to transparently choose between + * btf->strs_data or btf_dedup->strs_data as a source of strings. + * This is used for BTF strings dedup to transfer deduplicated strings + * data back to struct btf without re-building strings index. + */ + void **strs_data_ptr; + /* BTF object FD, if loaded into kernel */ int fd; @@ -1363,17 +1371,19 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, static size_t strs_hash_fn(const void *key, void *ctx) { - struct btf *btf = ctx; - const char *str = btf->strs_data + (long)key; + const struct btf *btf = ctx; + const char *strs = *btf->strs_data_ptr; + const char *str = strs + (long)key; return str_hash(str); } static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx) { - struct btf *btf = ctx; - const char *str1 = btf->strs_data + (long)key1; - const char *str2 = btf->strs_data + (long)key2; + const struct btf *btf = ctx; + const char *strs = *btf->strs_data_ptr; + const char *str1 = strs + (long)key1; + const char *str2 = strs + (long)key2; return strcmp(str1, str2) == 0; } @@ -1418,6 +1428,9 @@ static int btf_ensure_modifiable(struct btf *btf) memcpy(types, btf->types_data, btf->hdr->type_len); memcpy(strs, btf->strs_data, btf->hdr->str_len); + /* make hashmap below use btf->strs_data as a source of strings */ + btf->strs_data_ptr = &btf->strs_data; + /* build lookup index for all strings */ hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, btf); if (IS_ERR(hash)) { @@ -2824,19 +2837,11 @@ struct btf_dedup { size_t hypot_cap; /* Various option modifying behavior of algorithm */ struct btf_dedup_opts opts; -}; - -struct btf_str_ptr { - const char *str; - __u32 new_off; - bool used; -}; - -struct btf_str_ptrs { - struct btf_str_ptr *ptrs; - const char *data; - __u32 cnt; - __u32 cap; + /* temporary strings deduplication state */ + void *strs_data; + size_t strs_cap; + size_t strs_len; + struct hashmap* strs_hash; }; static long hash_combine(long h, long value) @@ -3063,64 +3068,41 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx) return 0; } -static int str_sort_by_content(const void *a1, const void *a2) -{ - const struct btf_str_ptr *p1 = a1; - const struct btf_str_ptr *p2 = a2; - - return strcmp(p1->str, p2->str); -} - -static int str_sort_by_offset(const void *a1, const void *a2) -{ - const struct btf_str_ptr *p1 = a1; - const struct btf_str_ptr *p2 = a2; - - if (p1->str != p2->str) - return p1->str < p2->str ? -1 : 1; - return 0; -} - -static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem) -{ - const struct btf_str_ptr *p = pelem; - - if (str_ptr != p->str) - return (const char *)str_ptr < p->str ? -1 : 1; - return 0; -} - -static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx) +static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx) { - struct btf_str_ptrs *strs; - struct btf_str_ptr *s; + struct btf_dedup *d = ctx; + long old_off, new_off, len; + const char *s; + void *p; + int err; if (*str_off_ptr == 0) return 0; - strs = ctx; - s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt, - sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp); - if (!s) - return -EINVAL; - s->used = true; - return 0; -} + s = btf__str_by_offset(d->btf, *str_off_ptr); + len = strlen(s) + 1; -static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx) -{ - struct btf_str_ptrs *strs; - struct btf_str_ptr *s; + new_off = d->strs_len; + p = btf_add_mem(&d->strs_data, &d->strs_cap, 1, new_off, BTF_MAX_STR_OFFSET, len); + if (!p) + return -ENOMEM; - if (*str_off_ptr == 0) - return 0; + memcpy(p, s, len); - strs = ctx; - s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt, - sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp); - if (!s) - return -EINVAL; - *str_off_ptr = s->new_off; + /* Now attempt to add the string, but only if the string with the same + * contents doesn't exist already (HASHMAP_ADD strategy). If such + * string exists, we'll get its offset in old_off (that's old_key). + */ + err = hashmap__insert(d->strs_hash, (void *)new_off, (void *)new_off, + HASHMAP_ADD, (const void **)&old_off, NULL); + if (err == -EEXIST) { + *str_off_ptr = old_off; + } else if (err) { + return err; + } else { + *str_off_ptr = new_off; + d->strs_len += len; + } return 0; } @@ -3137,118 +3119,69 @@ static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx) */ static int btf_dedup_strings(struct btf_dedup *d) { - char *start = d->btf->strs_data; - char *end = start + d->btf->hdr->str_len; - char *p = start, *tmp_strs = NULL; - struct btf_str_ptrs strs = { - .cnt = 0, - .cap = 0, - .ptrs = NULL, - .data = start, - }; - int i, j, err = 0, grp_idx; - bool grp_used; + char *s; + int err; if (d->btf->strs_deduped) return 0; - /* build index of all strings */ - while (p < end) { - if (strs.cnt + 1 > strs.cap) { - struct btf_str_ptr *new_ptrs; - - strs.cap += max(strs.cnt / 2, 16U); - new_ptrs = libbpf_reallocarray(strs.ptrs, strs.cap, sizeof(strs.ptrs[0])); - if (!new_ptrs) { - err = -ENOMEM; - goto done; - } - strs.ptrs = new_ptrs; - } - - strs.ptrs[strs.cnt].str = p; - strs.ptrs[strs.cnt].used = false; - - p += strlen(p) + 1; - strs.cnt++; - } - - /* temporary storage for deduplicated strings */ - tmp_strs = malloc(d->btf->hdr->str_len); - if (!tmp_strs) { - err = -ENOMEM; - goto done; - } - - /* mark all used strings */ - strs.ptrs[0].used = true; - err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs); - if (err) - goto done; - - /* sort strings by context, so that we can identify duplicates */ - qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content); + s = btf_add_mem(&d->strs_data, &d->strs_cap, 1, d->strs_len, BTF_MAX_STR_OFFSET, 1); + if (!s) + return -ENOMEM; + /* initial empty string */ + s[0] = 0; + d->strs_len = 1; - /* - * iterate groups of equal strings and if any instance in a group was - * referenced, emit single instance and remember new offset + /* temporarily switch to use btf_dedup's strs_data for strings for hash + * functions; later we'll just transfer hashmap to struct btf as is, + * along the strs_data */ - p = tmp_strs; - grp_idx = 0; - grp_used = strs.ptrs[0].used; - /* iterate past end to avoid code duplication after loop */ - for (i = 1; i <= strs.cnt; i++) { - /* - * when i == strs.cnt, we want to skip string comparison and go - * straight to handling last group of strings (otherwise we'd - * need to handle last group after the loop w/ duplicated code) - */ - if (i < strs.cnt && - !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) { - grp_used = grp_used || strs.ptrs[i].used; - continue; - } + d->btf->strs_data_ptr = &d->strs_data; - /* - * this check would have been required after the loop to handle - * last group of strings, but due to <= condition in a loop - * we avoid that duplication - */ - if (grp_used) { - int new_off = p - tmp_strs; - __u32 len = strlen(strs.ptrs[grp_idx].str); - - memmove(p, strs.ptrs[grp_idx].str, len + 1); - for (j = grp_idx; j < i; j++) - strs.ptrs[j].new_off = new_off; - p += len + 1; - } - - if (i < strs.cnt) { - grp_idx = i; - grp_used = strs.ptrs[i].used; - } + d->strs_hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, d->btf); + if (IS_ERR(d->strs_hash)) { + err = PTR_ERR(d->strs_hash); + d->strs_hash = NULL; + goto err_out; } - /* replace original strings with deduped ones */ - d->btf->hdr->str_len = p - tmp_strs; - memmove(start, tmp_strs, d->btf->hdr->str_len); - end = start + d->btf->hdr->str_len; - - /* restore original order for further binary search lookups */ - qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset); + /* insert empty string; we won't be looking it up during strings + * dedup, but it's good to have it for generic BTF string lookups + */ + err = hashmap__insert(d->strs_hash, (void *)0, (void *)0, + HASHMAP_ADD, NULL, NULL); + if (err) + goto err_out; /* remap string offsets */ - err = btf_for_each_str_off(d, btf_str_remap_offset, &strs); + err = btf_for_each_str_off(d, strs_dedup_remap_str_off, d); if (err) - goto done; + goto err_out; - d->btf->hdr->str_len = end - start; + /* replace BTF string data and hash with deduped ones */ + free(d->btf->strs_data); + hashmap__free(d->btf->strs_hash); + d->btf->strs_data = d->strs_data; + d->btf->strs_data_cap = d->strs_cap; + d->btf->hdr->str_len = d->strs_len; + d->btf->strs_hash = d->strs_hash; + /* now point strs_data_ptr back to btf->strs_data */ + d->btf->strs_data_ptr = &d->btf->strs_data; + + d->strs_data = d->strs_hash = NULL; + d->strs_len = d->strs_cap = 0; d->btf->strs_deduped = true; + return 0; + +err_out: + free(d->strs_data); + hashmap__free(d->strs_hash); + d->strs_data = d->strs_hash = NULL; + d->strs_len = d->strs_cap = 0; + + /* restore strings pointer for existing d->btf->strs_hash back */ + d->btf->strs_data_ptr = &d->strs_data; -done: - free(tmp_strs); - free(strs.ptrs); return err; } -- cgit v1.3.1 From ba451366bf44498f22dd16c31a792083bd6f2ae1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 4 Nov 2020 20:33:54 -0800 Subject: libbpf: Implement basic split BTF support Support split BTF operation, in which one BTF (base BTF) provides basic set of types and strings, while another one (split BTF) builds on top of base's types and strings and adds its own new types and strings. From API standpoint, the fact that the split BTF is built on top of the base BTF is transparent. Type numeration is transparent. If the base BTF had last type ID #N, then all types in the split BTF start at type ID N+1. Any type in split BTF can reference base BTF types, but not vice versa. Programmatically construction of a split BTF on top of a base BTF is supported: one can create an empty split BTF with btf__new_empty_split() and pass base BTF as an input, or pass raw binary data to btf__new_split(), or use btf__parse_xxx_split() variants to get initial set of split types/strings from the ELF file with .BTF section. String offsets are similarly transparent and are a logical continuation of base BTF's strings. When building BTF programmatically and adding a new string (explicitly with btf__add_str() or implicitly through appending new types/members), string-to-be-added would first be looked up from the base BTF's string section and re-used if it's there. If not, it will be looked up and/or added to the split BTF string section. Similarly to type IDs, types in split BTF can refer to strings from base BTF absolutely transparently (but not vice versa, of course, because base BTF doesn't "know" about existence of split BTF). Internal type index is slightly adjusted to be zero-indexed, ignoring a fake [0] VOID type. This allows to handle split/base BTF type lookups transparently by using btf->start_id type ID offset, which is always 1 for base/non-split BTF and equals btf__get_nr_types(base_btf) + 1 for the split BTF. BTF deduplication is not yet supported for split BTF and support for it will be added in separate patch. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201105043402.2530976-5-andrii@kernel.org --- tools/lib/bpf/btf.c | 197 ++++++++++++++++++++++++++++++++++++----------- tools/lib/bpf/btf.h | 8 ++ tools/lib/bpf/libbpf.map | 9 +++ 3 files changed, 169 insertions(+), 45 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index fa1b147c63c6..0258cf108c0a 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -78,10 +78,32 @@ struct btf { void *types_data; size_t types_data_cap; /* used size stored in hdr->type_len */ - /* type ID to `struct btf_type *` lookup index */ + /* type ID to `struct btf_type *` lookup index + * type_offs[0] corresponds to the first non-VOID type: + * - for base BTF it's type [1]; + * - for split BTF it's the first non-base BTF type. + */ __u32 *type_offs; size_t type_offs_cap; + /* number of types in this BTF instance: + * - doesn't include special [0] void type; + * - for split BTF counts number of types added on top of base BTF. + */ __u32 nr_types; + /* if not NULL, points to the base BTF on top of which the current + * split BTF is based + */ + struct btf *base_btf; + /* BTF type ID of the first type in this BTF instance: + * - for base BTF it's equal to 1; + * - for split BTF it's equal to biggest type ID of base BTF plus 1. + */ + int start_id; + /* logical string offset of this BTF instance: + * - for base BTF it's equal to 0; + * - for split BTF it's equal to total size of base BTF's string section size. + */ + int start_str_off; void *strs_data; size_t strs_data_cap; /* used size stored in hdr->str_len */ @@ -176,7 +198,7 @@ static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off) __u32 *p; p = btf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32), - btf->nr_types + 1, BTF_MAX_NR_TYPES, 1); + btf->nr_types, BTF_MAX_NR_TYPES, 1); if (!p) return -ENOMEM; @@ -252,12 +274,16 @@ static int btf_parse_str_sec(struct btf *btf) const char *start = btf->strs_data; const char *end = start + btf->hdr->str_len; - if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || - start[0] || end[-1]) { + if (btf->base_btf && hdr->str_len == 0) + return 0; + if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || end[-1]) { + pr_debug("Invalid BTF string section\n"); + return -EINVAL; + } + if (!btf->base_btf && start[0]) { pr_debug("Invalid BTF string section\n"); return -EINVAL; } - return 0; } @@ -372,19 +398,9 @@ static int btf_parse_type_sec(struct btf *btf) struct btf_header *hdr = btf->hdr; void *next_type = btf->types_data; void *end_type = next_type + hdr->type_len; - int err, i = 0, type_size; - - /* VOID (type_id == 0) is specially handled by btf__get_type_by_id(), - * so ensure we can never properly use its offset from index by - * setting it to a large value - */ - err = btf_add_type_idx_entry(btf, UINT_MAX); - if (err) - return err; + int err, type_size; while (next_type + sizeof(struct btf_type) <= end_type) { - i++; - if (btf->swapped_endian) btf_bswap_type_base(next_type); @@ -392,7 +408,7 @@ static int btf_parse_type_sec(struct btf *btf) if (type_size < 0) return type_size; if (next_type + type_size > end_type) { - pr_warn("BTF type [%d] is malformed\n", i); + pr_warn("BTF type [%d] is malformed\n", btf->start_id + btf->nr_types); return -EINVAL; } @@ -417,7 +433,7 @@ static int btf_parse_type_sec(struct btf *btf) __u32 btf__get_nr_types(const struct btf *btf) { - return btf->nr_types; + return btf->start_id + btf->nr_types - 1; } /* internal helper returning non-const pointer to a type */ @@ -425,13 +441,14 @@ static struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id) { if (type_id == 0) return &btf_void; - - return btf->types_data + btf->type_offs[type_id]; + if (type_id < btf->start_id) + return btf_type_by_id(btf->base_btf, type_id); + return btf->types_data + btf->type_offs[type_id - btf->start_id]; } const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id) { - if (type_id > btf->nr_types) + if (type_id >= btf->start_id + btf->nr_types) return NULL; return btf_type_by_id((struct btf *)btf, type_id); } @@ -440,9 +457,13 @@ static int determine_ptr_size(const struct btf *btf) { const struct btf_type *t; const char *name; - int i; + int i, n; - for (i = 1; i <= btf->nr_types; i++) { + if (btf->base_btf && btf->base_btf->ptr_sz > 0) + return btf->base_btf->ptr_sz; + + n = btf__get_nr_types(btf); + for (i = 1; i <= n; i++) { t = btf__type_by_id(btf, i); if (!btf_is_int(t)) continue; @@ -725,7 +746,7 @@ void btf__free(struct btf *btf) free(btf); } -struct btf *btf__new_empty(void) +static struct btf *btf_new_empty(struct btf *base_btf) { struct btf *btf; @@ -733,12 +754,21 @@ struct btf *btf__new_empty(void) if (!btf) return ERR_PTR(-ENOMEM); + btf->nr_types = 0; + btf->start_id = 1; + btf->start_str_off = 0; btf->fd = -1; btf->ptr_sz = sizeof(void *); btf->swapped_endian = false; + if (base_btf) { + btf->base_btf = base_btf; + btf->start_id = btf__get_nr_types(base_btf) + 1; + btf->start_str_off = base_btf->hdr->str_len; + } + /* +1 for empty string at offset 0 */ - btf->raw_size = sizeof(struct btf_header) + 1; + btf->raw_size = sizeof(struct btf_header) + (base_btf ? 0 : 1); btf->raw_data = calloc(1, btf->raw_size); if (!btf->raw_data) { free(btf); @@ -752,12 +782,22 @@ struct btf *btf__new_empty(void) btf->types_data = btf->raw_data + btf->hdr->hdr_len; btf->strs_data = btf->raw_data + btf->hdr->hdr_len; - btf->hdr->str_len = 1; /* empty string at offset 0 */ + btf->hdr->str_len = base_btf ? 0 : 1; /* empty string at offset 0 */ return btf; } -struct btf *btf__new(const void *data, __u32 size) +struct btf *btf__new_empty(void) +{ + return btf_new_empty(NULL); +} + +struct btf *btf__new_empty_split(struct btf *base_btf) +{ + return btf_new_empty(base_btf); +} + +static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) { struct btf *btf; int err; @@ -766,6 +806,16 @@ struct btf *btf__new(const void *data, __u32 size) if (!btf) return ERR_PTR(-ENOMEM); + btf->nr_types = 0; + btf->start_id = 1; + btf->start_str_off = 0; + + if (base_btf) { + btf->base_btf = base_btf; + btf->start_id = btf__get_nr_types(base_btf) + 1; + btf->start_str_off = base_btf->hdr->str_len; + } + btf->raw_data = malloc(size); if (!btf->raw_data) { err = -ENOMEM; @@ -798,7 +848,13 @@ done: return btf; } -struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) +struct btf *btf__new(const void *data, __u32 size) +{ + return btf_new(data, size, NULL); +} + +static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, + struct btf_ext **btf_ext) { Elf_Data *btf_data = NULL, *btf_ext_data = NULL; int err = 0, fd = -1, idx = 0; @@ -876,7 +932,7 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) err = -ENOENT; goto done; } - btf = btf__new(btf_data->d_buf, btf_data->d_size); + btf = btf_new(btf_data->d_buf, btf_data->d_size, base_btf); if (IS_ERR(btf)) goto done; @@ -921,7 +977,17 @@ done: return btf; } -struct btf *btf__parse_raw(const char *path) +struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) +{ + return btf_parse_elf(path, NULL, btf_ext); +} + +struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf) +{ + return btf_parse_elf(path, base_btf, NULL); +} + +static struct btf *btf_parse_raw(const char *path, struct btf *base_btf) { struct btf *btf = NULL; void *data = NULL; @@ -975,7 +1041,7 @@ struct btf *btf__parse_raw(const char *path) } /* finally parse BTF data */ - btf = btf__new(data, sz); + btf = btf_new(data, sz, base_btf); err_out: free(data); @@ -984,18 +1050,38 @@ err_out: return err ? ERR_PTR(err) : btf; } -struct btf *btf__parse(const char *path, struct btf_ext **btf_ext) +struct btf *btf__parse_raw(const char *path) +{ + return btf_parse_raw(path, NULL); +} + +struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf) +{ + return btf_parse_raw(path, base_btf); +} + +static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext) { struct btf *btf; if (btf_ext) *btf_ext = NULL; - btf = btf__parse_raw(path); + btf = btf_parse_raw(path, base_btf); if (!IS_ERR(btf) || PTR_ERR(btf) != -EPROTO) return btf; - return btf__parse_elf(path, btf_ext); + return btf_parse_elf(path, base_btf, btf_ext); +} + +struct btf *btf__parse(const char *path, struct btf_ext **btf_ext) +{ + return btf_parse(path, NULL, btf_ext); +} + +struct btf *btf__parse_split(const char *path, struct btf *base_btf) +{ + return btf_parse(path, base_btf, NULL); } static int compare_vsi_off(const void *_a, const void *_b) @@ -1179,8 +1265,8 @@ static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endi memcpy(p, btf->types_data, hdr->type_len); if (swap_endian) { - for (i = 1; i <= btf->nr_types; i++) { - t = p + btf->type_offs[i]; + for (i = 0; i < btf->nr_types; i++) { + t = p + btf->type_offs[i]; /* btf_bswap_type_rest() relies on native t->info, so * we swap base type info after we swapped all the * additional information @@ -1223,8 +1309,10 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size) const char *btf__str_by_offset(const struct btf *btf, __u32 offset) { - if (offset < btf->hdr->str_len) - return btf->strs_data + offset; + if (offset < btf->start_str_off) + return btf__str_by_offset(btf->base_btf, offset); + else if (offset - btf->start_str_off < btf->hdr->str_len) + return btf->strs_data + (offset - btf->start_str_off); else return NULL; } @@ -1461,7 +1549,10 @@ static int btf_ensure_modifiable(struct btf *btf) /* if BTF was created from scratch, all strings are guaranteed to be * unique and deduplicated */ - btf->strs_deduped = btf->hdr->str_len <= 1; + if (btf->hdr->str_len == 0) + btf->strs_deduped = true; + if (!btf->base_btf && btf->hdr->str_len == 1) + btf->strs_deduped = true; /* invalidate raw_data representation */ btf_invalidate_raw_data(btf); @@ -1493,6 +1584,14 @@ int btf__find_str(struct btf *btf, const char *s) long old_off, new_off, len; void *p; + if (btf->base_btf) { + int ret; + + ret = btf__find_str(btf->base_btf, s); + if (ret != -ENOENT) + return ret; + } + /* BTF needs to be in a modifiable state to build string lookup index */ if (btf_ensure_modifiable(btf)) return -ENOMEM; @@ -1507,7 +1606,7 @@ int btf__find_str(struct btf *btf, const char *s) memcpy(p, s, len); if (hashmap__find(btf->strs_hash, (void *)new_off, (void **)&old_off)) - return old_off; + return btf->start_str_off + old_off; return -ENOENT; } @@ -1523,6 +1622,14 @@ int btf__add_str(struct btf *btf, const char *s) void *p; int err; + if (btf->base_btf) { + int ret; + + ret = btf__find_str(btf->base_btf, s); + if (ret != -ENOENT) + return ret; + } + if (btf_ensure_modifiable(btf)) return -ENOMEM; @@ -1549,12 +1656,12 @@ int btf__add_str(struct btf *btf, const char *s) err = hashmap__insert(btf->strs_hash, (void *)new_off, (void *)new_off, HASHMAP_ADD, (const void **)&old_off, NULL); if (err == -EEXIST) - return old_off; /* duplicated string, return existing offset */ + return btf->start_str_off + old_off; /* duplicated string, return existing offset */ if (err) return err; btf->hdr->str_len += len; /* new unique string, adjust data length */ - return new_off; + return btf->start_str_off + new_off; } static void *btf_add_type_mem(struct btf *btf, size_t add_sz) @@ -1584,7 +1691,7 @@ static int btf_commit_type(struct btf *btf, int data_sz) btf->hdr->type_len += data_sz; btf->hdr->str_off += data_sz; btf->nr_types++; - return btf->nr_types; + return btf->start_id + btf->nr_types - 1; } /* @@ -4167,14 +4274,14 @@ static int btf_dedup_compact_types(struct btf_dedup *d) memmove(p, btf__type_by_id(d->btf, i), len); d->hypot_map[i] = next_type_id; - d->btf->type_offs[next_type_id] = p - d->btf->types_data; + d->btf->type_offs[next_type_id - 1] = p - d->btf->types_data; p += len; next_type_id++; } /* shrink struct btf's internal types index and update btf_header */ d->btf->nr_types = next_type_id - 1; - d->btf->type_offs_cap = d->btf->nr_types + 1; + d->btf->type_offs_cap = d->btf->nr_types; d->btf->hdr->type_len = p - d->btf->types_data; new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap, sizeof(*new_offs)); diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 57247240a20a..1093f6fe6800 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -31,11 +31,19 @@ enum btf_endianness { }; LIBBPF_API void btf__free(struct btf *btf); + LIBBPF_API struct btf *btf__new(const void *data, __u32 size); +LIBBPF_API struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf); LIBBPF_API struct btf *btf__new_empty(void); +LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf); + LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext); +LIBBPF_API struct btf *btf__parse_split(const char *path, struct btf *base_btf); LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext); +LIBBPF_API struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf); LIBBPF_API struct btf *btf__parse_raw(const char *path); +LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf); + LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); LIBBPF_API int btf__load(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 4ebfadf45b47..29ff4807b909 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -337,3 +337,12 @@ LIBBPF_0.2.0 { perf_buffer__consume_buffer; xsk_socket__create_shared; } LIBBPF_0.1.0; + +LIBBPF_0.3.0 { + global: + btf__parse_elf_split; + btf__parse_raw_split; + btf__parse_split; + btf__new_empty_split; + btf__new_split; +} LIBBPF_0.2.0; -- cgit v1.3.1 From 197389da2fbfbc3cefb229268c32d858d9575c96 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 4 Nov 2020 20:33:55 -0800 Subject: selftests/bpf: Add split BTF basic test Add selftest validating ability to programmatically generate and then dump split BTF. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201105043402.2530976-6-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/btf_split.c | 99 ++++++++++++++++++++++ tools/testing/selftests/bpf/test_progs.h | 11 +++ 2 files changed, 110 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/btf_split.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c new file mode 100644 index 000000000000..ca7c2a91610a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include +#include + +static char *dump_buf; +static size_t dump_buf_sz; +static FILE *dump_buf_file; + +static void btf_dump_printf(void *ctx, const char *fmt, va_list args) +{ + vfprintf(ctx, fmt, args); +} + +void test_btf_split() { + struct btf_dump_opts opts; + struct btf_dump *d = NULL; + const struct btf_type *t; + struct btf *btf1, *btf2; + int str_off, i, err; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) + return; + + btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */ + + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_ptr(btf1, 1); /* [2] ptr to int */ + + btf__add_struct(btf1, "s1", 4); /* [3] struct s1 { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + /* } */ + + btf2 = btf__new_empty_split(btf1); + if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) + goto cleanup; + + /* pointer size should be "inherited" from main BTF */ + ASSERT_EQ(btf__pointer_size(btf2), 8, "inherit_ptr_sz"); + + str_off = btf__find_str(btf2, "int"); + ASSERT_NEQ(str_off, -ENOENT, "str_int_missing"); + + t = btf__type_by_id(btf2, 1); + if (!ASSERT_OK_PTR(t, "int_type")) + goto cleanup; + ASSERT_EQ(btf_is_int(t), true, "int_kind"); + ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "int", "int_name"); + + btf__add_struct(btf2, "s2", 16); /* [4] struct s2 { */ + btf__add_field(btf2, "f1", 3, 0, 0); /* struct s1 f1; */ + btf__add_field(btf2, "f2", 1, 32, 0); /* int f2; */ + btf__add_field(btf2, "f3", 2, 64, 0); /* int *f3; */ + /* } */ + + t = btf__type_by_id(btf1, 4); + ASSERT_NULL(t, "split_type_in_main"); + + t = btf__type_by_id(btf2, 4); + if (!ASSERT_OK_PTR(t, "split_struct_type")) + goto cleanup; + ASSERT_EQ(btf_is_struct(t), true, "split_struct_kind"); + ASSERT_EQ(btf_vlen(t), 3, "split_struct_vlen"); + ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "s2", "split_struct_name"); + + /* BTF-to-C dump of split BTF */ + dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz); + if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream")) + return; + opts.ctx = dump_buf_file; + d = btf_dump__new(btf2, NULL, &opts, btf_dump_printf); + if (!ASSERT_OK_PTR(d, "btf_dump__new")) + goto cleanup; + for (i = 1; i <= btf__get_nr_types(btf2); i++) { + err = btf_dump__dump_type(d, i); + ASSERT_OK(err, "dump_type_ok"); + } + fflush(dump_buf_file); + dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */ + ASSERT_STREQ(dump_buf, +"struct s1 {\n" +" int f1;\n" +"};\n" +"\n" +"struct s2 {\n" +" struct s1 f1;\n" +" int f2;\n" +" int *f3;\n" +"};\n\n", "c_dump"); + +cleanup: + if (dump_buf_file) + fclose(dump_buf_file); + free(dump_buf); + btf_dump__free(d); + btf__free(btf1); + btf__free(btf2); +} diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 238f5f61189e..d6b14853f3bc 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -141,6 +141,17 @@ extern int test__join_cgroup(const char *path); ___ok; \ }) +#define ASSERT_NEQ(actual, expected, name) ({ \ + static int duration = 0; \ + typeof(actual) ___act = (actual); \ + typeof(expected) ___exp = (expected); \ + bool ___ok = ___act != ___exp; \ + CHECK(!___ok, (name), \ + "unexpected %s: actual %lld == expected %lld\n", \ + (name), (long long)(___act), (long long)(___exp)); \ + ___ok; \ +}) + #define ASSERT_STREQ(actual, expected, name) ({ \ static int duration = 0; \ const char *___act = actual; \ -- cgit v1.3.1 From 1306c980cf892bc17e7296d3e9ab8e9082f893a1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 4 Nov 2020 20:33:56 -0800 Subject: selftests/bpf: Add checking of raw type dump in BTF writer APIs selftests Add re-usable btf_helpers.{c,h} to provide BTF-related testing routines. Start with adding a raw BTF dumping helpers. Raw BTF dump is the most succinct and at the same time a very human-friendly way to validate exact contents of BTF types. Cross-validate raw BTF dump and writable BTF in a single selftest. Raw type dump checks also serve as a good self-documentation. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201105043402.2530976-7-andrii@kernel.org --- tools/testing/selftests/bpf/Makefile | 2 +- tools/testing/selftests/bpf/btf_helpers.c | 200 +++++++++++++++++++++ tools/testing/selftests/bpf/btf_helpers.h | 12 ++ tools/testing/selftests/bpf/prog_tests/btf_write.c | 43 +++++ 4 files changed, 256 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/btf_helpers.c create mode 100644 tools/testing/selftests/bpf/btf_helpers.h (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 50e5b18fc455..c1708ffa6b1c 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -386,7 +386,7 @@ TRUNNER_TESTS_DIR := prog_tests TRUNNER_BPF_PROGS_DIR := progs TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ network_helpers.c testing_helpers.c \ - flow_dissector_load.h + btf_helpers.c flow_dissector_load.h TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ $(wildcard progs/btf_dump_test_case_*.c) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c new file mode 100644 index 000000000000..abc3f6c04cfc --- /dev/null +++ b/tools/testing/selftests/bpf/btf_helpers.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include +#include +#include + +static const char * const btf_kind_str_mapping[] = { + [BTF_KIND_UNKN] = "UNKNOWN", + [BTF_KIND_INT] = "INT", + [BTF_KIND_PTR] = "PTR", + [BTF_KIND_ARRAY] = "ARRAY", + [BTF_KIND_STRUCT] = "STRUCT", + [BTF_KIND_UNION] = "UNION", + [BTF_KIND_ENUM] = "ENUM", + [BTF_KIND_FWD] = "FWD", + [BTF_KIND_TYPEDEF] = "TYPEDEF", + [BTF_KIND_VOLATILE] = "VOLATILE", + [BTF_KIND_CONST] = "CONST", + [BTF_KIND_RESTRICT] = "RESTRICT", + [BTF_KIND_FUNC] = "FUNC", + [BTF_KIND_FUNC_PROTO] = "FUNC_PROTO", + [BTF_KIND_VAR] = "VAR", + [BTF_KIND_DATASEC] = "DATASEC", +}; + +static const char *btf_kind_str(__u16 kind) +{ + if (kind > BTF_KIND_DATASEC) + return "UNKNOWN"; + return btf_kind_str_mapping[kind]; +} + +static const char *btf_int_enc_str(__u8 encoding) +{ + switch (encoding) { + case 0: + return "(none)"; + case BTF_INT_SIGNED: + return "SIGNED"; + case BTF_INT_CHAR: + return "CHAR"; + case BTF_INT_BOOL: + return "BOOL"; + default: + return "UNKN"; + } +} + +static const char *btf_var_linkage_str(__u32 linkage) +{ + switch (linkage) { + case BTF_VAR_STATIC: + return "static"; + case BTF_VAR_GLOBAL_ALLOCATED: + return "global-alloc"; + default: + return "(unknown)"; + } +} + +static const char *btf_func_linkage_str(const struct btf_type *t) +{ + switch (btf_vlen(t)) { + case BTF_FUNC_STATIC: + return "static"; + case BTF_FUNC_GLOBAL: + return "global"; + case BTF_FUNC_EXTERN: + return "extern"; + default: + return "(unknown)"; + } +} + +static const char *btf_str(const struct btf *btf, __u32 off) +{ + if (!off) + return "(anon)"; + return btf__str_by_offset(btf, off) ?: "(invalid)"; +} + +int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id) +{ + const struct btf_type *t; + int kind, i; + __u32 vlen; + + t = btf__type_by_id(btf, id); + if (!t) + return -EINVAL; + + vlen = btf_vlen(t); + kind = btf_kind(t); + + fprintf(out, "[%u] %s '%s'", id, btf_kind_str(kind), btf_str(btf, t->name_off)); + + switch (kind) { + case BTF_KIND_INT: + fprintf(out, " size=%u bits_offset=%u nr_bits=%u encoding=%s", + t->size, btf_int_offset(t), btf_int_bits(t), + btf_int_enc_str(btf_int_encoding(t))); + break; + case BTF_KIND_PTR: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_TYPEDEF: + fprintf(out, " type_id=%u", t->type); + break; + case BTF_KIND_ARRAY: { + const struct btf_array *arr = btf_array(t); + + fprintf(out, " type_id=%u index_type_id=%u nr_elems=%u", + arr->type, arr->index_type, arr->nelems); + break; + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m = btf_members(t); + + fprintf(out, " size=%u vlen=%u", t->size, vlen); + for (i = 0; i < vlen; i++, m++) { + __u32 bit_off, bit_sz; + + bit_off = btf_member_bit_offset(t, i); + bit_sz = btf_member_bitfield_size(t, i); + fprintf(out, "\n\t'%s' type_id=%u bits_offset=%u", + btf_str(btf, m->name_off), m->type, bit_off); + if (bit_sz) + fprintf(out, " bitfield_size=%u", bit_sz); + } + break; + } + case BTF_KIND_ENUM: { + const struct btf_enum *v = btf_enum(t); + + fprintf(out, " size=%u vlen=%u", t->size, vlen); + for (i = 0; i < vlen; i++, v++) { + fprintf(out, "\n\t'%s' val=%u", + btf_str(btf, v->name_off), v->val); + } + break; + } + case BTF_KIND_FWD: + fprintf(out, " fwd_kind=%s", btf_kflag(t) ? "union" : "struct"); + break; + case BTF_KIND_FUNC: + fprintf(out, " type_id=%u linkage=%s", t->type, btf_func_linkage_str(t)); + break; + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *p = btf_params(t); + + fprintf(out, " ret_type_id=%u vlen=%u", t->type, vlen); + for (i = 0; i < vlen; i++, p++) { + fprintf(out, "\n\t'%s' type_id=%u", + btf_str(btf, p->name_off), p->type); + } + break; + } + case BTF_KIND_VAR: + fprintf(out, " type_id=%u, linkage=%s", + t->type, btf_var_linkage_str(btf_var(t)->linkage)); + break; + case BTF_KIND_DATASEC: { + const struct btf_var_secinfo *v = btf_var_secinfos(t); + + fprintf(out, " size=%u vlen=%u", t->size, vlen); + for (i = 0; i < vlen; i++, v++) { + fprintf(out, "\n\ttype_id=%u offset=%u size=%u", + v->type, v->offset, v->size); + } + break; + } + default: + break; + } + + return 0; +} + +/* Print raw BTF type dump into a local buffer and return string pointer back. + * Buffer *will* be overwritten by subsequent btf_type_raw_dump() calls + */ +const char *btf_type_raw_dump(const struct btf *btf, int type_id) +{ + static char buf[16 * 1024]; + FILE *buf_file; + + buf_file = fmemopen(buf, sizeof(buf) - 1, "w"); + if (!buf_file) { + fprintf(stderr, "Failed to open memstream: %d\n", errno); + return NULL; + } + + fprintf_btf_type_raw(buf_file, btf, type_id); + fflush(buf_file); + fclose(buf_file); + + return buf; +} diff --git a/tools/testing/selftests/bpf/btf_helpers.h b/tools/testing/selftests/bpf/btf_helpers.h new file mode 100644 index 000000000000..2c9ce1b61dc9 --- /dev/null +++ b/tools/testing/selftests/bpf/btf_helpers.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020 Facebook */ +#ifndef __BTF_HELPERS_H +#define __BTF_HELPERS_H + +#include +#include + +int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id); +const char *btf_type_raw_dump(const struct btf *btf, int type_id); + +#endif diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c index 314e1e7c36df..f36da15b134f 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_write.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c @@ -2,6 +2,7 @@ /* Copyright (c) 2020 Facebook */ #include #include +#include "btf_helpers.h" static int duration = 0; @@ -39,6 +40,8 @@ void test_btf_write() { ASSERT_EQ(t->size, 4, "int_sz"); ASSERT_EQ(btf_int_encoding(t), BTF_INT_SIGNED, "int_enc"); ASSERT_EQ(btf_int_bits(t), 32, "int_bits"); + ASSERT_STREQ(btf_type_raw_dump(btf, 1), + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", "raw_dump"); /* invalid int size */ id = btf__add_int(btf, "bad sz int", 7, 0); @@ -59,24 +62,32 @@ void test_btf_write() { t = btf__type_by_id(btf, 2); ASSERT_EQ(btf_kind(t), BTF_KIND_PTR, "ptr_kind"); ASSERT_EQ(t->type, 1, "ptr_type"); + ASSERT_STREQ(btf_type_raw_dump(btf, 2), + "[2] PTR '(anon)' type_id=1", "raw_dump"); id = btf__add_const(btf, 5); /* points forward to restrict */ ASSERT_EQ(id, 3, "const_id"); t = btf__type_by_id(btf, 3); ASSERT_EQ(btf_kind(t), BTF_KIND_CONST, "const_kind"); ASSERT_EQ(t->type, 5, "const_type"); + ASSERT_STREQ(btf_type_raw_dump(btf, 3), + "[3] CONST '(anon)' type_id=5", "raw_dump"); id = btf__add_volatile(btf, 3); ASSERT_EQ(id, 4, "volatile_id"); t = btf__type_by_id(btf, 4); ASSERT_EQ(btf_kind(t), BTF_KIND_VOLATILE, "volatile_kind"); ASSERT_EQ(t->type, 3, "volatile_type"); + ASSERT_STREQ(btf_type_raw_dump(btf, 4), + "[4] VOLATILE '(anon)' type_id=3", "raw_dump"); id = btf__add_restrict(btf, 4); ASSERT_EQ(id, 5, "restrict_id"); t = btf__type_by_id(btf, 5); ASSERT_EQ(btf_kind(t), BTF_KIND_RESTRICT, "restrict_kind"); ASSERT_EQ(t->type, 4, "restrict_type"); + ASSERT_STREQ(btf_type_raw_dump(btf, 5), + "[5] RESTRICT '(anon)' type_id=4", "raw_dump"); /* ARRAY */ id = btf__add_array(btf, 1, 2, 10); /* int *[10] */ @@ -86,6 +97,8 @@ void test_btf_write() { ASSERT_EQ(btf_array(t)->index_type, 1, "array_index_type"); ASSERT_EQ(btf_array(t)->type, 2, "array_elem_type"); ASSERT_EQ(btf_array(t)->nelems, 10, "array_nelems"); + ASSERT_STREQ(btf_type_raw_dump(btf, 6), + "[6] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=10", "raw_dump"); /* STRUCT */ err = btf__add_field(btf, "field", 1, 0, 0); @@ -113,6 +126,10 @@ void test_btf_write() { ASSERT_EQ(m->type, 1, "f2_type"); ASSERT_EQ(btf_member_bit_offset(t, 1), 32, "f2_bit_off"); ASSERT_EQ(btf_member_bitfield_size(t, 1), 16, "f2_bit_sz"); + ASSERT_STREQ(btf_type_raw_dump(btf, 7), + "[7] STRUCT 's1' size=8 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", "raw_dump"); /* UNION */ id = btf__add_union(btf, "u1", 8); @@ -136,6 +153,9 @@ void test_btf_write() { ASSERT_EQ(m->type, 1, "f1_type"); ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off"); ASSERT_EQ(btf_member_bitfield_size(t, 0), 16, "f1_bit_sz"); + ASSERT_STREQ(btf_type_raw_dump(btf, 8), + "[8] UNION 'u1' size=8 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", "raw_dump"); /* ENUM */ id = btf__add_enum(btf, "e1", 4); @@ -156,6 +176,10 @@ void test_btf_write() { v = btf_enum(t) + 1; ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v2", "v2_name"); ASSERT_EQ(v->val, 2, "v2_val"); + ASSERT_STREQ(btf_type_raw_dump(btf, 9), + "[9] ENUM 'e1' size=4 vlen=2\n" + "\t'v1' val=1\n" + "\t'v2' val=2", "raw_dump"); /* FWDs */ id = btf__add_fwd(btf, "struct_fwd", BTF_FWD_STRUCT); @@ -164,6 +188,8 @@ void test_btf_write() { ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "struct_fwd", "fwd_name"); ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind"); ASSERT_EQ(btf_kflag(t), 0, "fwd_kflag"); + ASSERT_STREQ(btf_type_raw_dump(btf, 10), + "[10] FWD 'struct_fwd' fwd_kind=struct", "raw_dump"); id = btf__add_fwd(btf, "union_fwd", BTF_FWD_UNION); ASSERT_EQ(id, 11, "union_fwd_id"); @@ -171,6 +197,8 @@ void test_btf_write() { ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "union_fwd", "fwd_name"); ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind"); ASSERT_EQ(btf_kflag(t), 1, "fwd_kflag"); + ASSERT_STREQ(btf_type_raw_dump(btf, 11), + "[11] FWD 'union_fwd' fwd_kind=union", "raw_dump"); id = btf__add_fwd(btf, "enum_fwd", BTF_FWD_ENUM); ASSERT_EQ(id, 12, "enum_fwd_id"); @@ -179,6 +207,8 @@ void test_btf_write() { ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_fwd_kind"); ASSERT_EQ(btf_vlen(t), 0, "enum_fwd_kind"); ASSERT_EQ(t->size, 4, "enum_fwd_sz"); + ASSERT_STREQ(btf_type_raw_dump(btf, 12), + "[12] ENUM 'enum_fwd' size=4 vlen=0", "raw_dump"); /* TYPEDEF */ id = btf__add_typedef(btf, "typedef1", 1); @@ -187,6 +217,8 @@ void test_btf_write() { ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "typedef1", "typedef_name"); ASSERT_EQ(btf_kind(t), BTF_KIND_TYPEDEF, "typedef_kind"); ASSERT_EQ(t->type, 1, "typedef_type"); + ASSERT_STREQ(btf_type_raw_dump(btf, 13), + "[13] TYPEDEF 'typedef1' type_id=1", "raw_dump"); /* FUNC & FUNC_PROTO */ id = btf__add_func(btf, "func1", BTF_FUNC_GLOBAL, 15); @@ -196,6 +228,8 @@ void test_btf_write() { ASSERT_EQ(t->type, 15, "func_type"); ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC, "func_kind"); ASSERT_EQ(btf_vlen(t), BTF_FUNC_GLOBAL, "func_vlen"); + ASSERT_STREQ(btf_type_raw_dump(btf, 14), + "[14] FUNC 'func1' type_id=15 linkage=global", "raw_dump"); id = btf__add_func_proto(btf, 1); ASSERT_EQ(id, 15, "func_proto_id"); @@ -214,6 +248,10 @@ void test_btf_write() { p = btf_params(t) + 1; ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p2", "p2_name"); ASSERT_EQ(p->type, 2, "p2_type"); + ASSERT_STREQ(btf_type_raw_dump(btf, 15), + "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" + "\t'p1' type_id=1\n" + "\t'p2' type_id=2", "raw_dump"); /* VAR */ id = btf__add_var(btf, "var1", BTF_VAR_GLOBAL_ALLOCATED, 1); @@ -223,6 +261,8 @@ void test_btf_write() { ASSERT_EQ(btf_kind(t), BTF_KIND_VAR, "var_kind"); ASSERT_EQ(t->type, 1, "var_type"); ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_type"); + ASSERT_STREQ(btf_type_raw_dump(btf, 16), + "[16] VAR 'var1' type_id=1, linkage=global-alloc", "raw_dump"); /* DATASECT */ id = btf__add_datasec(btf, "datasec1", 12); @@ -239,6 +279,9 @@ void test_btf_write() { ASSERT_EQ(vi->type, 1, "v1_type"); ASSERT_EQ(vi->offset, 4, "v1_off"); ASSERT_EQ(vi->size, 8, "v1_sz"); + ASSERT_STREQ(btf_type_raw_dump(btf, 17), + "[17] DATASEC 'datasec1' size=12 vlen=1\n" + "\ttype_id=1 offset=4 size=8", "raw_dump"); btf__free(btf); } -- cgit v1.3.1 From d8123624506cd62730c9cd9c7672c698e462703d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 4 Nov 2020 20:33:57 -0800 Subject: libbpf: Fix BTF data layout checks and allow empty BTF Make data section layout checks stricter, disallowing overlap of types and strings data. Additionally, allow BTFs with no type data. There is nothing inherently wrong with having BTF with no types (put potentially with some strings). This could be a situation with kernel module BTFs, if module doesn't introduce any new type information. Also fix invalid offset alignment check for btf->hdr->type_off. Fixes: 8a138aed4a80 ("bpf: btf: Add BTF support to libbpf") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201105043402.2530976-8-andrii@kernel.org --- tools/lib/bpf/btf.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 0258cf108c0a..20bb88e71f07 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -245,22 +245,18 @@ static int btf_parse_hdr(struct btf *btf) return -EINVAL; } - if (meta_left < hdr->type_off) { - pr_debug("Invalid BTF type section offset:%u\n", hdr->type_off); + if (meta_left < hdr->str_off + hdr->str_len) { + pr_debug("Invalid BTF total size:%u\n", btf->raw_size); return -EINVAL; } - if (meta_left < hdr->str_off) { - pr_debug("Invalid BTF string section offset:%u\n", hdr->str_off); + if (hdr->type_off + hdr->type_len > hdr->str_off) { + pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n", + hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len); return -EINVAL; } - if (hdr->type_off >= hdr->str_off) { - pr_debug("BTF type section offset >= string section offset. No type?\n"); - return -EINVAL; - } - - if (hdr->type_off & 0x02) { + if (hdr->type_off % 4) { pr_debug("BTF type section is not aligned to 4 bytes\n"); return -EINVAL; } -- cgit v1.3.1 From f86524efcf9e3f3a7cf75ebcd82cf8f58ec716cc Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 4 Nov 2020 20:33:58 -0800 Subject: libbpf: Support BTF dedup of split BTFs Add support for deduplication split BTFs. When deduplicating split BTF, base BTF is considered to be immutable and can't be modified or adjusted. 99% of BTF deduplication logic is left intact (module some type numbering adjustments). There are only two differences. First, each type in base BTF gets hashed (expect VAR and DATASEC, of course, those are always considered to be self-canonical instances) and added into a table of canonical table candidates. Hashing is a shallow, fast operation, so mostly eliminates the overhead of having entire base BTF to be a part of BTF dedup. Second difference is very critical and subtle. While deduplicating split BTF types, it is possible to discover that one of immutable base BTF BTF_KIND_FWD types can and should be resolved to a full STRUCT/UNION type from the split BTF part. This is, obviously, can't happen because we can't modify the base BTF types anymore. So because of that, any type in split BTF that directly or indirectly references that newly-to-be-resolved FWD type can't be considered to be equivalent to the corresponding canonical types in base BTF, because that would result in a loss of type resolution information. So in such case, split BTF types will be deduplicated separately and will cause some duplication of type information, which is unavoidable. With those two changes, the rest of the algorithm manages to deduplicate split BTF correctly, pointing all the duplicates to their canonical counter-parts in base BTF, but also is deduplicating whatever unique types are present in split BTF on their own. Also, theoretically, split BTF after deduplication could end up with either empty type section or empty string section. This is handled by libbpf correctly in one of previous patches in the series. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201105043402.2530976-9-andrii@kernel.org --- tools/lib/bpf/btf.c | 221 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 168 insertions(+), 53 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 20bb88e71f07..8d04d9becb67 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -2718,6 +2718,7 @@ struct btf_dedup; static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, const struct btf_dedup_opts *opts); static void btf_dedup_free(struct btf_dedup *d); +static int btf_dedup_prep(struct btf_dedup *d); static int btf_dedup_strings(struct btf_dedup *d); static int btf_dedup_prim_types(struct btf_dedup *d); static int btf_dedup_struct_types(struct btf_dedup *d); @@ -2876,6 +2877,11 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, if (btf_ensure_modifiable(btf)) return -ENOMEM; + err = btf_dedup_prep(d); + if (err) { + pr_debug("btf_dedup_prep failed:%d\n", err); + goto done; + } err = btf_dedup_strings(d); if (err < 0) { pr_debug("btf_dedup_strings failed:%d\n", err); @@ -2938,6 +2944,13 @@ struct btf_dedup { __u32 *hypot_list; size_t hypot_cnt; size_t hypot_cap; + /* Whether hypothetical mapping, if successful, would need to adjust + * already canonicalized types (due to a new forward declaration to + * concrete type resolution). In such case, during split BTF dedup + * candidate type would still be considered as different, because base + * BTF is considered to be immutable. + */ + bool hypot_adjust_canon; /* Various option modifying behavior of algorithm */ struct btf_dedup_opts opts; /* temporary strings deduplication state */ @@ -2985,6 +2998,7 @@ static void btf_dedup_clear_hypot_map(struct btf_dedup *d) for (i = 0; i < d->hypot_cnt; i++) d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID; d->hypot_cnt = 0; + d->hypot_adjust_canon = false; } static void btf_dedup_free(struct btf_dedup *d) @@ -3024,7 +3038,7 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, { struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup)); hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn; - int i, err = 0; + int i, err = 0, type_cnt; if (!d) return ERR_PTR(-ENOMEM); @@ -3044,14 +3058,15 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, goto done; } - d->map = malloc(sizeof(__u32) * (1 + btf->nr_types)); + type_cnt = btf__get_nr_types(btf) + 1; + d->map = malloc(sizeof(__u32) * type_cnt); if (!d->map) { err = -ENOMEM; goto done; } /* special BTF "void" type is made canonical immediately */ d->map[0] = 0; - for (i = 1; i <= btf->nr_types; i++) { + for (i = 1; i < type_cnt; i++) { struct btf_type *t = btf_type_by_id(d->btf, i); /* VAR and DATASEC are never deduped and are self-canonical */ @@ -3061,12 +3076,12 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, d->map[i] = BTF_UNPROCESSED_ID; } - d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types)); + d->hypot_map = malloc(sizeof(__u32) * type_cnt); if (!d->hypot_map) { err = -ENOMEM; goto done; } - for (i = 0; i <= btf->nr_types; i++) + for (i = 0; i < type_cnt; i++) d->hypot_map[i] = BTF_UNPROCESSED_ID; done: @@ -3090,8 +3105,8 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx) int i, j, r, rec_size; struct btf_type *t; - for (i = 1; i <= d->btf->nr_types; i++) { - t = btf_type_by_id(d->btf, i); + for (i = 0; i < d->btf->nr_types; i++) { + t = btf_type_by_id(d->btf, d->btf->start_id + i); r = fn(&t->name_off, ctx); if (r) return r; @@ -3174,15 +3189,27 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx) static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx) { struct btf_dedup *d = ctx; + __u32 str_off = *str_off_ptr; long old_off, new_off, len; const char *s; void *p; int err; - if (*str_off_ptr == 0) + /* don't touch empty string or string in main BTF */ + if (str_off == 0 || str_off < d->btf->start_str_off) return 0; - s = btf__str_by_offset(d->btf, *str_off_ptr); + s = btf__str_by_offset(d->btf, str_off); + if (d->btf->base_btf) { + err = btf__find_str(d->btf->base_btf, s); + if (err >= 0) { + *str_off_ptr = err; + return 0; + } + if (err != -ENOENT) + return err; + } + len = strlen(s) + 1; new_off = d->strs_len; @@ -3199,11 +3226,11 @@ static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx) err = hashmap__insert(d->strs_hash, (void *)new_off, (void *)new_off, HASHMAP_ADD, (const void **)&old_off, NULL); if (err == -EEXIST) { - *str_off_ptr = old_off; + *str_off_ptr = d->btf->start_str_off + old_off; } else if (err) { return err; } else { - *str_off_ptr = new_off; + *str_off_ptr = d->btf->start_str_off + new_off; d->strs_len += len; } return 0; @@ -3228,13 +3255,6 @@ static int btf_dedup_strings(struct btf_dedup *d) if (d->btf->strs_deduped) return 0; - s = btf_add_mem(&d->strs_data, &d->strs_cap, 1, d->strs_len, BTF_MAX_STR_OFFSET, 1); - if (!s) - return -ENOMEM; - /* initial empty string */ - s[0] = 0; - d->strs_len = 1; - /* temporarily switch to use btf_dedup's strs_data for strings for hash * functions; later we'll just transfer hashmap to struct btf as is, * along the strs_data @@ -3248,13 +3268,22 @@ static int btf_dedup_strings(struct btf_dedup *d) goto err_out; } - /* insert empty string; we won't be looking it up during strings - * dedup, but it's good to have it for generic BTF string lookups - */ - err = hashmap__insert(d->strs_hash, (void *)0, (void *)0, - HASHMAP_ADD, NULL, NULL); - if (err) - goto err_out; + if (!d->btf->base_btf) { + s = btf_add_mem(&d->strs_data, &d->strs_cap, 1, d->strs_len, BTF_MAX_STR_OFFSET, 1); + if (!s) + return -ENOMEM; + /* initial empty string */ + s[0] = 0; + d->strs_len = 1; + + /* insert empty string; we won't be looking it up during strings + * dedup, but it's good to have it for generic BTF string lookups + */ + err = hashmap__insert(d->strs_hash, (void *)0, (void *)0, + HASHMAP_ADD, NULL, NULL); + if (err) + goto err_out; + } /* remap string offsets */ err = btf_for_each_str_off(d, strs_dedup_remap_str_off, d); @@ -3549,6 +3578,66 @@ static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2) return true; } +/* Prepare split BTF for deduplication by calculating hashes of base BTF's + * types and initializing the rest of the state (canonical type mapping) for + * the fixed base BTF part. + */ +static int btf_dedup_prep(struct btf_dedup *d) +{ + struct btf_type *t; + int type_id; + long h; + + if (!d->btf->base_btf) + return 0; + + for (type_id = 1; type_id < d->btf->start_id; type_id++) { + t = btf_type_by_id(d->btf, type_id); + + /* all base BTF types are self-canonical by definition */ + d->map[type_id] = type_id; + + switch (btf_kind(t)) { + case BTF_KIND_VAR: + case BTF_KIND_DATASEC: + /* VAR and DATASEC are never hash/deduplicated */ + continue; + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_FWD: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + h = btf_hash_common(t); + break; + case BTF_KIND_INT: + h = btf_hash_int(t); + break; + case BTF_KIND_ENUM: + h = btf_hash_enum(t); + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + h = btf_hash_struct(t); + break; + case BTF_KIND_ARRAY: + h = btf_hash_array(t); + break; + case BTF_KIND_FUNC_PROTO: + h = btf_hash_fnproto(t); + break; + default: + pr_debug("unknown kind %d for type [%d]\n", btf_kind(t), type_id); + return -EINVAL; + } + if (btf_dedup_table_add(d, h, type_id)) + return -ENOMEM; + } + + return 0; +} + /* * Deduplicate primitive types, that can't reference other types, by calculating * their type signature hash and comparing them with any possible canonical @@ -3642,8 +3731,8 @@ static int btf_dedup_prim_types(struct btf_dedup *d) { int i, err; - for (i = 1; i <= d->btf->nr_types; i++) { - err = btf_dedup_prim_type(d, i); + for (i = 0; i < d->btf->nr_types; i++) { + err = btf_dedup_prim_type(d, d->btf->start_id + i); if (err) return err; } @@ -3833,6 +3922,9 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, } else { real_kind = cand_kind; fwd_kind = btf_fwd_kind(canon_type); + /* we'd need to resolve base FWD to STRUCT/UNION */ + if (fwd_kind == real_kind && canon_id < d->btf->start_id) + d->hypot_adjust_canon = true; } return fwd_kind == real_kind; } @@ -3870,8 +3962,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, return 0; cand_arr = btf_array(cand_type); canon_arr = btf_array(canon_type); - eq = btf_dedup_is_equiv(d, - cand_arr->index_type, canon_arr->index_type); + eq = btf_dedup_is_equiv(d, cand_arr->index_type, canon_arr->index_type); if (eq <= 0) return eq; return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type); @@ -3954,16 +4045,16 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, */ static void btf_dedup_merge_hypot_map(struct btf_dedup *d) { - __u32 cand_type_id, targ_type_id; + __u32 canon_type_id, targ_type_id; __u16 t_kind, c_kind; __u32 t_id, c_id; int i; for (i = 0; i < d->hypot_cnt; i++) { - cand_type_id = d->hypot_list[i]; - targ_type_id = d->hypot_map[cand_type_id]; + canon_type_id = d->hypot_list[i]; + targ_type_id = d->hypot_map[canon_type_id]; t_id = resolve_type_id(d, targ_type_id); - c_id = resolve_type_id(d, cand_type_id); + c_id = resolve_type_id(d, canon_type_id); t_kind = btf_kind(btf__type_by_id(d->btf, t_id)); c_kind = btf_kind(btf__type_by_id(d->btf, c_id)); /* @@ -3978,9 +4069,26 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d) * stability is not a requirement for STRUCT/UNION equivalence * checks, though. */ + + /* if it's the split BTF case, we still need to point base FWD + * to STRUCT/UNION in a split BTF, because FWDs from split BTF + * will be resolved against base FWD. If we don't point base + * canonical FWD to the resolved STRUCT/UNION, then all the + * FWDs in split BTF won't be correctly resolved to a proper + * STRUCT/UNION. + */ if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD) d->map[c_id] = t_id; - else if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD) + + /* if graph equivalence determined that we'd need to adjust + * base canonical types, then we need to only point base FWDs + * to STRUCTs/UNIONs and do no more modifications. For all + * other purposes the type graphs were not equivalent. + */ + if (d->hypot_adjust_canon) + continue; + + if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD) d->map[t_id] = c_id; if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) && @@ -4064,8 +4172,10 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) return eq; if (!eq) continue; - new_id = cand_id; btf_dedup_merge_hypot_map(d); + if (d->hypot_adjust_canon) /* not really equivalent */ + continue; + new_id = cand_id; break; } @@ -4080,8 +4190,8 @@ static int btf_dedup_struct_types(struct btf_dedup *d) { int i, err; - for (i = 1; i <= d->btf->nr_types; i++) { - err = btf_dedup_struct_type(d, i); + for (i = 0; i < d->btf->nr_types; i++) { + err = btf_dedup_struct_type(d, d->btf->start_id + i); if (err) return err; } @@ -4224,8 +4334,8 @@ static int btf_dedup_ref_types(struct btf_dedup *d) { int i, err; - for (i = 1; i <= d->btf->nr_types; i++) { - err = btf_dedup_ref_type(d, i); + for (i = 0; i < d->btf->nr_types; i++) { + err = btf_dedup_ref_type(d, d->btf->start_id + i); if (err < 0) return err; } @@ -4249,39 +4359,44 @@ static int btf_dedup_ref_types(struct btf_dedup *d) static int btf_dedup_compact_types(struct btf_dedup *d) { __u32 *new_offs; - __u32 next_type_id = 1; + __u32 next_type_id = d->btf->start_id; + const struct btf_type *t; void *p; - int i, len; + int i, id, len; /* we are going to reuse hypot_map to store compaction remapping */ d->hypot_map[0] = 0; - for (i = 1; i <= d->btf->nr_types; i++) - d->hypot_map[i] = BTF_UNPROCESSED_ID; + /* base BTF types are not renumbered */ + for (id = 1; id < d->btf->start_id; id++) + d->hypot_map[id] = id; + for (i = 0, id = d->btf->start_id; i < d->btf->nr_types; i++, id++) + d->hypot_map[id] = BTF_UNPROCESSED_ID; p = d->btf->types_data; - for (i = 1; i <= d->btf->nr_types; i++) { - if (d->map[i] != i) + for (i = 0, id = d->btf->start_id; i < d->btf->nr_types; i++, id++) { + if (d->map[id] != id) continue; - len = btf_type_size(btf__type_by_id(d->btf, i)); + t = btf__type_by_id(d->btf, id); + len = btf_type_size(t); if (len < 0) return len; - memmove(p, btf__type_by_id(d->btf, i), len); - d->hypot_map[i] = next_type_id; - d->btf->type_offs[next_type_id - 1] = p - d->btf->types_data; + memmove(p, t, len); + d->hypot_map[id] = next_type_id; + d->btf->type_offs[next_type_id - d->btf->start_id] = p - d->btf->types_data; p += len; next_type_id++; } /* shrink struct btf's internal types index and update btf_header */ - d->btf->nr_types = next_type_id - 1; + d->btf->nr_types = next_type_id - d->btf->start_id; d->btf->type_offs_cap = d->btf->nr_types; d->btf->hdr->type_len = p - d->btf->types_data; new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap, sizeof(*new_offs)); - if (!new_offs) + if (d->btf->type_offs_cap && !new_offs) return -ENOMEM; d->btf->type_offs = new_offs; d->btf->hdr->str_off = d->btf->hdr->type_len; @@ -4413,8 +4528,8 @@ static int btf_dedup_remap_types(struct btf_dedup *d) { int i, r; - for (i = 1; i <= d->btf->nr_types; i++) { - r = btf_dedup_remap_type(d, i); + for (i = 0; i < d->btf->nr_types; i++) { + r = btf_dedup_remap_type(d, d->btf->start_id + i); if (r < 0) return r; } -- cgit v1.3.1 From 6b6e6b1d09aa20c351a1fce0ea6402da436624a4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 4 Nov 2020 20:33:59 -0800 Subject: libbpf: Accomodate DWARF/compiler bug with duplicated identical arrays In some cases compiler seems to generate distinct DWARF types for identical arrays within the same CU. That seems like a bug, but it's already out there and breaks type graph equivalence checks, so accommodate it anyway by checking for identical arrays, regardless of their type ID. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201105043402.2530976-10-andrii@kernel.org --- tools/lib/bpf/btf.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 8d04d9becb67..2d0d064c6d31 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -3785,6 +3785,19 @@ static inline __u16 btf_fwd_kind(struct btf_type *t) return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT; } +/* Check if given two types are identical ARRAY definitions */ +static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2) +{ + struct btf_type *t1, *t2; + + t1 = btf_type_by_id(d->btf, id1); + t2 = btf_type_by_id(d->btf, id2); + if (!btf_is_array(t1) || !btf_is_array(t2)) + return 0; + + return btf_equal_array(t1, t2); +} + /* * Check equivalence of BTF type graph formed by candidate struct/union (we'll * call it "candidate graph" in this description for brevity) to a type graph @@ -3895,8 +3908,18 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, canon_id = resolve_fwd_id(d, canon_id); hypot_type_id = d->hypot_map[canon_id]; - if (hypot_type_id <= BTF_MAX_NR_TYPES) - return hypot_type_id == cand_id; + if (hypot_type_id <= BTF_MAX_NR_TYPES) { + /* In some cases compiler will generate different DWARF types + * for *identical* array type definitions and use them for + * different fields within the *same* struct. This breaks type + * equivalence check, which makes an assumption that candidate + * types sub-graph has a consistent and deduped-by-compiler + * types within a single CU. So work around that by explicitly + * allowing identical array types here. + */ + return hypot_type_id == cand_id || + btf_dedup_identical_arrays(d, hypot_type_id, cand_id); + } if (btf_dedup_hypot_map_add(d, canon_id, cand_id)) return -ENOMEM; -- cgit v1.3.1 From 232338fa2fb47726ab7c459419115a6ab6bfb3e3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 4 Nov 2020 20:34:00 -0800 Subject: selftests/bpf: Add split BTF dedup selftests Add selftests validating BTF deduplication for split BTF case. Add a helper macro that allows to validate entire BTF with raw BTF dump, not just type-by-type. This saves tons of code and complexity. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201105043402.2530976-11-andrii@kernel.org --- tools/testing/selftests/bpf/btf_helpers.c | 59 ++++ tools/testing/selftests/bpf/btf_helpers.h | 7 + .../selftests/bpf/prog_tests/btf_dedup_split.c | 325 +++++++++++++++++++++ 3 files changed, 391 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c index abc3f6c04cfc..48f90490f922 100644 --- a/tools/testing/selftests/bpf/btf_helpers.c +++ b/tools/testing/selftests/bpf/btf_helpers.c @@ -3,6 +3,8 @@ #include #include #include +#include +#include "test_progs.h" static const char * const btf_kind_str_mapping[] = { [BTF_KIND_UNKN] = "UNKNOWN", @@ -198,3 +200,60 @@ const char *btf_type_raw_dump(const struct btf *btf, int type_id) return buf; } + +int btf_validate_raw(struct btf *btf, int nr_types, const char *exp_types[]) +{ + int i; + bool ok = true; + + ASSERT_EQ(btf__get_nr_types(btf), nr_types, "btf_nr_types"); + + for (i = 1; i <= nr_types; i++) { + if (!ASSERT_STREQ(btf_type_raw_dump(btf, i), exp_types[i - 1], "raw_dump")) + ok = false; + } + + return ok; +} + +static void btf_dump_printf(void *ctx, const char *fmt, va_list args) +{ + vfprintf(ctx, fmt, args); +} + +/* Print BTF-to-C dump into a local buffer and return string pointer back. + * Buffer *will* be overwritten by subsequent btf_type_raw_dump() calls + */ +const char *btf_type_c_dump(const struct btf *btf) +{ + static char buf[16 * 1024]; + FILE *buf_file; + struct btf_dump *d = NULL; + struct btf_dump_opts opts = {}; + int err, i; + + buf_file = fmemopen(buf, sizeof(buf) - 1, "w"); + if (!buf_file) { + fprintf(stderr, "Failed to open memstream: %d\n", errno); + return NULL; + } + + opts.ctx = buf_file; + d = btf_dump__new(btf, NULL, &opts, btf_dump_printf); + if (libbpf_get_error(d)) { + fprintf(stderr, "Failed to create btf_dump instance: %ld\n", libbpf_get_error(d)); + return NULL; + } + + for (i = 1; i <= btf__get_nr_types(btf); i++) { + err = btf_dump__dump_type(d, i); + if (err) { + fprintf(stderr, "Failed to dump type [%d]: %d\n", i, err); + return NULL; + } + } + + fflush(buf_file); + fclose(buf_file); + return buf; +} diff --git a/tools/testing/selftests/bpf/btf_helpers.h b/tools/testing/selftests/bpf/btf_helpers.h index 2c9ce1b61dc9..295c0137d9bd 100644 --- a/tools/testing/selftests/bpf/btf_helpers.h +++ b/tools/testing/selftests/bpf/btf_helpers.h @@ -8,5 +8,12 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id); const char *btf_type_raw_dump(const struct btf *btf, int type_id); +int btf_validate_raw(struct btf *btf, int nr_types, const char *exp_types[]); +#define VALIDATE_RAW_BTF(btf, raw_types...) \ + btf_validate_raw(btf, \ + sizeof((const char *[]){raw_types})/sizeof(void *),\ + (const char *[]){raw_types}) + +const char *btf_type_c_dump(const struct btf *btf); #endif diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c new file mode 100644 index 000000000000..64554fd33547 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include +#include +#include "btf_helpers.h" + +static void test_split_simple() { + const struct btf_type *t; + struct btf *btf1, *btf2; + int str_off, err; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) + return; + + btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */ + + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_ptr(btf1, 1); /* [2] ptr to int */ + btf__add_struct(btf1, "s1", 4); /* [3] struct s1 { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + /* } */ + + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] STRUCT 's1' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0"); + + ASSERT_STREQ(btf_type_c_dump(btf1), "\ +struct s1 {\n\ + int f1;\n\ +};\n\n", "c_dump"); + + btf2 = btf__new_empty_split(btf1); + if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) + goto cleanup; + + /* pointer size should be "inherited" from main BTF */ + ASSERT_EQ(btf__pointer_size(btf2), 8, "inherit_ptr_sz"); + + str_off = btf__find_str(btf2, "int"); + ASSERT_NEQ(str_off, -ENOENT, "str_int_missing"); + + t = btf__type_by_id(btf2, 1); + if (!ASSERT_OK_PTR(t, "int_type")) + goto cleanup; + ASSERT_EQ(btf_is_int(t), true, "int_kind"); + ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "int", "int_name"); + + btf__add_struct(btf2, "s2", 16); /* [4] struct s2 { */ + btf__add_field(btf2, "f1", 6, 0, 0); /* struct s1 f1; */ + btf__add_field(btf2, "f2", 5, 32, 0); /* int f2; */ + btf__add_field(btf2, "f3", 2, 64, 0); /* int *f3; */ + /* } */ + + /* duplicated int */ + btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [5] int */ + + /* duplicated struct s1 */ + btf__add_struct(btf2, "s1", 4); /* [6] struct s1 { */ + btf__add_field(btf2, "f1", 5, 0, 0); /* int f1; */ + /* } */ + + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] STRUCT 's1' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[4] STRUCT 's2' size=16 vlen=3\n" + "\t'f1' type_id=6 bits_offset=0\n" + "\t'f2' type_id=5 bits_offset=32\n" + "\t'f3' type_id=2 bits_offset=64", + "[5] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[6] STRUCT 's1' size=4 vlen=1\n" + "\t'f1' type_id=5 bits_offset=0"); + + ASSERT_STREQ(btf_type_c_dump(btf2), "\ +struct s1 {\n\ + int f1;\n\ +};\n\ +\n\ +struct s1___2 {\n\ + int f1;\n\ +};\n\ +\n\ +struct s2 {\n\ + struct s1___2 f1;\n\ + int f2;\n\ + int *f3;\n\ +};\n\n", "c_dump"); + + err = btf__dedup(btf2, NULL, NULL); + if (!ASSERT_OK(err, "btf_dedup")) + goto cleanup; + + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] STRUCT 's1' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[4] STRUCT 's2' size=16 vlen=3\n" + "\t'f1' type_id=3 bits_offset=0\n" + "\t'f2' type_id=1 bits_offset=32\n" + "\t'f3' type_id=2 bits_offset=64"); + + ASSERT_STREQ(btf_type_c_dump(btf2), "\ +struct s1 {\n\ + int f1;\n\ +};\n\ +\n\ +struct s2 {\n\ + struct s1 f1;\n\ + int f2;\n\ + int *f3;\n\ +};\n\n", "c_dump"); + +cleanup: + btf__free(btf2); + btf__free(btf1); +} + +static void test_split_fwd_resolve() { + struct btf *btf1, *btf2; + int err; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) + return; + + btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */ + + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_ptr(btf1, 4); /* [2] ptr to struct s1 */ + btf__add_ptr(btf1, 5); /* [3] ptr to struct s2 */ + btf__add_struct(btf1, "s1", 16); /* [4] struct s1 { */ + btf__add_field(btf1, "f1", 2, 0, 0); /* struct s1 *f1; */ + btf__add_field(btf1, "f2", 3, 64, 0); /* struct s2 *f2; */ + /* } */ + btf__add_struct(btf1, "s2", 4); /* [5] struct s2 { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + /* } */ + + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=4", + "[3] PTR '(anon)' type_id=5", + "[4] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=2 bits_offset=0\n" + "\t'f2' type_id=3 bits_offset=64", + "[5] STRUCT 's2' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0"); + + btf2 = btf__new_empty_split(btf1); + if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) + goto cleanup; + + btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [6] int */ + btf__add_ptr(btf2, 10); /* [7] ptr to struct s1 */ + btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT); /* [8] fwd for struct s2 */ + btf__add_ptr(btf2, 8); /* [9] ptr to fwd struct s2 */ + btf__add_struct(btf2, "s1", 16); /* [10] struct s1 { */ + btf__add_field(btf2, "f1", 7, 0, 0); /* struct s1 *f1; */ + btf__add_field(btf2, "f2", 9, 64, 0); /* struct s2 *f2; */ + /* } */ + + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=4", + "[3] PTR '(anon)' type_id=5", + "[4] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=2 bits_offset=0\n" + "\t'f2' type_id=3 bits_offset=64", + "[5] STRUCT 's2' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[6] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[7] PTR '(anon)' type_id=10", + "[8] FWD 's2' fwd_kind=struct", + "[9] PTR '(anon)' type_id=8", + "[10] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=7 bits_offset=0\n" + "\t'f2' type_id=9 bits_offset=64"); + + err = btf__dedup(btf2, NULL, NULL); + if (!ASSERT_OK(err, "btf_dedup")) + goto cleanup; + + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=4", + "[3] PTR '(anon)' type_id=5", + "[4] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=2 bits_offset=0\n" + "\t'f2' type_id=3 bits_offset=64", + "[5] STRUCT 's2' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0"); + +cleanup: + btf__free(btf2); + btf__free(btf1); +} + +static void test_split_struct_duped() { + struct btf *btf1, *btf2; + int err; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) + return; + + btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */ + + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_ptr(btf1, 5); /* [2] ptr to struct s1 */ + btf__add_fwd(btf1, "s2", BTF_FWD_STRUCT); /* [3] fwd for struct s2 */ + btf__add_ptr(btf1, 3); /* [4] ptr to fwd struct s2 */ + btf__add_struct(btf1, "s1", 16); /* [5] struct s1 { */ + btf__add_field(btf1, "f1", 2, 0, 0); /* struct s1 *f1; */ + btf__add_field(btf1, "f2", 4, 64, 0); /* struct s2 *f2; */ + /* } */ + + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=5", + "[3] FWD 's2' fwd_kind=struct", + "[4] PTR '(anon)' type_id=3", + "[5] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=2 bits_offset=0\n" + "\t'f2' type_id=4 bits_offset=64"); + + btf2 = btf__new_empty_split(btf1); + if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) + goto cleanup; + + btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [6] int */ + btf__add_ptr(btf2, 10); /* [7] ptr to struct s1 */ + btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT); /* [8] fwd for struct s2 */ + btf__add_ptr(btf2, 11); /* [9] ptr to struct s2 */ + btf__add_struct(btf2, "s1", 16); /* [10] struct s1 { */ + btf__add_field(btf2, "f1", 7, 0, 0); /* struct s1 *f1; */ + btf__add_field(btf2, "f2", 9, 64, 0); /* struct s2 *f2; */ + /* } */ + btf__add_struct(btf2, "s2", 40); /* [11] struct s2 { */ + btf__add_field(btf2, "f1", 7, 0, 0); /* struct s1 *f1; */ + btf__add_field(btf2, "f2", 9, 64, 0); /* struct s2 *f2; */ + btf__add_field(btf2, "f3", 6, 128, 0); /* int f3; */ + btf__add_field(btf2, "f4", 10, 192, 0); /* struct s1 f4; */ + /* } */ + btf__add_ptr(btf2, 8); /* [12] ptr to fwd struct s2 */ + btf__add_struct(btf2, "s3", 8); /* [13] struct s3 { */ + btf__add_field(btf2, "f1", 12, 0, 0); /* struct s2 *f1; (fwd) */ + /* } */ + + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=5", + "[3] FWD 's2' fwd_kind=struct", + "[4] PTR '(anon)' type_id=3", + "[5] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=2 bits_offset=0\n" + "\t'f2' type_id=4 bits_offset=64", + "[6] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[7] PTR '(anon)' type_id=10", + "[8] FWD 's2' fwd_kind=struct", + "[9] PTR '(anon)' type_id=11", + "[10] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=7 bits_offset=0\n" + "\t'f2' type_id=9 bits_offset=64", + "[11] STRUCT 's2' size=40 vlen=4\n" + "\t'f1' type_id=7 bits_offset=0\n" + "\t'f2' type_id=9 bits_offset=64\n" + "\t'f3' type_id=6 bits_offset=128\n" + "\t'f4' type_id=10 bits_offset=192", + "[12] PTR '(anon)' type_id=8", + "[13] STRUCT 's3' size=8 vlen=1\n" + "\t'f1' type_id=12 bits_offset=0"); + + err = btf__dedup(btf2, NULL, NULL); + if (!ASSERT_OK(err, "btf_dedup")) + goto cleanup; + + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=5", + "[3] FWD 's2' fwd_kind=struct", + "[4] PTR '(anon)' type_id=3", + "[5] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=2 bits_offset=0\n" + "\t'f2' type_id=4 bits_offset=64", + "[6] PTR '(anon)' type_id=8", + "[7] PTR '(anon)' type_id=9", + "[8] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=6 bits_offset=0\n" + "\t'f2' type_id=7 bits_offset=64", + "[9] STRUCT 's2' size=40 vlen=4\n" + "\t'f1' type_id=6 bits_offset=0\n" + "\t'f2' type_id=7 bits_offset=64\n" + "\t'f3' type_id=1 bits_offset=128\n" + "\t'f4' type_id=8 bits_offset=192", + "[10] STRUCT 's3' size=8 vlen=1\n" + "\t'f1' type_id=7 bits_offset=0"); + +cleanup: + btf__free(btf2); + btf__free(btf1); +} + +void test_btf_dedup_split() +{ + if (test__start_subtest("split_simple")) + test_split_simple(); + if (test__start_subtest("split_struct_duped")) + test_split_struct_duped(); + if (test__start_subtest("split_fwd_resolve")) + test_split_fwd_resolve(); +} -- cgit v1.3.1 From 75fa1777694c245c1e59ac774cb1d58a15ecefeb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 4 Nov 2020 20:34:01 -0800 Subject: tools/bpftool: Add bpftool support for split BTF Add ability to work with split BTF by providing extra -B flag, which allows to specify the path to the base BTF file. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201105043402.2530976-12-andrii@kernel.org --- tools/bpf/bpftool/btf.c | 9 ++++++--- tools/bpf/bpftool/main.c | 15 ++++++++++++++- tools/bpf/bpftool/main.h | 1 + 3 files changed, 21 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 8ab142ff5eac..c96b56e8e3a4 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -358,8 +358,12 @@ static int dump_btf_raw(const struct btf *btf, } } else { int cnt = btf__get_nr_types(btf); + int start_id = 1; - for (i = 1; i <= cnt; i++) { + if (base_btf) + start_id = btf__get_nr_types(base_btf) + 1; + + for (i = start_id; i <= cnt; i++) { t = btf__type_by_id(btf, i); dump_btf_type(btf, i, t); } @@ -438,7 +442,6 @@ static int do_dump(int argc, char **argv) return -1; } src = GET_ARG(); - if (is_prefix(src, "map")) { struct bpf_map_info info = {}; __u32 len = sizeof(info); @@ -499,7 +502,7 @@ static int do_dump(int argc, char **argv) } NEXT_ARG(); } else if (is_prefix(src, "file")) { - btf = btf__parse(*argv, NULL); + btf = btf__parse_split(*argv, base_btf); if (IS_ERR(btf)) { err = -PTR_ERR(btf); btf = NULL; diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 682daaa49e6a..b86f450e6fce 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -11,6 +11,7 @@ #include #include +#include #include "main.h" @@ -28,6 +29,7 @@ bool show_pinned; bool block_mount; bool verifier_logs; bool relaxed_maps; +struct btf *base_btf; struct pinned_obj_table prog_table; struct pinned_obj_table map_table; struct pinned_obj_table link_table; @@ -391,6 +393,7 @@ int main(int argc, char **argv) { "mapcompat", no_argument, NULL, 'm' }, { "nomount", no_argument, NULL, 'n' }, { "debug", no_argument, NULL, 'd' }, + { "base-btf", required_argument, NULL, 'B' }, { 0 } }; int opt, ret; @@ -407,7 +410,7 @@ int main(int argc, char **argv) hash_init(link_table.table); opterr = 0; - while ((opt = getopt_long(argc, argv, "Vhpjfmnd", + while ((opt = getopt_long(argc, argv, "VhpjfmndB:", options, NULL)) >= 0) { switch (opt) { case 'V': @@ -441,6 +444,15 @@ int main(int argc, char **argv) libbpf_set_print(print_all_levels); verifier_logs = true; break; + case 'B': + base_btf = btf__parse(optarg, NULL); + if (libbpf_get_error(base_btf)) { + p_err("failed to parse base BTF at '%s': %ld\n", + optarg, libbpf_get_error(base_btf)); + base_btf = NULL; + return -1; + } + break; default: p_err("unrecognized option '%s'", argv[optind - 1]); if (json_output) @@ -465,6 +477,7 @@ int main(int argc, char **argv) delete_pinned_obj_table(&map_table); delete_pinned_obj_table(&link_table); } + btf__free(base_btf); return ret; } diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index c46e52137b87..76e91641262b 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -90,6 +90,7 @@ extern bool show_pids; extern bool block_mount; extern bool verifier_logs; extern bool relaxed_maps; +extern struct btf *base_btf; extern struct pinned_obj_table prog_table; extern struct pinned_obj_table map_table; extern struct pinned_obj_table link_table; -- cgit v1.3.1 From d3bec0138bfbe58606fc1d6f57a4cdc1a20218db Mon Sep 17 00:00:00 2001 From: David Verbeiren Date: Wed, 4 Nov 2020 12:23:32 +0100 Subject: bpf: Zero-fill re-used per-cpu map element Zero-fill element values for all other cpus than current, just as when not using prealloc. This is the only way the bpf program can ensure known initial values for all cpus ('onallcpus' cannot be set when coming from the bpf program). The scenario is: bpf program inserts some elements in a per-cpu map, then deletes some (or userspace does). When later adding new elements using bpf_map_update_elem(), the bpf program can only set the value of the new elements for the current cpu. When prealloc is enabled, previously deleted elements are re-used. Without the fix, values for other cpus remain whatever they were when the re-used entry was previously freed. A selftest is added to validate correct operation in above scenario as well as in case of LRU per-cpu map element re-use. Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Signed-off-by: David Verbeiren Signed-off-by: Alexei Starovoitov Acked-by: Matthieu Baerts Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201104112332.15191-1-david.verbeiren@tessares.net --- kernel/bpf/hashtab.c | 30 ++- tools/testing/selftests/bpf/prog_tests/map_init.c | 214 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/test_map_init.c | 33 ++++ 3 files changed, 275 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/map_init.c create mode 100644 tools/testing/selftests/bpf/progs/test_map_init.c (limited to 'tools') diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 1815e97d4c9c..1fccba6e88c4 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -821,6 +821,32 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, } } +static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr, + void *value, bool onallcpus) +{ + /* When using prealloc and not setting the initial value on all cpus, + * zero-fill element values for other cpus (just as what happens when + * not using prealloc). Otherwise, bpf program has no way to ensure + * known initial values for cpus other than current one + * (onallcpus=false always when coming from bpf prog). + */ + if (htab_is_prealloc(htab) && !onallcpus) { + u32 size = round_up(htab->map.value_size, 8); + int current_cpu = raw_smp_processor_id(); + int cpu; + + for_each_possible_cpu(cpu) { + if (cpu == current_cpu) + bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value, + size); + else + memset(per_cpu_ptr(pptr, cpu), 0, size); + } + } else { + pcpu_copy_value(htab, pptr, value, onallcpus); + } +} + static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab) { return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS && @@ -891,7 +917,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, } } - pcpu_copy_value(htab, pptr, value, onallcpus); + pcpu_init_value(htab, pptr, value, onallcpus); if (!prealloc) htab_elem_set_ptr(l_new, key_size, pptr); @@ -1183,7 +1209,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size), value, onallcpus); } else { - pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size), + pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size), value, onallcpus); hlist_nulls_add_head_rcu(&l_new->hash_node, head); l_new = NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/map_init.c b/tools/testing/selftests/bpf/prog_tests/map_init.c new file mode 100644 index 000000000000..14a31109dd0e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_init.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Tessares SA */ + +#include +#include "test_map_init.skel.h" + +#define TEST_VALUE 0x1234 +#define FILL_VALUE 0xdeadbeef + +static int nr_cpus; +static int duration; + +typedef unsigned long long map_key_t; +typedef unsigned long long map_value_t; +typedef struct { + map_value_t v; /* padding */ +} __bpf_percpu_val_align pcpu_map_value_t; + + +static int map_populate(int map_fd, int num) +{ + pcpu_map_value_t value[nr_cpus]; + int i, err; + map_key_t key; + + for (i = 0; i < nr_cpus; i++) + bpf_percpu(value, i) = FILL_VALUE; + + for (key = 1; key <= num; key++) { + err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + return -1; + } + + return 0; +} + +static struct test_map_init *setup(enum bpf_map_type map_type, int map_sz, + int *map_fd, int populate) +{ + struct test_map_init *skel; + int err; + + skel = test_map_init__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return NULL; + + err = bpf_map__set_type(skel->maps.hashmap1, map_type); + if (!ASSERT_OK(err, "bpf_map__set_type")) + goto error; + + err = bpf_map__set_max_entries(skel->maps.hashmap1, map_sz); + if (!ASSERT_OK(err, "bpf_map__set_max_entries")) + goto error; + + err = test_map_init__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto error; + + *map_fd = bpf_map__fd(skel->maps.hashmap1); + if (CHECK(*map_fd < 0, "bpf_map__fd", "failed\n")) + goto error; + + err = map_populate(*map_fd, populate); + if (!ASSERT_OK(err, "map_populate")) + goto error_map; + + return skel; + +error_map: + close(*map_fd); +error: + test_map_init__destroy(skel); + return NULL; +} + +/* executes bpf program that updates map with key, value */ +static int prog_run_insert_elem(struct test_map_init *skel, map_key_t key, + map_value_t value) +{ + struct test_map_init__bss *bss; + + bss = skel->bss; + + bss->inKey = key; + bss->inValue = value; + bss->inPid = getpid(); + + if (!ASSERT_OK(test_map_init__attach(skel), "skel_attach")) + return -1; + + /* Let tracepoint trigger */ + syscall(__NR_getpgid); + + test_map_init__detach(skel); + + return 0; +} + +static int check_values_one_cpu(pcpu_map_value_t *value, map_value_t expected) +{ + int i, nzCnt = 0; + map_value_t val; + + for (i = 0; i < nr_cpus; i++) { + val = bpf_percpu(value, i); + if (val) { + if (CHECK(val != expected, "map value", + "unexpected for cpu %d: 0x%llx\n", i, val)) + return -1; + nzCnt++; + } + } + + if (CHECK(nzCnt != 1, "map value", "set for %d CPUs instead of 1!\n", + nzCnt)) + return -1; + + return 0; +} + +/* Add key=1 elem with values set for all CPUs + * Delete elem key=1 + * Run bpf prog that inserts new key=1 elem with value=0x1234 + * (bpf prog can only set value for current CPU) + * Lookup Key=1 and check value is as expected for all CPUs: + * value set by bpf prog for one CPU, 0 for all others + */ +static void test_pcpu_map_init(void) +{ + pcpu_map_value_t value[nr_cpus]; + struct test_map_init *skel; + int map_fd, err; + map_key_t key; + + /* max 1 elem in map so insertion is forced to reuse freed entry */ + skel = setup(BPF_MAP_TYPE_PERCPU_HASH, 1, &map_fd, 1); + if (!ASSERT_OK_PTR(skel, "prog_setup")) + return; + + /* delete element so the entry can be re-used*/ + key = 1; + err = bpf_map_delete_elem(map_fd, &key); + if (!ASSERT_OK(err, "bpf_map_delete_elem")) + goto cleanup; + + /* run bpf prog that inserts new elem, re-using the slot just freed */ + err = prog_run_insert_elem(skel, key, TEST_VALUE); + if (!ASSERT_OK(err, "prog_run_insert_elem")) + goto cleanup; + + /* check that key=1 was re-created by bpf prog */ + err = bpf_map_lookup_elem(map_fd, &key, value); + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + goto cleanup; + + /* and has expected values */ + check_values_one_cpu(value, TEST_VALUE); + +cleanup: + test_map_init__destroy(skel); +} + +/* Add key=1 and key=2 elems with values set for all CPUs + * Run bpf prog that inserts new key=3 elem + * (only for current cpu; other cpus should have initial value = 0) + * Lookup Key=1 and check value is as expected for all CPUs + */ +static void test_pcpu_lru_map_init(void) +{ + pcpu_map_value_t value[nr_cpus]; + struct test_map_init *skel; + int map_fd, err; + map_key_t key; + + /* Set up LRU map with 2 elements, values filled for all CPUs. + * With these 2 elements, the LRU map is full + */ + skel = setup(BPF_MAP_TYPE_LRU_PERCPU_HASH, 2, &map_fd, 2); + if (!ASSERT_OK_PTR(skel, "prog_setup")) + return; + + /* run bpf prog that inserts new key=3 element, re-using LRU slot */ + key = 3; + err = prog_run_insert_elem(skel, key, TEST_VALUE); + if (!ASSERT_OK(err, "prog_run_insert_elem")) + goto cleanup; + + /* check that key=3 replaced one of earlier elements */ + err = bpf_map_lookup_elem(map_fd, &key, value); + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + goto cleanup; + + /* and has expected values */ + check_values_one_cpu(value, TEST_VALUE); + +cleanup: + test_map_init__destroy(skel); +} + +void test_map_init(void) +{ + nr_cpus = bpf_num_possible_cpus(); + if (nr_cpus <= 1) { + printf("%s:SKIP: >1 cpu needed for this test\n", __func__); + test__skip(); + return; + } + + if (test__start_subtest("pcpu_map_init")) + test_pcpu_map_init(); + if (test__start_subtest("pcpu_lru_map_init")) + test_pcpu_lru_map_init(); +} diff --git a/tools/testing/selftests/bpf/progs/test_map_init.c b/tools/testing/selftests/bpf/progs/test_map_init.c new file mode 100644 index 000000000000..c89d28ead673 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_map_init.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Tessares SA */ + +#include "vmlinux.h" +#include + +__u64 inKey = 0; +__u64 inValue = 0; +__u32 inPid = 0; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 2); + __type(key, __u64); + __type(value, __u64); +} hashmap1 SEC(".maps"); + + +SEC("tp/syscalls/sys_enter_getpgid") +int sysenter_getpgid(const void *ctx) +{ + /* Just do it for once, when called from our own test prog. This + * ensures the map value is only updated for a single CPU. + */ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + + if (cur_pid == inPid) + bpf_map_update_elem(&hashmap1, &inKey, &inValue, BPF_NOEXIST); + + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.3.1 From c6bde958a62b8ca5ee8d2c1fe429aec4ad54efad Mon Sep 17 00:00:00 2001 From: Florian Lehner Date: Thu, 29 Oct 2020 21:14:42 +0100 Subject: bpf: Lift hashtab key_size limit Currently key_size of hashtab is limited to MAX_BPF_STACK. As the key of hashtab can also be a value from a per cpu map it can be larger than MAX_BPF_STACK. The use-case for this patch originates to implement allow/disallow lists for files and file paths. The maximum length of file paths is defined by PATH_MAX with 4096 chars including nul. This limit exceeds MAX_BPF_STACK. Changelog: v5: - Fix cast overflow v4: - Utilize BPF skeleton in tests - Rebase v3: - Rebase v2: - Add a test for bpf side Signed-off-by: Florian Lehner Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201029201442.596690-1-dev@der-flo.net --- kernel/bpf/hashtab.c | 16 +++----- .../selftests/bpf/prog_tests/hash_large_key.c | 43 +++++++++++++++++++++ .../selftests/bpf/progs/test_hash_large_key.c | 44 ++++++++++++++++++++++ tools/testing/selftests/bpf/test_maps.c | 3 +- 4 files changed, 94 insertions(+), 12 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/hash_large_key.c create mode 100644 tools/testing/selftests/bpf/progs/test_hash_large_key.c (limited to 'tools') diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 23f73d4649c9..7bf18d92af41 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -415,17 +415,11 @@ static int htab_map_alloc_check(union bpf_attr *attr) attr->value_size == 0) return -EINVAL; - if (attr->key_size > MAX_BPF_STACK) - /* eBPF programs initialize keys on stack, so they cannot be - * larger than max stack size - */ - return -E2BIG; - - if (attr->value_size >= KMALLOC_MAX_SIZE - - MAX_BPF_STACK - sizeof(struct htab_elem)) - /* if value_size is bigger, the user space won't be able to - * access the elements via bpf syscall. This check also makes - * sure that the elem_size doesn't overflow and it's + if ((u64)attr->key_size + attr->value_size >= KMALLOC_MAX_SIZE - + sizeof(struct htab_elem)) + /* if key_size + value_size is bigger, the user space won't be + * able to access the elements via bpf syscall. This check + * also makes sure that the elem_size doesn't overflow and it's * kmalloc-able later in htab_map_update_elem() */ return -E2BIG; diff --git a/tools/testing/selftests/bpf/prog_tests/hash_large_key.c b/tools/testing/selftests/bpf/prog_tests/hash_large_key.c new file mode 100644 index 000000000000..34684c0fc76d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/hash_large_key.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include "test_hash_large_key.skel.h" + +void test_hash_large_key(void) +{ + int err, value = 21, duration = 0, hash_map_fd; + struct test_hash_large_key *skel; + + struct bigelement { + int a; + char b[4096]; + long long c; + } key; + bzero(&key, sizeof(key)); + + skel = test_hash_large_key__open_and_load(); + if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n")) + return; + + hash_map_fd = bpf_map__fd(skel->maps.hash_map); + if (CHECK(hash_map_fd < 0, "bpf_map__fd", "failed\n")) + goto cleanup; + + err = test_hash_large_key__attach(skel); + if (CHECK(err, "attach_raw_tp", "err %d\n", err)) + goto cleanup; + + err = bpf_map_update_elem(hash_map_fd, &key, &value, BPF_ANY); + if (CHECK(err, "bpf_map_update_elem", "errno=%d\n", errno)) + goto cleanup; + + key.c = 1; + err = bpf_map_lookup_elem(hash_map_fd, &key, &value); + if (CHECK(err, "bpf_map_lookup_elem", "errno=%d\n", errno)) + goto cleanup; + + CHECK_FAIL(value != 42); + +cleanup: + test_hash_large_key__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_hash_large_key.c b/tools/testing/selftests/bpf/progs/test_hash_large_key.c new file mode 100644 index 000000000000..473a22794a62 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_hash_large_key.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 2); + __type(key, struct bigelement); + __type(value, __u32); +} hash_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct bigelement); +} key_map SEC(".maps"); + +struct bigelement { + int a; + char b[4096]; + long long c; +}; + +SEC("raw_tracepoint/sys_enter") +int bpf_hash_large_key_test(void *ctx) +{ + int zero = 0, err = 1, value = 42; + struct bigelement *key; + + key = bpf_map_lookup_elem(&key_map, &zero); + if (!key) + return 0; + + key->c = 1; + if (bpf_map_update_elem(&hash_map, key, &value, BPF_ANY)) + return 0; + + return 0; +} + diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 0d92ebcb335d..0ad3e6305ff0 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1223,9 +1223,10 @@ out_map_in_map: static void test_map_large(void) { + struct bigkey { int a; - char b[116]; + char b[4096]; long long c; } key; int fd, i, value; -- cgit v1.3.1 From 4cf1bc1f10452065a29d576fc5693fc4fab5b919 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 6 Nov 2020 10:37:40 +0000 Subject: bpf: Implement task local storage Similar to bpf_local_storage for sockets and inodes add local storage for task_struct. The life-cycle of storage is managed with the life-cycle of the task_struct. i.e. the storage is destroyed along with the owning task with a callback to the bpf_task_storage_free from the task_free LSM hook. The BPF LSM allocates an __rcu pointer to the bpf_local_storage in the security blob which are now stackable and can co-exist with other LSMs. The userspace map operations can be done by using a pid fd as a key passed to the lookup, update and delete operations. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201106103747.2780972-3-kpsingh@chromium.org --- include/linux/bpf_lsm.h | 23 +++ include/linux/bpf_types.h | 1 + include/uapi/linux/bpf.h | 39 +++++ kernel/bpf/Makefile | 1 + kernel/bpf/bpf_lsm.c | 4 + kernel/bpf/bpf_task_storage.c | 315 +++++++++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 3 +- kernel/bpf/verifier.c | 10 ++ security/bpf/hooks.c | 2 + tools/include/uapi/linux/bpf.h | 39 +++++ 10 files changed, 436 insertions(+), 1 deletion(-) create mode 100644 kernel/bpf/bpf_task_storage.c (limited to 'tools') diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index aaacb6aafc87..73226181b744 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -7,6 +7,7 @@ #ifndef _LINUX_BPF_LSM_H #define _LINUX_BPF_LSM_H +#include #include #include @@ -35,9 +36,21 @@ static inline struct bpf_storage_blob *bpf_inode( return inode->i_security + bpf_lsm_blob_sizes.lbs_inode; } +static inline struct bpf_storage_blob *bpf_task( + const struct task_struct *task) +{ + if (unlikely(!task->security)) + return NULL; + + return task->security + bpf_lsm_blob_sizes.lbs_task; +} + extern const struct bpf_func_proto bpf_inode_storage_get_proto; extern const struct bpf_func_proto bpf_inode_storage_delete_proto; +extern const struct bpf_func_proto bpf_task_storage_get_proto; +extern const struct bpf_func_proto bpf_task_storage_delete_proto; void bpf_inode_storage_free(struct inode *inode); +void bpf_task_storage_free(struct task_struct *task); #else /* !CONFIG_BPF_LSM */ @@ -53,10 +66,20 @@ static inline struct bpf_storage_blob *bpf_inode( return NULL; } +static inline struct bpf_storage_blob *bpf_task( + const struct task_struct *task) +{ + return NULL; +} + static inline void bpf_inode_storage_free(struct inode *inode) { } +static inline void bpf_task_storage_free(struct task_struct *task) +{ +} + #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 2e6f568377f1..99f7fd657d87 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -109,6 +109,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) #endif #ifdef CONFIG_BPF_LSM BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) #if defined(CONFIG_XDP_SOCKETS) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e6ceac3f7d62..f4037b2161a6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -157,6 +157,7 @@ enum bpf_map_type { BPF_MAP_TYPE_STRUCT_OPS, BPF_MAP_TYPE_RINGBUF, BPF_MAP_TYPE_INODE_STORAGE, + BPF_MAP_TYPE_TASK_STORAGE, }; /* Note that tracing related programs such as @@ -3742,6 +3743,42 @@ union bpf_attr { * Return * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. + * + * void *bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags) + * Description + * Get a bpf_local_storage from the *task*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *task* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this + * helper enforces the key must be an task_struct and the map must also + * be a **BPF_MAP_TYPE_TASK_STORAGE**. + * + * Underneath, the value is stored locally at *task* instead of + * the *map*. The *map* is used as the bpf-local-storage + * "type". The bpf-local-storage "type" (i.e. the *map*) is + * searched against all bpf_local_storage residing at *task*. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * long bpf_task_storage_delete(struct bpf_map *map, struct task_struct *task) + * Description + * Delete a bpf_local_storage from a *task*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3900,6 +3937,8 @@ union bpf_attr { FN(bpf_per_cpu_ptr), \ FN(bpf_this_cpu_ptr), \ FN(redirect_peer), \ + FN(task_storage_get), \ + FN(task_storage_delete), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index bdc8cd1b6767..f0b93ced5a7f 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_i obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o +obj-${CONFIG_BPF_LSM} += bpf_task_storage.o obj-$(CONFIG_BPF_SYSCALL) += disasm.o obj-$(CONFIG_BPF_JIT) += trampoline.o obj-$(CONFIG_BPF_SYSCALL) += btf.o diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index cd8a617f2109..e92c51bebb47 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -63,6 +63,10 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_spin_lock_proto; case BPF_FUNC_spin_unlock: return &bpf_spin_unlock_proto; + case BPF_FUNC_task_storage_get: + return &bpf_task_storage_get_proto; + case BPF_FUNC_task_storage_delete: + return &bpf_task_storage_delete_proto; default: return tracing_prog_func_proto(func_id, prog); } diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c new file mode 100644 index 000000000000..39a45fba4fb0 --- /dev/null +++ b/kernel/bpf/bpf_task_storage.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2020 Facebook + * Copyright 2020 Google LLC. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_BPF_STORAGE_CACHE(task_cache); + +static struct bpf_local_storage __rcu **task_storage_ptr(void *owner) +{ + struct task_struct *task = owner; + struct bpf_storage_blob *bsb; + + bsb = bpf_task(task); + if (!bsb) + return NULL; + return &bsb->storage; +} + +static struct bpf_local_storage_data * +task_storage_lookup(struct task_struct *task, struct bpf_map *map, + bool cacheit_lockit) +{ + struct bpf_local_storage *task_storage; + struct bpf_local_storage_map *smap; + struct bpf_storage_blob *bsb; + + bsb = bpf_task(task); + if (!bsb) + return NULL; + + task_storage = rcu_dereference(bsb->storage); + if (!task_storage) + return NULL; + + smap = (struct bpf_local_storage_map *)map; + return bpf_local_storage_lookup(task_storage, smap, cacheit_lockit); +} + +void bpf_task_storage_free(struct task_struct *task) +{ + struct bpf_local_storage_elem *selem; + struct bpf_local_storage *local_storage; + bool free_task_storage = false; + struct bpf_storage_blob *bsb; + struct hlist_node *n; + + bsb = bpf_task(task); + if (!bsb) + return; + + rcu_read_lock(); + + local_storage = rcu_dereference(bsb->storage); + if (!local_storage) { + rcu_read_unlock(); + return; + } + + /* Neither the bpf_prog nor the bpf-map's syscall + * could be modifying the local_storage->list now. + * Thus, no elem can be added-to or deleted-from the + * local_storage->list by the bpf_prog or by the bpf-map's syscall. + * + * It is racing with bpf_local_storage_map_free() alone + * when unlinking elem from the local_storage->list and + * the map's bucket->list. + */ + raw_spin_lock_bh(&local_storage->lock); + hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) { + /* Always unlink from map before unlinking from + * local_storage. + */ + bpf_selem_unlink_map(selem); + free_task_storage = bpf_selem_unlink_storage_nolock( + local_storage, selem, false); + } + raw_spin_unlock_bh(&local_storage->lock); + rcu_read_unlock(); + + /* free_task_storage should always be true as long as + * local_storage->list was non-empty. + */ + if (free_task_storage) + kfree_rcu(local_storage, rcu); +} + +static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_local_storage_data *sdata; + struct task_struct *task; + unsigned int f_flags; + struct pid *pid; + int fd, err; + + fd = *(int *)key; + pid = pidfd_get_pid(fd, &f_flags); + if (IS_ERR(pid)) + return ERR_CAST(pid); + + /* We should be in an RCU read side critical section, it should be safe + * to call pid_task. + */ + WARN_ON_ONCE(!rcu_read_lock_held()); + task = pid_task(pid, PIDTYPE_PID); + if (!task) { + err = -ENOENT; + goto out; + } + + sdata = task_storage_lookup(task, map, true); + put_pid(pid); + return sdata ? sdata->data : NULL; +out: + put_pid(pid); + return ERR_PTR(err); +} + +static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) +{ + struct bpf_local_storage_data *sdata; + struct task_struct *task; + unsigned int f_flags; + struct pid *pid; + int fd, err; + + fd = *(int *)key; + pid = pidfd_get_pid(fd, &f_flags); + if (IS_ERR(pid)) + return PTR_ERR(pid); + + /* We should be in an RCU read side critical section, it should be safe + * to call pid_task. + */ + WARN_ON_ONCE(!rcu_read_lock_held()); + task = pid_task(pid, PIDTYPE_PID); + if (!task) { + err = -ENOENT; + goto out; + } + + sdata = bpf_local_storage_update( + task, (struct bpf_local_storage_map *)map, value, map_flags); + + err = PTR_ERR_OR_ZERO(sdata); +out: + put_pid(pid); + return err; +} + +static int task_storage_delete(struct task_struct *task, struct bpf_map *map) +{ + struct bpf_local_storage_data *sdata; + + sdata = task_storage_lookup(task, map, false); + if (!sdata) + return -ENOENT; + + bpf_selem_unlink(SELEM(sdata)); + + return 0; +} + +static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key) +{ + struct task_struct *task; + unsigned int f_flags; + struct pid *pid; + int fd, err; + + fd = *(int *)key; + pid = pidfd_get_pid(fd, &f_flags); + if (IS_ERR(pid)) + return PTR_ERR(pid); + + /* We should be in an RCU read side critical section, it should be safe + * to call pid_task. + */ + WARN_ON_ONCE(!rcu_read_lock_held()); + task = pid_task(pid, PIDTYPE_PID); + if (!task) { + err = -ENOENT; + goto out; + } + + err = task_storage_delete(task, map); +out: + put_pid(pid); + return err; +} + +BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, + task, void *, value, u64, flags) +{ + struct bpf_local_storage_data *sdata; + + if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) + return (unsigned long)NULL; + + /* explicitly check that the task_storage_ptr is not + * NULL as task_storage_lookup returns NULL in this case and + * bpf_local_storage_update expects the owner to have a + * valid storage pointer. + */ + if (!task_storage_ptr(task)) + return (unsigned long)NULL; + + sdata = task_storage_lookup(task, map, true); + if (sdata) + return (unsigned long)sdata->data; + + /* This helper must only be called from places where the lifetime of the task + * is guaranteed. Either by being refcounted or by being protected + * by an RCU read-side critical section. + */ + if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { + sdata = bpf_local_storage_update( + task, (struct bpf_local_storage_map *)map, value, + BPF_NOEXIST); + return IS_ERR(sdata) ? (unsigned long)NULL : + (unsigned long)sdata->data; + } + + return (unsigned long)NULL; +} + +BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *, + task) +{ + /* This helper must only be called from places where the lifetime of the task + * is guaranteed. Either by being refcounted or by being protected + * by an RCU read-side critical section. + */ + return task_storage_delete(task, map); +} + +static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + return -ENOTSUPP; +} + +static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr) +{ + struct bpf_local_storage_map *smap; + + smap = bpf_local_storage_map_alloc(attr); + if (IS_ERR(smap)) + return ERR_CAST(smap); + + smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache); + return &smap->map; +} + +static void task_storage_map_free(struct bpf_map *map) +{ + struct bpf_local_storage_map *smap; + + smap = (struct bpf_local_storage_map *)map; + bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx); + bpf_local_storage_map_free(smap); +} + +static int task_storage_map_btf_id; +const struct bpf_map_ops task_storage_map_ops = { + .map_meta_equal = bpf_map_meta_equal, + .map_alloc_check = bpf_local_storage_map_alloc_check, + .map_alloc = task_storage_map_alloc, + .map_free = task_storage_map_free, + .map_get_next_key = notsupp_get_next_key, + .map_lookup_elem = bpf_pid_task_storage_lookup_elem, + .map_update_elem = bpf_pid_task_storage_update_elem, + .map_delete_elem = bpf_pid_task_storage_delete_elem, + .map_check_btf = bpf_local_storage_map_check_btf, + .map_btf_name = "bpf_local_storage_map", + .map_btf_id = &task_storage_map_btf_id, + .map_owner_storage_ptr = task_storage_ptr, +}; + +BTF_ID_LIST_SINGLE(bpf_task_storage_btf_ids, struct, task_struct) + +const struct bpf_func_proto bpf_task_storage_get_proto = { + .func = bpf_task_storage_get, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_btf_id = &bpf_task_storage_btf_ids[0], + .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; + +const struct bpf_func_proto bpf_task_storage_delete_proto = { + .func = bpf_task_storage_delete, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_btf_id = &bpf_task_storage_btf_ids[0], +}; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8f50c9c19f1b..f3fe9f53f93c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -773,7 +773,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, map->map_type != BPF_MAP_TYPE_ARRAY && map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && map->map_type != BPF_MAP_TYPE_SK_STORAGE && - map->map_type != BPF_MAP_TYPE_INODE_STORAGE) + map->map_type != BPF_MAP_TYPE_INODE_STORAGE && + map->map_type != BPF_MAP_TYPE_TASK_STORAGE) return -ENOTSUPP; if (map->spin_lock_off + sizeof(struct bpf_spin_lock) > map->value_size) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f863aa84d0a2..00960f6a83ec 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4469,6 +4469,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, func_id != BPF_FUNC_inode_storage_delete) goto error; break; + case BPF_MAP_TYPE_TASK_STORAGE: + if (func_id != BPF_FUNC_task_storage_get && + func_id != BPF_FUNC_task_storage_delete) + goto error; + break; default: break; } @@ -4547,6 +4552,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE) goto error; break; + case BPF_FUNC_task_storage_get: + case BPF_FUNC_task_storage_delete: + if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE) + goto error; + break; default: break; } diff --git a/security/bpf/hooks.c b/security/bpf/hooks.c index 788667d582ae..e5971fa74fd7 100644 --- a/security/bpf/hooks.c +++ b/security/bpf/hooks.c @@ -12,6 +12,7 @@ static struct security_hook_list bpf_lsm_hooks[] __lsm_ro_after_init = { #include #undef LSM_HOOK LSM_HOOK_INIT(inode_free_security, bpf_inode_storage_free), + LSM_HOOK_INIT(task_free, bpf_task_storage_free), }; static int __init bpf_lsm_init(void) @@ -23,6 +24,7 @@ static int __init bpf_lsm_init(void) struct lsm_blob_sizes bpf_lsm_blob_sizes __lsm_ro_after_init = { .lbs_inode = sizeof(struct bpf_storage_blob), + .lbs_task = sizeof(struct bpf_storage_blob), }; DEFINE_LSM(bpf) = { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e6ceac3f7d62..f4037b2161a6 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -157,6 +157,7 @@ enum bpf_map_type { BPF_MAP_TYPE_STRUCT_OPS, BPF_MAP_TYPE_RINGBUF, BPF_MAP_TYPE_INODE_STORAGE, + BPF_MAP_TYPE_TASK_STORAGE, }; /* Note that tracing related programs such as @@ -3742,6 +3743,42 @@ union bpf_attr { * Return * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. + * + * void *bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags) + * Description + * Get a bpf_local_storage from the *task*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *task* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this + * helper enforces the key must be an task_struct and the map must also + * be a **BPF_MAP_TYPE_TASK_STORAGE**. + * + * Underneath, the value is stored locally at *task* instead of + * the *map*. The *map* is used as the bpf-local-storage + * "type". The bpf-local-storage "type" (i.e. the *map*) is + * searched against all bpf_local_storage residing at *task*. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * long bpf_task_storage_delete(struct bpf_map *map, struct task_struct *task) + * Description + * Delete a bpf_local_storage from a *task*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3900,6 +3937,8 @@ union bpf_attr { FN(bpf_per_cpu_ptr), \ FN(bpf_this_cpu_ptr), \ FN(redirect_peer), \ + FN(task_storage_get), \ + FN(task_storage_delete), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.3.1 From 8885274d225985807deeb95de74642073c3b97bb Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 6 Nov 2020 10:37:41 +0000 Subject: libbpf: Add support for task local storage Updates the bpf_probe_map_type API to also support BPF_MAP_TYPE_TASK_STORAGE similar to other local storage maps. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201106103747.2780972-4-kpsingh@chromium.org --- tools/lib/bpf/libbpf_probes.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 5482a9b7ae2d..ecaae2927ab8 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -230,6 +230,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) break; case BPF_MAP_TYPE_SK_STORAGE: case BPF_MAP_TYPE_INODE_STORAGE: + case BPF_MAP_TYPE_TASK_STORAGE: btf_key_type_id = 1; btf_value_type_id = 3; value_size = 8; -- cgit v1.3.1 From 864ab0616dccf14e51618a24acc7cf23ddd2b98a Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 6 Nov 2020 10:37:42 +0000 Subject: bpftool: Add support for task local storage Updates the binary to handle the BPF_MAP_TYPE_TASK_STORAGE as "task_storage" for printing and parsing. Also updates the documentation and bash completion Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201106103747.2780972-5-kpsingh@chromium.org --- tools/bpf/bpftool/Documentation/bpftool-map.rst | 3 ++- tools/bpf/bpftool/bash-completion/bpftool | 2 +- tools/bpf/bpftool/map.c | 4 +++- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index dade10cdf295..3d52256ba75f 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -50,7 +50,8 @@ MAP COMMANDS | | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps** | | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash** | | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** -| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** } +| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** + | **task_storage** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 3f1da30c4da6..fdffbc64c65c 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -705,7 +705,7 @@ _bpftool() hash_of_maps devmap devmap_hash sockmap cpumap \ xskmap sockhash cgroup_storage reuseport_sockarray \ percpu_cgroup_storage queue stack sk_storage \ - struct_ops inode_storage' -- \ + struct_ops inode_storage task_storage' -- \ "$cur" ) ) return 0 ;; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index a7efbd84fbcc..b400364ee054 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -51,6 +51,7 @@ const char * const map_type_name[] = { [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops", [BPF_MAP_TYPE_RINGBUF] = "ringbuf", [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage", + [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", }; const size_t map_type_name_size = ARRAY_SIZE(map_type_name); @@ -1464,7 +1465,8 @@ static int do_help(int argc, char **argv) " lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n" " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" - " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage }\n" + " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n" + " task_storage }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2]); -- cgit v1.3.1 From 3ca1032ab7ab010eccb107aa515598788f7d93bb Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 6 Nov 2020 10:37:43 +0000 Subject: bpf: Implement get_current_task_btf and RET_PTR_TO_BTF_ID The currently available bpf_get_current_task returns an unsigned integer which can be used along with BPF_CORE_READ to read data from the task_struct but still cannot be used as an input argument to a helper that accepts an ARG_PTR_TO_BTF_ID of type task_struct. In order to implement this helper a new return type, RET_PTR_TO_BTF_ID, is added. This is similar to RET_PTR_TO_BTF_ID_OR_NULL but does not require checking the nullness of returned pointer. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201106103747.2780972-6-kpsingh@chromium.org --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 9 +++++++++ kernel/bpf/verifier.c | 7 +++++-- kernel/trace/bpf_trace.c | 16 ++++++++++++++++ tools/include/uapi/linux/bpf.h | 9 +++++++++ 5 files changed, 40 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2fffd30e13ac..73d5381a5d5c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -310,6 +310,7 @@ enum bpf_return_type { RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */ + RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f4037b2161a6..9879d6793e90 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3779,6 +3779,14 @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf_local_storage cannot be found. + * + * struct task_struct *bpf_get_current_task_btf(void) + * Description + * Return a BTF pointer to the "current" task. + * This pointer can also be used in helpers that accept an + * *ARG_PTR_TO_BTF_ID* of type *task_struct*. + * Return + * Pointer to the current task. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3939,6 +3947,7 @@ union bpf_attr { FN(redirect_peer), \ FN(task_storage_get), \ FN(task_storage_delete), \ + FN(get_current_task_btf), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 00960f6a83ec..10da26e55130 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5186,11 +5186,14 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL; regs[BPF_REG_0].btf_id = meta.ret_btf_id; } - } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) { + } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL || + fn->ret_type == RET_PTR_TO_BTF_ID) { int ret_btf_id; mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL; + regs[BPF_REG_0].type = fn->ret_type == RET_PTR_TO_BTF_ID ? + PTR_TO_BTF_ID : + PTR_TO_BTF_ID_OR_NULL; ret_btf_id = *fn->ret_btf_id; if (ret_btf_id == 0) { verbose(env, "invalid return type %d of func %s#%d\n", diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 4517c8b66518..e4515b0f62a8 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1022,6 +1022,20 @@ const struct bpf_func_proto bpf_get_current_task_proto = { .ret_type = RET_INTEGER, }; +BPF_CALL_0(bpf_get_current_task_btf) +{ + return (unsigned long) current; +} + +BTF_ID_LIST_SINGLE(bpf_get_current_btf_ids, struct, task_struct) + +static const struct bpf_func_proto bpf_get_current_task_btf_proto = { + .func = bpf_get_current_task_btf, + .gpl_only = true, + .ret_type = RET_PTR_TO_BTF_ID, + .ret_btf_id = &bpf_get_current_btf_ids[0], +}; + BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx) { struct bpf_array *array = container_of(map, struct bpf_array, map); @@ -1265,6 +1279,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_current_pid_tgid_proto; case BPF_FUNC_get_current_task: return &bpf_get_current_task_proto; + case BPF_FUNC_get_current_task_btf: + return &bpf_get_current_task_btf_proto; case BPF_FUNC_get_current_uid_gid: return &bpf_get_current_uid_gid_proto; case BPF_FUNC_get_current_comm: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f4037b2161a6..9879d6793e90 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3779,6 +3779,14 @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf_local_storage cannot be found. + * + * struct task_struct *bpf_get_current_task_btf(void) + * Description + * Return a BTF pointer to the "current" task. + * This pointer can also be used in helpers that accept an + * *ARG_PTR_TO_BTF_ID* of type *task_struct*. + * Return + * Pointer to the current task. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3939,6 +3947,7 @@ union bpf_attr { FN(redirect_peer), \ FN(task_storage_get), \ FN(task_storage_delete), \ + FN(get_current_task_btf), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.3.1 From f0e5ba0bc481df77cf0afac2b33e420b33eeb463 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 6 Nov 2020 10:37:44 +0000 Subject: bpf: Fix tests for local_storage The {inode,sk}_storage_result checking if the correct value was retrieved was being clobbered unconditionally by the return value of the bpf_{inode,sk}_storage_delete call. Also, consistently use the newly added BPF_LOCAL_STORAGE_GET_F_CREATE flag. Fixes: cd324d7abb3d ("bpf: Add selftests for local_storage") Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201106103747.2780972-7-kpsingh@chromium.org --- tools/testing/selftests/bpf/progs/local_storage.c | 24 ++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c index 0758ba229ae0..09529e33be98 100644 --- a/tools/testing/selftests/bpf/progs/local_storage.c +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -58,20 +58,22 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) { __u32 pid = bpf_get_current_pid_tgid() >> 32; struct dummy_storage *storage; + int err; if (pid != monitored_pid) return 0; storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0, - BPF_SK_STORAGE_GET_F_CREATE); + BPF_LOCAL_STORAGE_GET_F_CREATE); if (!storage) return 0; - if (storage->value == DUMMY_STORAGE_VALUE) + if (storage->value != DUMMY_STORAGE_VALUE) inode_storage_result = -1; - inode_storage_result = - bpf_inode_storage_delete(&inode_storage_map, victim->d_inode); + err = bpf_inode_storage_delete(&inode_storage_map, victim->d_inode); + if (!err) + inode_storage_result = err; return 0; } @@ -82,19 +84,23 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, { __u32 pid = bpf_get_current_pid_tgid() >> 32; struct dummy_storage *storage; + int err; if (pid != monitored_pid) return 0; storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, - BPF_SK_STORAGE_GET_F_CREATE); + BPF_LOCAL_STORAGE_GET_F_CREATE); if (!storage) return 0; - if (storage->value == DUMMY_STORAGE_VALUE) + if (storage->value != DUMMY_STORAGE_VALUE) sk_storage_result = -1; - sk_storage_result = bpf_sk_storage_delete(&sk_storage_map, sock->sk); + err = bpf_sk_storage_delete(&sk_storage_map, sock->sk); + if (!err) + sk_storage_result = err; + return 0; } @@ -109,7 +115,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, return 0; storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, - BPF_SK_STORAGE_GET_F_CREATE); + BPF_LOCAL_STORAGE_GET_F_CREATE); if (!storage) return 0; @@ -131,7 +137,7 @@ int BPF_PROG(file_open, struct file *file) return 0; storage = bpf_inode_storage_get(&inode_storage_map, file->f_inode, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + BPF_LOCAL_STORAGE_GET_F_CREATE); if (!storage) return 0; -- cgit v1.3.1 From a367efa71b3f5a53281ca9772f8bf43166dfdf5f Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 6 Nov 2020 10:37:45 +0000 Subject: bpf: Update selftests for local_storage to use vmlinux.h With the fixing of BTF pruning of embedded types being fixed, the test can be simplified to use vmlinux.h Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201106103747.2780972-8-kpsingh@chromium.org --- tools/testing/selftests/bpf/progs/local_storage.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c index 09529e33be98..ef3822bc7542 100644 --- a/tools/testing/selftests/bpf/progs/local_storage.c +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -4,9 +4,8 @@ * Copyright 2020 Google LLC. */ +#include "vmlinux.h" #include -#include -#include #include #include @@ -36,23 +35,6 @@ struct { __type(value, struct dummy_storage); } sk_storage_map SEC(".maps"); -/* TODO Use vmlinux.h once BTF pruning for embedded types is fixed. - */ -struct sock {} __attribute__((preserve_access_index)); -struct sockaddr {} __attribute__((preserve_access_index)); -struct socket { - struct sock *sk; -} __attribute__((preserve_access_index)); - -struct inode {} __attribute__((preserve_access_index)); -struct dentry { - struct inode *d_inode; -} __attribute__((preserve_access_index)); -struct file { - struct inode *f_inode; -} __attribute__((preserve_access_index)); - - SEC("lsm/inode_unlink") int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) { -- cgit v1.3.1 From 9cde3beeadb311d4b435a7d28d5ab72bcc5de65d Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 6 Nov 2020 10:37:46 +0000 Subject: bpf: Add tests for task_local_storage The test exercises the syscall based map operations by creating a pidfd for the current process. For verifying kernel / LSM functionality, the test implements a simple MAC policy which denies an executable from unlinking itself. The LSM program bprm_committed_creds sets a task_local_storage with a pointer to the inode. This is then used to detect if the task is trying to unlink itself in the inode_unlink LSM hook. The test copies /bin/rm to /tmp and executes it in a child thread with the intention of deleting itself. A successful test should prevent the the running executable from deleting itself. The bpf programs are also updated to call bpf_spin_{lock, unlock} to trigger the verfier checks for spin locks. The temporary file is cleaned up later in the test. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201106103747.2780972-9-kpsingh@chromium.org --- .../selftests/bpf/prog_tests/test_local_storage.c | 185 +++++++++++++++++++-- tools/testing/selftests/bpf/progs/local_storage.c | 61 ++++++- 2 files changed, 226 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c index 91cd6f357246..4e7f6a4965f2 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c @@ -4,30 +4,161 @@ * Copyright (C) 2020 Google LLC. */ +#include +#include #include #include #include "local_storage.skel.h" #include "network_helpers.h" -int create_and_unlink_file(void) +static inline int sys_pidfd_open(pid_t pid, unsigned int flags) { - char fname[PATH_MAX] = "/tmp/fileXXXXXX"; - int fd; + return syscall(__NR_pidfd_open, pid, flags); +} + +static inline ssize_t copy_file_range(int fd_in, loff_t *off_in, int fd_out, + loff_t *off_out, size_t len, + unsigned int flags) +{ + return syscall(__NR_copy_file_range, fd_in, off_in, fd_out, off_out, + len, flags); +} + +static unsigned int duration; + +#define TEST_STORAGE_VALUE 0xbeefdead - fd = mkstemp(fname); - if (fd < 0) - return fd; +struct storage { + void *inode; + unsigned int value; + /* Lock ensures that spin locked versions of local stoage operations + * also work, most operations in this tests are still single threaded + */ + struct bpf_spin_lock lock; +}; + +/* Copies an rm binary to a temp file. dest is a mkstemp template */ +static int copy_rm(char *dest) +{ + int fd_in, fd_out = -1, ret = 0; + struct stat stat; + + fd_in = open("/bin/rm", O_RDONLY); + if (fd_in < 0) + return -errno; + + fd_out = mkstemp(dest); + if (fd_out < 0) { + ret = -errno; + goto out; + } + + ret = fstat(fd_in, &stat); + if (ret == -1) { + ret = -errno; + goto out; + } + + ret = copy_file_range(fd_in, NULL, fd_out, NULL, stat.st_size, 0); + if (ret == -1) { + ret = -errno; + goto out; + } + + /* Set executable permission on the copied file */ + ret = chmod(dest, 0100); + if (ret == -1) + ret = -errno; + +out: + close(fd_in); + close(fd_out); + return ret; +} + +/* Fork and exec the provided rm binary and return the exit code of the + * forked process and its pid. + */ +static int run_self_unlink(int *monitored_pid, const char *rm_path) +{ + int child_pid, child_status, ret; + int null_fd; + + child_pid = fork(); + if (child_pid == 0) { + null_fd = open("/dev/null", O_WRONLY); + dup2(null_fd, STDOUT_FILENO); + dup2(null_fd, STDERR_FILENO); + close(null_fd); + + *monitored_pid = getpid(); + /* Use the copied /usr/bin/rm to delete itself + * /tmp/copy_of_rm /tmp/copy_of_rm. + */ + ret = execlp(rm_path, rm_path, rm_path, NULL); + if (ret) + exit(errno); + } else if (child_pid > 0) { + waitpid(child_pid, &child_status, 0); + return WEXITSTATUS(child_status); + } + + return -EINVAL; +} - close(fd); - unlink(fname); - return 0; +static bool check_syscall_operations(int map_fd, int obj_fd) +{ + struct storage val = { .value = TEST_STORAGE_VALUE, .lock = { 0 } }, + lookup_val = { .value = 0, .lock = { 0 } }; + int err; + + /* Looking up an existing element should fail initially */ + err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, + BPF_F_LOCK); + if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", + "err:%d errno:%d\n", err, errno)) + return false; + + /* Create a new element */ + err = bpf_map_update_elem(map_fd, &obj_fd, &val, + BPF_NOEXIST | BPF_F_LOCK); + if (CHECK(err < 0, "bpf_map_update_elem", "err:%d errno:%d\n", err, + errno)) + return false; + + /* Lookup the newly created element */ + err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, + BPF_F_LOCK); + if (CHECK(err < 0, "bpf_map_lookup_elem", "err:%d errno:%d", err, + errno)) + return false; + + /* Check the value of the newly created element */ + if (CHECK(lookup_val.value != val.value, "bpf_map_lookup_elem", + "value got = %x errno:%d", lookup_val.value, val.value)) + return false; + + err = bpf_map_delete_elem(map_fd, &obj_fd); + if (CHECK(err, "bpf_map_delete_elem()", "err:%d errno:%d\n", err, + errno)) + return false; + + /* The lookup should fail, now that the element has been deleted */ + err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, + BPF_F_LOCK); + if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", + "err:%d errno:%d\n", err, errno)) + return false; + + return true; } void test_test_local_storage(void) { + char tmp_exec_path[PATH_MAX] = "/tmp/copy_of_rmXXXXXX"; + int err, serv_sk = -1, task_fd = -1; struct local_storage *skel = NULL; - int err, duration = 0, serv_sk = -1; skel = local_storage__open_and_load(); if (CHECK(!skel, "skel_load", "lsm skeleton failed\n")) @@ -37,12 +168,37 @@ void test_test_local_storage(void) if (CHECK(err, "attach", "lsm attach failed: %d\n", err)) goto close_prog; - skel->bss->monitored_pid = getpid(); + task_fd = sys_pidfd_open(getpid(), 0); + if (CHECK(task_fd < 0, "pidfd_open", + "failed to get pidfd err:%d, errno:%d", task_fd, errno)) + goto close_prog; - err = create_and_unlink_file(); - if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno)) + if (!check_syscall_operations(bpf_map__fd(skel->maps.task_storage_map), + task_fd)) goto close_prog; + err = copy_rm(tmp_exec_path); + if (CHECK(err < 0, "copy_rm", "err %d errno %d\n", err, errno)) + goto close_prog; + + /* Sets skel->bss->monitored_pid to the pid of the forked child + * forks a child process that executes tmp_exec_path and tries to + * unlink its executable. This operation should be denied by the loaded + * LSM program. + */ + err = run_self_unlink(&skel->bss->monitored_pid, tmp_exec_path); + if (CHECK(err != EPERM, "run_self_unlink", "err %d want EPERM\n", err)) + goto close_prog_unlink; + + /* Set the process being monitored to be the current process */ + skel->bss->monitored_pid = getpid(); + + /* Remove the temporary created executable */ + err = unlink(tmp_exec_path); + if (CHECK(err != 0, "unlink", "unable to unlink %s: %d", tmp_exec_path, + errno)) + goto close_prog_unlink; + CHECK(skel->data->inode_storage_result != 0, "inode_storage_result", "inode_local_storage not set\n"); @@ -55,6 +211,9 @@ void test_test_local_storage(void) close(serv_sk); +close_prog_unlink: + unlink(tmp_exec_path); close_prog: + close(task_fd); local_storage__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c index ef3822bc7542..3e3de130f28f 100644 --- a/tools/testing/selftests/bpf/progs/local_storage.c +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -17,41 +17,64 @@ int monitored_pid = 0; int inode_storage_result = -1; int sk_storage_result = -1; -struct dummy_storage { +struct local_storage { + struct inode *exec_inode; __u32 value; + struct bpf_spin_lock lock; }; struct { __uint(type, BPF_MAP_TYPE_INODE_STORAGE); __uint(map_flags, BPF_F_NO_PREALLOC); __type(key, int); - __type(value, struct dummy_storage); + __type(value, struct local_storage); } inode_storage_map SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_SK_STORAGE); __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE); __type(key, int); - __type(value, struct dummy_storage); + __type(value, struct local_storage); } sk_storage_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct local_storage); +} task_storage_map SEC(".maps"); + SEC("lsm/inode_unlink") int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) { __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct dummy_storage *storage; + struct local_storage *storage; + bool is_self_unlink; int err; if (pid != monitored_pid) return 0; + storage = bpf_task_storage_get(&task_storage_map, + bpf_get_current_task_btf(), 0, 0); + if (storage) { + /* Don't let an executable delete itself */ + bpf_spin_lock(&storage->lock); + is_self_unlink = storage->exec_inode == victim->d_inode; + bpf_spin_unlock(&storage->lock); + if (is_self_unlink) + return -EPERM; + } + storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!storage) return 0; + bpf_spin_lock(&storage->lock); if (storage->value != DUMMY_STORAGE_VALUE) inode_storage_result = -1; + bpf_spin_unlock(&storage->lock); err = bpf_inode_storage_delete(&inode_storage_map, victim->d_inode); if (!err) @@ -65,7 +88,7 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, int addrlen) { __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct dummy_storage *storage; + struct local_storage *storage; int err; if (pid != monitored_pid) @@ -76,8 +99,10 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, if (!storage) return 0; + bpf_spin_lock(&storage->lock); if (storage->value != DUMMY_STORAGE_VALUE) sk_storage_result = -1; + bpf_spin_unlock(&storage->lock); err = bpf_sk_storage_delete(&sk_storage_map, sock->sk); if (!err) @@ -91,7 +116,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, int protocol, int kern) { __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct dummy_storage *storage; + struct local_storage *storage; if (pid != monitored_pid) return 0; @@ -101,7 +126,9 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, if (!storage) return 0; + bpf_spin_lock(&storage->lock); storage->value = DUMMY_STORAGE_VALUE; + bpf_spin_unlock(&storage->lock); return 0; } @@ -110,7 +137,7 @@ SEC("lsm/file_open") int BPF_PROG(file_open, struct file *file) { __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct dummy_storage *storage; + struct local_storage *storage; if (pid != monitored_pid) return 0; @@ -123,6 +150,26 @@ int BPF_PROG(file_open, struct file *file) if (!storage) return 0; + bpf_spin_lock(&storage->lock); storage->value = DUMMY_STORAGE_VALUE; + bpf_spin_unlock(&storage->lock); return 0; } + +/* This uses the local storage to remember the inode of the binary that a + * process was originally executing. + */ +SEC("lsm/bprm_committed_creds") +void BPF_PROG(exec, struct linux_binprm *bprm) +{ + struct local_storage *storage; + + storage = bpf_task_storage_get(&task_storage_map, + bpf_get_current_task_btf(), 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (storage) { + bpf_spin_lock(&storage->lock); + storage->exec_inode = bprm->file->f_inode; + bpf_spin_unlock(&storage->lock); + } +} -- cgit v1.3.1 From 4170bc6baa5446e1d85e0b7647ea54ba72aa85c4 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 6 Nov 2020 10:37:47 +0000 Subject: bpf: Exercise syscall operations for inode and sk storage Use the check_syscall_operations added for task_local_storage to exercise syscall operations for other local storage maps: * Check the absence of an element for the given fd. * Create a new element, retrieve and compare its value. * Delete the element and check again for absence. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201106103747.2780972-10-kpsingh@chromium.org --- .../selftests/bpf/prog_tests/test_local_storage.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c index 4e7f6a4965f2..5fda45982be0 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c @@ -157,7 +157,7 @@ static bool check_syscall_operations(int map_fd, int obj_fd) void test_test_local_storage(void) { char tmp_exec_path[PATH_MAX] = "/tmp/copy_of_rmXXXXXX"; - int err, serv_sk = -1, task_fd = -1; + int err, serv_sk = -1, task_fd = -1, rm_fd = -1; struct local_storage *skel = NULL; skel = local_storage__open_and_load(); @@ -181,6 +181,15 @@ void test_test_local_storage(void) if (CHECK(err < 0, "copy_rm", "err %d errno %d\n", err, errno)) goto close_prog; + rm_fd = open(tmp_exec_path, O_RDONLY); + if (CHECK(rm_fd < 0, "open", "failed to open %s err:%d, errno:%d", + tmp_exec_path, rm_fd, errno)) + goto close_prog; + + if (!check_syscall_operations(bpf_map__fd(skel->maps.inode_storage_map), + rm_fd)) + goto close_prog; + /* Sets skel->bss->monitored_pid to the pid of the forked child * forks a child process that executes tmp_exec_path and tries to * unlink its executable. This operation should be denied by the loaded @@ -209,11 +218,15 @@ void test_test_local_storage(void) CHECK(skel->data->sk_storage_result != 0, "sk_storage_result", "sk_local_storage not set\n"); - close(serv_sk); + if (!check_syscall_operations(bpf_map__fd(skel->maps.sk_storage_map), + serv_sk)) + goto close_prog; close_prog_unlink: unlink(tmp_exec_path); close_prog: + close(serv_sk); + close(rm_fd); close(task_fd); local_storage__destroy(skel); } -- cgit v1.3.1 From 21584e6a92bd2a85411793c0da3d48ab327e9b72 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 4 Nov 2020 15:30:40 +0200 Subject: selftests: netdevsim: Add test for nexthop offload API Test various aspects of the nexthop offload API on top of the netdevsim implementation. Both good and bad flows are tested. Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/netdevsim/nexthop.sh | 436 +++++++++++++++++++++ 1 file changed, 436 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/netdevsim/nexthop.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh new file mode 100755 index 000000000000..be0c1b5ee6b8 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh @@ -0,0 +1,436 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking the nexthop offload API. It makes use of netdevsim +# which registers a listener to the nexthop notification chain. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + nexthop_single_add_test + nexthop_single_add_err_test + nexthop_group_add_test + nexthop_group_add_err_test + nexthop_group_replace_test + nexthop_group_replace_err_test + nexthop_single_replace_test + nexthop_single_replace_err_test + nexthop_single_in_group_replace_test + nexthop_single_in_group_replace_err_test + nexthop_single_in_group_delete_test + nexthop_single_in_group_delete_err_test + nexthop_replay_test + nexthop_replay_err_test +" +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +DEVLINK_DEV=netdevsim/${DEV} +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +NUM_NETIFS=0 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +nexthop_check() +{ + local nharg="$1"; shift + local expected="$1"; shift + + out=$($IP nexthop show ${nharg} | sed -e 's/ *$//') + if [[ "$out" != "$expected" ]]; then + return 1 + fi + + return 0 +} + +nexthop_resource_check() +{ + local expected_occ=$1; shift + + occ=$($DEVLINK -jp resource show $DEVLINK_DEV \ + | jq '.[][][] | select(.name=="nexthops") | .["occ"]') + + if [ $expected_occ -ne $occ ]; then + return 1 + fi + + return 0 +} + +nexthop_resource_set() +{ + local size=$1; shift + + $DEVLINK resource set $DEVLINK_DEV path nexthops size $size + $DEVLINK dev reload $DEVLINK_DEV +} + +nexthop_single_add_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy" + + $IP nexthop del id 1 + nexthop_resource_check 0 + check_err $? "Wrong nexthop occupancy after delete" + + log_test "Single nexthop add and delete" +} + +nexthop_single_add_err_test() +{ + RET=0 + + nexthop_resource_set 1 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 &> /dev/null + check_fail $? "Nexthop addition succeeded when should fail" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop add failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_group_add_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + $IP nexthop add id 10 group 1/2 + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + $IP nexthop del id 10 + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy after delete" + + $IP nexthop add id 10 group 1,20/2,39 + nexthop_check "id 10" "id 10 group 1,20/2,39 trap" + check_err $? "Unexpected weighted nexthop group entry" + + nexthop_resource_check 61 + check_err $? "Wrong weighted nexthop occupancy" + + $IP nexthop del id 10 + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy after delete" + + log_test "Nexthop group add and delete" + + $IP nexthop flush &> /dev/null +} + +nexthop_group_add_err_test() +{ + RET=0 + + nexthop_resource_set 2 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + $IP nexthop add id 10 group 1/2 &> /dev/null + check_fail $? "Nexthop group addition succeeded when should fail" + + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy" + + log_test "Nexthop group add failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_group_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 10 group 1/2/3 + nexthop_check "id 10" "id 10 group 1/2/3 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 6 + check_err $? "Wrong nexthop occupancy" + + log_test "Nexthop group replace" + + $IP nexthop flush &> /dev/null +} + +nexthop_group_replace_err_test() +{ + RET=0 + + nexthop_resource_set 5 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 10 group 1/2/3 &> /dev/null + check_fail $? "Nexthop group replacement succeeded when should fail" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after failure" + + nexthop_resource_check 5 + check_err $? "Wrong nexthop occupancy after failure" + + log_test "Nexthop group replace failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop replace id 1 via 192.0.2.3 dev dummy1 + nexthop_check "id 1" "id 1 via 192.0.2.3 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_replace_err_test() +{ + RET=0 + + # This is supposed to cause the replace to fail because the new nexthop + # is programmed before deleting the replaced one. + nexthop_resource_set 1 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop replace id 1 via 192.0.2.3 dev dummy1 &> /dev/null + check_fail $? "Nexthop replace succeeded when should fail" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after failure" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy after failure" + + log_test "Single nexthop replace failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_in_group_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 + check_err $? "Failed to replace nexthop when should not" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace while in group" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_in_group_replace_err_test() +{ + RET=0 + + nexthop_resource_set 5 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null + check_fail $? "Nexthop replacement succeeded when should fail" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after failure" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after failure" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace while in group failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_in_group_delete_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop del id 1 + nexthop_check "id 10" "id 10 group 2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop delete while in group" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_in_group_delete_err_test() +{ + RET=0 + + # First, nexthop 1 will be deleted, which will reduce the occupancy to + # 5. Afterwards, a replace notification will be sent for nexthop group + # 10 with only two nexthops. Since the new group is allocated before + # the old is deleted, the replacement will fail as it will result in an + # occupancy of 7. + nexthop_resource_set 6 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2/3 + + $IP nexthop del id 1 + + nexthop_resource_check 5 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop delete while in group failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_replay_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $DEVLINK dev reload $DEVLINK_DEV + check_err $? "Failed to reload when should not" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after reload" + + nexthop_check "id 2" "id 2 via 192.0.2.3 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after reload" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after reload" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + log_test "Nexthop replay" + + $IP nexthop flush &> /dev/null +} + +nexthop_replay_err_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + # Reduce size of nexthop resource so that reload will fail. + $DEVLINK resource set $DEVLINK_DEV path nexthops size 3 + $DEVLINK dev reload $DEVLINK_DEV &> /dev/null + check_fail $? "Reload succeeded when should fail" + + $DEVLINK resource set $DEVLINK_DEV path nexthops size 9999 + $DEVLINK dev reload $DEVLINK_DEV + check_err $? "Failed to reload when should not" + + log_test "Nexthop replay failure" + + $IP nexthop flush &> /dev/null +} + +setup_prepare() +{ + local netdev + + modprobe netdevsim &> /dev/null + + echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device + while [ ! -d $SYSFS_NET_DIR ] ; do :; done + + set -e + + ip netns add testns1 + devlink dev reload $DEVLINK_DEV netns testns1 + + IP="ip -netns testns1" + DEVLINK="devlink -N testns1" + + $IP link add name dummy1 up type dummy + $IP address add 192.0.2.1/24 dev dummy1 + + set +e +} + +cleanup() +{ + pre_cleanup + ip netns del testns1 + echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device + modprobe -r netdevsim &> /dev/null +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS -- cgit v1.3.1 From f055f355faf1991ef4e6b3c3517f8f2fc247805e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 6 Nov 2020 12:33:46 -0800 Subject: selftests/bpf: Fix selftest build with old libc pidfd_open was added in 2019. Some versions of libc library don't define it. Define it manually if it's not available. Reported-by: Sergei Iudin Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/test_local_storage.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c index 5fda45982be0..fcca7ba1f368 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c @@ -12,6 +12,10 @@ #include "local_storage.skel.h" #include "network_helpers.h" +#ifndef __NR_pidfd_open +#define __NR_pidfd_open 434 +#endif + static inline int sys_pidfd_open(pid_t pid, unsigned int flags) { return syscall(__NR_pidfd_open, pid, flags); -- cgit v1.3.1 From 899f317e4886f916ed21027177177c11b577cea1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 Sep 2020 12:27:03 -0700 Subject: rcuscale: Add RCU Tasks Trace This commit adds the ability to test performance and scalability of RCU Tasks Trace updaters. Reported-by: Alexei Starovoitov Signed-off-by: Paul E. McKenney --- kernel/rcu/rcuscale.c | 32 +++++++++++++++++++++- .../selftests/rcutorture/configs/rcuscale/CFcommon | 3 ++ .../selftests/rcutorture/configs/rcuscale/TRACE01 | 15 ++++++++++ .../rcutorture/configs/rcuscale/TRACE01.boot | 1 + 4 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 create mode 100644 tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01.boot (limited to 'tools') diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index 2819b95479af..c42f2401c374 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "rcu.h" @@ -294,6 +295,35 @@ static struct rcu_scale_ops tasks_ops = { .name = "tasks" }; +/* + * Definitions for RCU-tasks-trace scalability testing. + */ + +static int tasks_trace_scale_read_lock(void) +{ + rcu_read_lock_trace(); + return 0; +} + +static void tasks_trace_scale_read_unlock(int idx) +{ + rcu_read_unlock_trace(); +} + +static struct rcu_scale_ops tasks_tracing_ops = { + .ptype = RCU_TASKS_FLAVOR, + .init = rcu_sync_scale_init, + .readlock = tasks_trace_scale_read_lock, + .readunlock = tasks_trace_scale_read_unlock, + .get_gp_seq = rcu_no_completed, + .gp_diff = rcu_seq_diff, + .async = call_rcu_tasks_trace, + .gp_barrier = rcu_barrier_tasks_trace, + .sync = synchronize_rcu_tasks_trace, + .exp_sync = synchronize_rcu_tasks_trace, + .name = "tasks-tracing" +}; + static unsigned long rcuscale_seq_diff(unsigned long new, unsigned long old) { if (!cur_ops->gp_diff) @@ -754,7 +784,7 @@ rcu_scale_init(void) long i; int firsterr = 0; static struct rcu_scale_ops *scale_ops[] = { - &rcu_ops, &srcu_ops, &srcud_ops, &tasks_ops, + &rcu_ops, &srcu_ops, &srcud_ops, &tasks_ops, &tasks_tracing_ops }; if (!torture_init_begin(scale_type, verbose)) diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon index 87caa0e932c7..90942bb5bebc 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon @@ -1,2 +1,5 @@ CONFIG_RCU_SCALE_TEST=y CONFIG_PRINTK_TIME=y +CONFIG_TASKS_RCU_GENERIC=y +CONFIG_TASKS_RCU=y +CONFIG_TASKS_TRACE_RCU=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 new file mode 100644 index 000000000000..e6baa2fbaeb3 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 @@ -0,0 +1,15 @@ +CONFIG_SMP=y +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_FAST_NO_HZ=n +CONFIG_RCU_NOCB_CPU=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n +CONFIG_RCU_BOOST=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y +CONFIG_RCU_TRACE=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01.boot b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01.boot new file mode 100644 index 000000000000..af0aff1457a4 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01.boot @@ -0,0 +1 @@ +rcuscale.scale_type=tasks-tracing -- cgit v1.3.1 From 45c7b962014da36c2ac1aee6e5014b644ba37a84 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 Sep 2020 22:24:57 -0700 Subject: rcuscale: Avoid divide by zero The rcuscale test module does not use batches, so there is only ever one batch. This commit therefore informs the kvm-recheck-rcuscale.sh script of this fact of life. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh index aa745152a525..b582113178ac 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh @@ -32,7 +32,7 @@ sed -e 's/^\[[^]]*]//' < $i/console.log | awk ' /-scale: .* gps: .* batches:/ { ngps = $9; - nbatches = $11; + nbatches = 1; } /-scale: .*writer-duration/ { -- cgit v1.3.1 From 8d68e68a781db80606c8e8f3e4383be6974878fd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Sep 2020 10:10:10 -0700 Subject: torture: Exclude "NOHZ tick-stop error" from fatal errors The "NOHZ tick-stop error: Non-RCU local softirq work is pending" warning happens frequently and appears to be irrelevant to the various torture tests. This commit therefore filters it out. If there proves to be a need to pay attention to it a later commit will add an "advice" category to allow the user to immediately see that although something happened, it was not an indictment of the system being tortured. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/console-badness.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh index 0e4c0b2eb7f0..80ae7f08b363 100755 --- a/tools/testing/selftests/rcutorture/bin/console-badness.sh +++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh @@ -13,4 +13,5 @@ egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' | grep -v 'ODEBUG: ' | grep -v 'This means that this is a DEBUG kernel and it is' | -grep -v 'Warning: unable to open an initial console' +grep -v 'Warning: unable to open an initial console' | +grep -v 'NOHZ tick-stop error: Non-RCU local softirq work is pending, handler' -- cgit v1.3.1 From 6f26d010e678249367cc00b5a827c3731c8138f3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 17 Sep 2020 12:34:01 -0700 Subject: rcutorture: Adjust scenarios SRCU-t and SRCU-u to make kconfig happy The SRCU-u scenario expects to enable lockdep but to also disable the CONFIG_PREEMPT_COUNT kconfig option. This no longer works. This commit therefore instead enables lockdep in SRCU-t, which then allows SRCU-u to disable CONFIG_PREEMPT_COUNT. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/configs/rcu/SRCU-t | 3 ++- tools/testing/selftests/rcutorture/configs/rcu/SRCU-u | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t index 6c78022c8cd8..d6557c38dfe4 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t @@ -4,7 +4,8 @@ CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n #CHECK#CONFIG_TINY_SRCU=y CONFIG_RCU_TRACE=n -CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_DEBUG_ATOMIC_SLEEP=y #CHECK#CONFIG_PREEMPT_COUNT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u index c15ada821e45..6bc24e99862f 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u @@ -4,7 +4,6 @@ CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n #CHECK#CONFIG_TINY_SRCU=y CONFIG_RCU_TRACE=n -CONFIG_DEBUG_LOCK_ALLOC=y -CONFIG_PROVE_LOCKING=y +CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_PREEMPT_COUNT=n -- cgit v1.3.1 From c64659ef29e3901be0900ec6fb0485fa3dbdcfd8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 18 Sep 2020 13:26:22 -0700 Subject: torture: Prevent jitter processes from delaying failed run Even when the kernel panics and qemu dies, runs with jitter enabled will continue uselessly until the jitter.sh processes terminate. This can be annoying if a planned one-hour run instead dies during boot. This commit therefore kills the jitter.sh processes when the run ends more than one minute prior to the termination time specified by the kvm.sh --duration argument or its default. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh | 14 ++++++++++++++ tools/testing/selftests/rcutorture/bin/kvm.sh | 5 ++++- 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index d04966ab88cc..3cd03d01857c 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -226,6 +226,20 @@ do echo "ps -fp $killpid" >> $resdir/Warnings 2>&1 ps -fp $killpid >> $resdir/Warnings 2>&1 fi + # Reduce probability of PID reuse by allowing a one-minute buffer + if test $((kruntime + 60)) -lt $seconds && test -s "$resdir/../jitter_pids" + then + awk < "$resdir/../jitter_pids" ' + NF > 0 { + pidlist = pidlist " " $1; + n++; + } + END { + if (n > 0) { + print "kill " pidlist; + } + }' | sh + fi else echo ' ---' `date`: "Kernel done" fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 6eb1d3f6524d..5ad3882563ce 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -459,8 +459,11 @@ function dump(first, pastlast, batchnum) print "if test -n \"$needqemurun\"" print "then" print "\techo ---- Starting kernels. `date` | tee -a " rd "log"; - for (j = 0; j < njitter; j++) + print "\techo > " rd "jitter_pids" + for (j = 0; j < njitter; j++) { print "\tjitter.sh " j " " dur " " ja[2] " " ja[3] "&" + print "\techo $! >> " rd "jitter_pids" + } print "\twait" print "\techo ---- All kernel runs complete. `date` | tee -a " rd "log"; print "else" -- cgit v1.3.1 From c1e06287583e5ec496e4c02bf5b319e5e41a1fd2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 21 Sep 2020 13:18:48 -0700 Subject: torture: Force weak-hashed pointers on console log Although the rcutorture scripting now deals correctly with full-up security-induced pointer obfuscation, it is still counter-productive for kernel hackers who are analyzing console output. This commit therefore sets the debug_boot_weak_hash kernel boot parameter, which enables printing of weak-hashed pointers for torture-test runs. Please note that this change applies only to runs initiated by the kvm.sh scripting. If you are instead using modprobe and rmmod, it is your responsibility to build and boot the underlying kernel to your taste. Please note further that this change does not result in a security hole in normal use. The rcutorture testing runs with a negligible userspace, no networking, and no user interaction. Besides which, there is no data of value that can be extracted from an rcutorture guest OS that could not also be extracted from the host that this guest is running on. Suggested-by: Anna-Maria Gleixner Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/functions.sh | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh index 51f3464b96d3..82663495fb38 100644 --- a/tools/testing/selftests/rcutorture/bin/functions.sh +++ b/tools/testing/selftests/rcutorture/bin/functions.sh @@ -169,6 +169,7 @@ identify_qemu () { # Output arguments for the qemu "-append" string based on CPU type # and the TORTURE_QEMU_INTERACTIVE environment variable. identify_qemu_append () { + echo debug_boot_weak_hash local console=ttyS0 case "$1" in qemu-system-x86_64|qemu-system-i386) -- cgit v1.3.1 From 7de1ca35269ee20e40c35666c810cbaea528c719 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 22 Sep 2020 17:20:11 -0700 Subject: torture: Accept time units on kvm.sh --duration argument The "--duration " has worked well for a very long time, but it can be inconvenient to compute the minutes for (say) a 28-hour run. It can also be annoying to have to let a simple boot test run for a full minute. This commit therefore permits an "s" suffix to specify seconds, "m" to specify minutes (which remains the default), "h" suffix to specify hours, and "d" to specify days. With this change, "--duration 5" still specifies that each scenario run for five minutes, but "--duration 30s" runs for only 30 seconds, "--duration 8h" runs for eight hours, and "--duration 2d" runs for two days. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/kvm.sh | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 5ad3882563ce..c348d962304f 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -58,7 +58,7 @@ usage () { echo " --datestamp string" echo " --defconfig string" echo " --dryrun sched|script" - echo " --duration minutes" + echo " --duration minutes | s | h | d" echo " --gdb" echo " --help" echo " --interactive" @@ -128,8 +128,20 @@ do shift ;; --duration) - checkarg --duration "(minutes)" $# "$2" '^[0-9]*$' '^error' - dur=$(($2*60)) + checkarg --duration "(minutes)" $# "$2" '^[0-9][0-9]*\(s\|m\|h\|d\|\)$' '^error' + mult=60 + if echo "$2" | grep -q 's$' + then + mult=1 + elif echo "$2" | grep -q 'h$' + then + mult=3600 + elif echo "$2" | grep -q 'd$' + then + mult=86400 + fi + ts=`echo $2 | sed -e 's/[smhd]$//'` + dur=$(($ts*mult)) shift ;; --gdb) -- cgit v1.3.1 From a5136f4ffb44f8c1a80406c5bfd4d233433398e6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 24 Sep 2020 08:52:33 -0700 Subject: torture: Allow alternative forms of kvm.sh command-line arguments This commit allows --build-only as a synonym for --buildonly, --kconfigs for --kconfig, and --kmake-args for --kmake-arg. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/kvm.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index c348d962304f..45d07b7b69f5 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -93,7 +93,7 @@ do TORTURE_BOOT_IMAGE="$2" shift ;; - --buildonly) + --buildonly|--build-only) TORTURE_BUILDONLY=1 ;; --configs|--config) @@ -160,7 +160,7 @@ do jitter="$2" shift ;; - --kconfig) + --kconfig|--kconfigs) checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$' TORTURE_KCONFIG_ARG="$2" shift @@ -171,7 +171,7 @@ do --kcsan) TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_KCSAN_INTERRUPT_WATCHER=y"; export TORTURE_KCONFIG_KCSAN_ARG ;; - --kmake-arg) + --kmake-arg|--kmake-args) checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$' TORTURE_KMAKE_ARG="$2" shift -- cgit v1.3.1 From 6c5b9de2c63b2f513a580c6c80d455350012e99b Mon Sep 17 00:00:00 2001 From: Samuel Hernandez Date: Sun, 11 Oct 2020 14:22:31 -0400 Subject: rcutorture/nolibc: Fix a typo in header file This fixes a typo. Before this, the AT_FDCWD macro would be defined regardless of whether or not it's been defined before. Signed-off-by: Samuel Hernandez Signed-off-by: Willy Tarreau Signed-off-by: Paul E. McKenney --- tools/include/nolibc/nolibc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 2551e9b71167..d6d2623c99ad 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -231,7 +231,7 @@ struct rusage { #define DT_SOCK 12 /* all the *at functions */ -#ifndef AT_FDWCD +#ifndef AT_FDCWD #define AT_FDCWD -100 #endif -- cgit v1.3.1 From 5be7d80deb80ceef50a6bd86d83c8fd62264778a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 15 Oct 2020 11:42:17 -0700 Subject: torture: Make kvm-check-branches.sh use --allcpus Currently the kvm-check-branches.sh script calculates the number of CPUs and passes this to the kvm.sh --cpus command-line argument. This works, but this commit saves a line by instead using the new kvm.sh --allcpus command-line argument. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh index 6e65c134e5f1..370406bbfeed 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh @@ -52,8 +52,7 @@ echo Results directory: $resdir/$ds KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM PATH=${KVM}/bin:$PATH; export PATH . functions.sh -cpus="`identify_qemu_vcpus`" -echo Using up to $cpus CPUs. +echo Using all `identify_qemu_vcpus` CPUs. # Each pass through this loop does one command-line argument. for gitbr in $@ @@ -74,7 +73,7 @@ do # Test the specified commit. git checkout $i > $resdir/$ds/$idir/git-checkout.out 2>&1 echo git checkout return code: $? "(Commit $ntry: $i)" - kvm.sh --cpus $cpus --duration 3 --trust-make > $resdir/$ds/$idir/kvm.sh.out 2>&1 + kvm.sh --allcpus --duration 3 --trust-make > $resdir/$ds/$idir/kvm.sh.out 2>&1 ret=$? echo kvm.sh return code $ret for commit $i from branch $gitbr -- cgit v1.3.1 From 06dc8d4591b8d8ce0ece94474718b53f0a5c5de3 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Tue, 20 Oct 2020 21:22:56 +0200 Subject: tools/nolibc: Fix a spelling error in a comment Fix a spelling in the comment line. s/memry/memory/p This is on linux-next. Signed-off-by: Bhaskar Chowdhury Signed-off-by: Willy Tarreau Signed-off-by: Paul E. McKenney --- tools/include/nolibc/nolibc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index d6d2623c99ad..e61d36cd4e50 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -107,7 +107,7 @@ static int errno; #endif /* errno codes all ensure that they will not conflict with a valid pointer - * because they all correspond to the highest addressable memry page. + * because they all correspond to the highest addressable memory page. */ #define MAX_ERRNO 4095 -- cgit v1.3.1 From 01f9e708d9eae6335ae9ff25ab09893c20727a55 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 2 Nov 2020 18:12:20 +0100 Subject: tools/rcutorture: Fix BUG parsing of console.log For the rcutorture test summary log file console.log of virtual machines is parsed. When a console.log contains "DEBUG", BUG counter is incremented because regular expression does not handle to ignore DEBUG. Signed-off-by: Anna-Maria Behnsen Reviewed-by: Benedikt Spranger Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/parse-console.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index e03338091a06..263b1be50008 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -133,7 +133,7 @@ then then summary="$summary Warnings: $n_warn" fi - n_bugs=`egrep -c 'BUG|Oops:' $file` + n_bugs=`egrep -c '\bBUG|Oops:' $file` if test "$n_bugs" -ne 0 then summary="$summary Bugs: $n_bugs" -- cgit v1.3.1 From ebb477cb2fb7a44ff600e0a7393bad906a0ecd80 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 11 Aug 2020 11:27:33 -0700 Subject: tools/memory-model: Document categories of ordering primitives The Linux kernel has a number of categories of ordering primitives, which are recorded in the LKMM implementation and hinted at by cheatsheet.txt. But there is no overview of these categories, and such an overview is needed in order to understand multithreaded LKMM litmus tests. This commit therefore adds an ordering.txt as well as extracting a control-dependencies.txt from memory-barriers.txt. It also updates the README file. [ paulmck: Apply Akira Yokosawa file-placement feedback. ] [ paulmck: Apply Alan Stern feedback. ] [ paulmck: Apply self-review feedback. ] Signed-off-by: Paul E. McKenney --- tools/memory-model/Documentation/README | 17 + .../Documentation/control-dependencies.txt | 258 ++++++++++ tools/memory-model/Documentation/ordering.txt | 556 +++++++++++++++++++++ 3 files changed, 831 insertions(+) create mode 100644 tools/memory-model/Documentation/control-dependencies.txt create mode 100644 tools/memory-model/Documentation/ordering.txt (limited to 'tools') diff --git a/tools/memory-model/Documentation/README b/tools/memory-model/Documentation/README index 2d9539f19912..db90a26dbdf4 100644 --- a/tools/memory-model/Documentation/README +++ b/tools/memory-model/Documentation/README @@ -11,6 +11,12 @@ the material provided by documents earlier in this list. o You are new to Linux-kernel concurrency: simple.txt +o You have some background in Linux-kernel concurrency, and would + like an overview of the types of low-level concurrency primitives + that the Linux kernel provides: ordering.txt + + Here, "low level" means atomic operations to single variables. + o You are familiar with the Linux-kernel concurrency primitives that you need, and just want to get started with LKMM litmus tests: litmus-tests.txt @@ -19,6 +25,9 @@ o You are familiar with Linux-kernel concurrency, and would like a detailed intuitive understanding of LKMM, including situations involving more than two threads: recipes.txt +o You would like a detailed understanding of what your compiler can + and cannot do to control dependencies: control-dependencies.txt + o You are familiar with Linux-kernel concurrency and the use of LKMM, and would like a quick reference: cheatsheet.txt @@ -41,6 +50,10 @@ README cheatsheet.txt Quick-reference guide to the Linux-kernel memory model. +control-dependencies.txt + Guide to preventing compiler optimizations from destroying + your control dependencies. + explanation.txt Detailed description of the memory model. @@ -48,6 +61,10 @@ litmus-tests.txt The format, features, capabilities, and limitations of the litmus tests that LKMM can evaluate. +ordering.txt + Overview of the Linux kernel's low-level memory-ordering + primitives by category. + recipes.txt Common memory-ordering patterns. diff --git a/tools/memory-model/Documentation/control-dependencies.txt b/tools/memory-model/Documentation/control-dependencies.txt new file mode 100644 index 000000000000..8b743d20fe27 --- /dev/null +++ b/tools/memory-model/Documentation/control-dependencies.txt @@ -0,0 +1,258 @@ +CONTROL DEPENDENCIES +==================== + +A major difficulty with control dependencies is that current compilers +do not support them. One purpose of this document is therefore to +help you prevent your compiler from breaking your code. However, +control dependencies also pose other challenges, which leads to the +second purpose of this document, namely to help you to avoid breaking +your own code, even in the absence of help from your compiler. + +One such challenge is that control dependencies order only later stores. +Therefore, a load-load control dependency will not preserve ordering +unless a read memory barrier is provided. Consider the following code: + + q = READ_ONCE(a); + if (q) + p = READ_ONCE(b); + +This is not guaranteed to provide any ordering because some types of CPUs +are permitted to predict the result of the load from "b". This prediction +can cause other CPUs to see this load as having happened before the load +from "a". This means that an explicit read barrier is required, for example +as follows: + + q = READ_ONCE(a); + if (q) { + smp_rmb(); + p = READ_ONCE(b); + } + +However, stores are not speculated. This means that ordering is +(usually) guaranteed for load-store control dependencies, as in the +following example: + + q = READ_ONCE(a); + if (q) + WRITE_ONCE(b, 1); + +Control dependencies can pair with each other and with other types +of ordering. But please note that neither the READ_ONCE() nor the +WRITE_ONCE() are optional. Without the READ_ONCE(), the compiler might +fuse the load from "a" with other loads. Without the WRITE_ONCE(), +the compiler might fuse the store to "b" with other stores. Worse yet, +the compiler might convert the store into a load and a check followed +by a store, and this compiler-generated load would not be ordered by +the control dependency. + +Furthermore, if the compiler is able to prove that the value of variable +"a" is always non-zero, it would be well within its rights to optimize +the original example by eliminating the "if" statement as follows: + + q = a; + b = 1; /* BUG: Compiler and CPU can both reorder!!! */ + +So don't leave out either the READ_ONCE() or the WRITE_ONCE(). +In particular, although READ_ONCE() does force the compiler to emit a +load, it does *not* force the compiler to actually use the loaded value. + +It is tempting to try use control dependencies to enforce ordering on +identical stores on both branches of the "if" statement as follows: + + q = READ_ONCE(a); + if (q) { + barrier(); + WRITE_ONCE(b, 1); + do_something(); + } else { + barrier(); + WRITE_ONCE(b, 1); + do_something_else(); + } + +Unfortunately, current compilers will transform this as follows at high +optimization levels: + + q = READ_ONCE(a); + barrier(); + WRITE_ONCE(b, 1); /* BUG: No ordering vs. load from a!!! */ + if (q) { + /* WRITE_ONCE(b, 1); -- moved up, BUG!!! */ + do_something(); + } else { + /* WRITE_ONCE(b, 1); -- moved up, BUG!!! */ + do_something_else(); + } + +Now there is no conditional between the load from "a" and the store to +"b", which means that the CPU is within its rights to reorder them: The +conditional is absolutely required, and must be present in the final +assembly code, after all of the compiler and link-time optimizations +have been applied. Therefore, if you need ordering in this example, +you must use explicit memory ordering, for example, smp_store_release(): + + q = READ_ONCE(a); + if (q) { + smp_store_release(&b, 1); + do_something(); + } else { + smp_store_release(&b, 1); + do_something_else(); + } + +Without explicit memory ordering, control-dependency-based ordering is +guaranteed only when the stores differ, for example: + + q = READ_ONCE(a); + if (q) { + WRITE_ONCE(b, 1); + do_something(); + } else { + WRITE_ONCE(b, 2); + do_something_else(); + } + +The initial READ_ONCE() is still required to prevent the compiler from +knowing too much about the value of "a". + +But please note that you need to be careful what you do with the local +variable "q", otherwise the compiler might be able to guess the value +and again remove the conditional branch that is absolutely required to +preserve ordering. For example: + + q = READ_ONCE(a); + if (q % MAX) { + WRITE_ONCE(b, 1); + do_something(); + } else { + WRITE_ONCE(b, 2); + do_something_else(); + } + +If MAX is compile-time defined to be 1, then the compiler knows that +(q % MAX) must be equal to zero, regardless of the value of "q". +The compiler is therefore within its rights to transform the above code +into the following: + + q = READ_ONCE(a); + WRITE_ONCE(b, 2); + do_something_else(); + +Given this transformation, the CPU is not required to respect the ordering +between the load from variable "a" and the store to variable "b". It is +tempting to add a barrier(), but this does not help. The conditional +is gone, and the barrier won't bring it back. Therefore, if you need +to relying on control dependencies to produce this ordering, you should +make sure that MAX is greater than one, perhaps as follows: + + q = READ_ONCE(a); + BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */ + if (q % MAX) { + WRITE_ONCE(b, 1); + do_something(); + } else { + WRITE_ONCE(b, 2); + do_something_else(); + } + +Please note once again that each leg of the "if" statement absolutely +must store different values to "b". As in previous examples, if the two +values were identical, the compiler could pull this store outside of the +"if" statement, destroying the control dependency's ordering properties. + +You must also be careful avoid relying too much on boolean short-circuit +evaluation. Consider this example: + + q = READ_ONCE(a); + if (q || 1 > 0) + WRITE_ONCE(b, 1); + +Because the first condition cannot fault and the second condition is +always true, the compiler can transform this example as follows, again +destroying the control dependency's ordering: + + q = READ_ONCE(a); + WRITE_ONCE(b, 1); + +This is yet another example showing the importance of preventing the +compiler from out-guessing your code. Again, although READ_ONCE() really +does force the compiler to emit code for a given load, the compiler is +within its rights to discard the loaded value. + +In addition, control dependencies apply only to the then-clause and +else-clause of the "if" statement in question. In particular, they do +not necessarily order the code following the entire "if" statement: + + q = READ_ONCE(a); + if (q) { + WRITE_ONCE(b, 1); + } else { + WRITE_ONCE(b, 2); + } + WRITE_ONCE(c, 1); /* BUG: No ordering against the read from "a". */ + +It is tempting to argue that there in fact is ordering because the +compiler cannot reorder volatile accesses and also cannot reorder +the writes to "b" with the condition. Unfortunately for this line +of reasoning, the compiler might compile the two writes to "b" as +conditional-move instructions, as in this fanciful pseudo-assembly +language: + + ld r1,a + cmp r1,$0 + cmov,ne r4,$1 + cmov,eq r4,$2 + st r4,b + st $1,c + +The control dependencies would then extend only to the pair of cmov +instructions and the store depending on them. This means that a weakly +ordered CPU would have no dependency of any sort between the load from +"a" and the store to "c". In short, control dependencies provide ordering +only to the stores in the then-clause and else-clause of the "if" statement +in question (including functions invoked by those two clauses), and not +to code following that "if" statement. + + +In summary: + + (*) Control dependencies can order prior loads against later stores. + However, they do *not* guarantee any other sort of ordering: + Not prior loads against later loads, nor prior stores against + later anything. If you need these other forms of ordering, use + smp_load_acquire(), smp_store_release(), or, in the case of prior + stores and later loads, smp_mb(). + + (*) If both legs of the "if" statement contain identical stores to + the same variable, then you must explicitly order those stores, + either by preceding both of them with smp_mb() or by using + smp_store_release(). Please note that it is *not* sufficient to use + barrier() at beginning and end of each leg of the "if" statement + because, as shown by the example above, optimizing compilers can + destroy the control dependency while respecting the letter of the + barrier() law. + + (*) Control dependencies require at least one run-time conditional + between the prior load and the subsequent store, and this + conditional must involve the prior load. If the compiler is able + to optimize the conditional away, it will have also optimized + away the ordering. Careful use of READ_ONCE() and WRITE_ONCE() + can help to preserve the needed conditional. + + (*) Control dependencies require that the compiler avoid reordering the + dependency into nonexistence. Careful use of READ_ONCE() or + atomic{,64}_read() can help to preserve your control dependency. + + (*) Control dependencies apply only to the then-clause and else-clause + of the "if" statement containing the control dependency, including + any functions that these two clauses call. Control dependencies + do *not* apply to code beyond the end of that "if" statement. + + (*) Control dependencies pair normally with other types of barriers. + + (*) Control dependencies do *not* provide multicopy atomicity. If you + need all the CPUs to agree on the ordering of a given store against + all other accesses, use smp_mb(). + + (*) Compilers do not understand control dependencies. It is therefore + your job to ensure that they do not break your code. diff --git a/tools/memory-model/Documentation/ordering.txt b/tools/memory-model/Documentation/ordering.txt new file mode 100644 index 000000000000..9b0949d3f5ec --- /dev/null +++ b/tools/memory-model/Documentation/ordering.txt @@ -0,0 +1,556 @@ +This document gives an overview of the categories of memory-ordering +operations provided by the Linux-kernel memory model (LKMM). + + +Categories of Ordering +====================== + +This section lists LKMM's three top-level categories of memory-ordering +operations in decreasing order of strength: + +1. Barriers (also known as "fences"). A barrier orders some or + all of the CPU's prior operations against some or all of its + subsequent operations. + +2. Ordered memory accesses. These operations order themselves + against some or all of the CPU's prior accesses or some or all + of the CPU's subsequent accesses, depending on the subcategory + of the operation. + +3. Unordered accesses, as the name indicates, have no ordering + properties except to the extent that they interact with an + operation in the previous categories. This being the real world, + some of these "unordered" operations provide limited ordering + in some special situations. + +Each of the above categories is described in more detail by one of the +following sections. + + +Barriers +======== + +Each of the following categories of barriers is described in its own +subsection below: + +a. Full memory barriers. + +b. Read-modify-write (RMW) ordering augmentation barriers. + +c. Write memory barrier. + +d. Read memory barrier. + +e. Compiler barrier. + +Note well that many of these primitives generate absolutely no code +in kernels built with CONFIG_SMP=n. Therefore, if you are writing +a device driver, which must correctly order accesses to a physical +device even in kernels built with CONFIG_SMP=n, please use the +ordering primitives provided for that purpose. For example, instead of +smp_mb(), use mb(). See the "Linux Kernel Device Drivers" book or the +https://lwn.net/Articles/698014/ article for more information. + + +Full Memory Barriers +-------------------- + +The Linux-kernel primitives that provide full ordering include: + +o The smp_mb() full memory barrier. + +o Value-returning RMW atomic operations whose names do not end in + _acquire, _release, or _relaxed. + +o RCU's grace-period primitives. + +First, the smp_mb() full memory barrier orders all of the CPU's prior +accesses against all subsequent accesses from the viewpoint of all CPUs. +In other words, all CPUs will agree that any earlier action taken +by that CPU happened before any later action taken by that same CPU. +For example, consider the following: + + WRITE_ONCE(x, 1); + smp_mb(); // Order store to x before load from y. + r1 = READ_ONCE(y); + +All CPUs will agree that the store to "x" happened before the load +from "y", as indicated by the comment. And yes, please comment your +memory-ordering primitives. It is surprisingly hard to remember their +purpose after even a few months. + +Second, some RMW atomic operations provide full ordering. These +operations include value-returning RMW atomic operations (that is, those +with non-void return types) whose names do not end in _acquire, _release, +or _relaxed. Examples include atomic_add_return(), atomic_dec_and_test(), +cmpxchg(), and xchg(). Note that conditional RMW atomic operations such +as cmpxchg() are only guaranteed to provide ordering when they succeed. +When RMW atomic operations provide full ordering, they partition the +CPU's accesses into three groups: + +1. All code that executed prior to the RMW atomic operation. + +2. The RMW atomic operation itself. + +3. All code that executed after the RMW atomic operation. + +All CPUs will agree that any operation in a given partition happened +before any operation in a higher-numbered partition. + +In contrast, non-value-returning RMW atomic operations (that is, those +with void return types) do not guarantee any ordering whatsoever. Nor do +value-returning RMW atomic operations whose names end in _relaxed. +Examples of the former include atomic_inc() and atomic_dec(), +while examples of the latter include atomic_cmpxchg_relaxed() and +atomic_xchg_relaxed(). Similarly, value-returning non-RMW atomic +operations such as atomic_read() do not guarantee full ordering, and +are covered in the later section on unordered operations. + +Value-returning RMW atomic operations whose names end in _acquire or +_release provide limited ordering, and will be described later in this +document. + +Finally, RCU's grace-period primitives provide full ordering. These +primitives include synchronize_rcu(), synchronize_rcu_expedited(), +synchronize_srcu() and so on. However, these primitives have orders +of magnitude greater overhead than smp_mb(), atomic_xchg(), and so on. +Furthermore, RCU's grace-period primitives can only be invoked in +sleepable contexts. Therefore, RCU's grace-period primitives are +typically instead used to provide ordering against RCU read-side critical +sections, as documented in their comment headers. But of course if you +need a synchronize_rcu() to interact with readers, it costs you nothing +to also rely on its additional full-memory-barrier semantics. Just please +carefully comment this, otherwise your future self will hate you. + + +RMW Ordering Augmentation Barriers +---------------------------------- + +As noted in the previous section, non-value-returning RMW operations +such as atomic_inc() and atomic_dec() guarantee no ordering whatsoever. +Nevertheless, a number of popular CPU families, including x86, provide +full ordering for these primitives. One way to obtain full ordering on +all architectures is to add a call to smp_mb(): + + WRITE_ONCE(x, 1); + atomic_inc(&my_counter); + smp_mb(); // Inefficient on x86!!! + r1 = READ_ONCE(y); + +This works, but the added smp_mb() adds needless overhead for +x86, on which atomic_inc() provides full ordering all by itself. +The smp_mb__after_atomic() primitive can be used instead: + + WRITE_ONCE(x, 1); + atomic_inc(&my_counter); + smp_mb__after_atomic(); // Order store to x before load from y. + r1 = READ_ONCE(y); + +The smp_mb__after_atomic() primitive emits code only on CPUs whose +atomic_inc() implementations do not guarantee full ordering, thus +incurring no unnecessary overhead on x86. There are a number of +variations on the smp_mb__*() theme: + +o smp_mb__before_atomic(), which provides full ordering prior + to an unordered RMW atomic operation. + +o smp_mb__after_atomic(), which, as shown above, provides full + ordering subsequent to an unordered RMW atomic operation. + +o smp_mb__after_spinlock(), which provides full ordering subsequent + to a successful spinlock acquisition. Note that spin_lock() is + always successful but spin_trylock() might not be. + +o smp_mb__after_srcu_read_unlock(), which provides full ordering + subsequent to an srcu_read_unlock(). + +It is bad practice to place code between the smp__*() primitive and the +operation whose ordering that it is augmenting. The reason is that the +ordering of this intervening code will differ from one CPU architecture +to another. + + +Write Memory Barrier +-------------------- + +The Linux kernel's write memory barrier is smp_wmb(). If a CPU executes +the following code: + + WRITE_ONCE(x, 1); + smp_wmb(); + WRITE_ONCE(y, 1); + +Then any given CPU will see the write to "x" has having happened before +the write to "y". However, you are usually better off using a release +store, as described in the "Release Operations" section below. + +Note that smp_wmb() might fail to provide ordering for unmarked C-language +stores because profile-driven optimization could determine that the +value being overwritten is almost always equal to the new value. Such a +compiler might then reasonably decide to transform "x = 1" and "y = 1" +as follows: + + if (x != 1) + x = 1; + smp_wmb(); // BUG: does not order the reads!!! + if (y != 1) + y = 1; + +Therefore, if you need to use smp_wmb() with unmarked C-language writes, +you will need to make sure that none of the compilers used to build +the Linux kernel carry out this sort of transformation, both now and in +the future. + + +Read Memory Barrier +------------------- + +The Linux kernel's read memory barrier is smp_rmb(). If a CPU executes +the following code: + + r0 = READ_ONCE(y); + smp_rmb(); + r1 = READ_ONCE(x); + +Then any given CPU will see the read from "y" as having preceded the read from +"x". However, you are usually better off using an acquire load, as described +in the "Acquire Operations" section below. + +Compiler Barrier +---------------- + +The Linux kernel's compiler barrier is barrier(). This primitive +prohibits compiler code-motion optimizations that might move memory +references across the point in the code containing the barrier(), but +does not constrain hardware memory ordering. For example, this can be +used to prevent to compiler from moving code across an infinite loop: + + WRITE_ONCE(x, 1); + while (dontstop) + barrier(); + r1 = READ_ONCE(y); + +Without the barrier(), the compiler would be within its rights to move the +WRITE_ONCE() to follow the loop. This code motion could be problematic +in the case where an interrupt handler terminates the loop. Another way +to handle this is to use READ_ONCE() for the load of "dontstop". + +Note that the barriers discussed previously use barrier() or its low-level +equivalent in their implementations. + + +Ordered Memory Accesses +======================= + +The Linux kernel provides a wide variety of ordered memory accesses: + +a. Release operations. + +b. Acquire operations. + +c. RCU read-side ordering. + +d. Control dependencies. + +Each of the above categories has its own section below. + + +Release Operations +------------------ + +Release operations include smp_store_release(), atomic_set_release(), +rcu_assign_pointer(), and value-returning RMW operations whose names +end in _release. These operations order their own store against all +of the CPU's prior memory accesses. Release operations often provide +improved readability and performance compared to explicit barriers. +For example, use of smp_store_release() saves a line compared to the +smp_wmb() example above: + + WRITE_ONCE(x, 1); + smp_store_release(&y, 1); + +More important, smp_store_release() makes it easier to connect up the +different pieces of the concurrent algorithm. The variable stored to +by the smp_store_release(), in this case "y", will normally be used in +an acquire operation in other parts of the concurrent algorithm. + +To see the performance advantages, suppose that the above example read +from "x" instead of writing to it. Then an smp_wmb() could not guarantee +ordering, and an smp_mb() would be needed instead: + + r1 = READ_ONCE(x); + smp_mb(); + WRITE_ONCE(y, 1); + +But smp_mb() often incurs much higher overhead than does +smp_store_release(), which still provides the needed ordering of "x" +against "y". On x86, the version using smp_store_release() might compile +to a simple load instruction followed by a simple store instruction. +In contrast, the smp_mb() compiles to an expensive instruction that +provides the needed ordering. + +There is a wide variety of release operations: + +o Store operations, including not only the aforementioned + smp_store_release(), but also atomic_set_release(), and + atomic_long_set_release(). + +o RCU's rcu_assign_pointer() operation. This is the same as + smp_store_release() except that: (1) It takes the pointer to + be assigned to instead of a pointer to that pointer, (2) It + is intended to be used in conjunction with rcu_dereference() + and similar rather than smp_load_acquire(), and (3) It checks + for an RCU-protected pointer in "sparse" runs. + +o Value-returning RMW operations whose names end in _release, + such as atomic_fetch_add_release() and cmpxchg_release(). + Note that release ordering is guaranteed only against the + memory-store portion of the RMW operation, and not against the + memory-load portion. Note also that conditional operations such + as cmpxchg_release() are only guaranteed to provide ordering + when they succeed. + +As mentioned earlier, release operations are often paired with acquire +operations, which are the subject of the next section. + + +Acquire Operations +------------------ + +Acquire operations include smp_load_acquire(), atomic_read_acquire(), +and value-returning RMW operations whose names end in _acquire. These +operations order their own load against all of the CPU's subsequent +memory accesses. Acquire operations often provide improved performance +and readability compared to explicit barriers. For example, use of +smp_load_acquire() saves a line compared to the smp_rmb() example above: + + r0 = smp_load_acquire(&y); + r1 = READ_ONCE(x); + +As with smp_store_release(), this also makes it easier to connect +the different pieces of the concurrent algorithm by looking for the +smp_store_release() that stores to "y". In addition, smp_load_acquire() +improves upon smp_rmb() by ordering against subsequent stores as well +as against subsequent loads. + +There are a couple of categories of acquire operations: + +o Load operations, including not only the aforementioned + smp_load_acquire(), but also atomic_read_acquire(), and + atomic64_read_acquire(). + +o Value-returning RMW operations whose names end in _acquire, + such as atomic_xchg_acquire() and atomic_cmpxchg_acquire(). + Note that acquire ordering is guaranteed only against the + memory-load portion of the RMW operation, and not against the + memory-store portion. Note also that conditional operations + such as atomic_cmpxchg_acquire() are only guaranteed to provide + ordering when they succeed. + +Symmetry being what it is, acquire operations are often paired with the +release operations covered earlier. For example, consider the following +example, where task0() and task1() execute concurrently: + + void task0(void) + { + WRITE_ONCE(x, 1); + smp_store_release(&y, 1); + } + + void task1(void) + { + r0 = smp_load_acquire(&y); + r1 = READ_ONCE(x); + } + +If "x" and "y" are both initially zero, then either r0's final value +will be zero or r1's final value will be one, thus providing the required +ordering. + + +RCU Read-Side Ordering +---------------------- + +This category includes read-side markers such as rcu_read_lock() +and rcu_read_unlock() as well as pointer-traversal primitives such as +rcu_dereference() and srcu_dereference(). + +Compared to locking primitives and RMW atomic operations, markers +for RCU read-side critical sections incur very low overhead because +they interact only with the corresponding grace-period primitives. +For example, the rcu_read_lock() and rcu_read_unlock() markers interact +with synchronize_rcu(), synchronize_rcu_expedited(), and call_rcu(). +The way this works is that if a given call to synchronize_rcu() cannot +prove that it started before a given call to rcu_read_lock(), then +that synchronize_rcu() must block until the matching rcu_read_unlock() +is reached. For more information, please see the synchronize_rcu() +docbook header comment and the material in Documentation/RCU. + +RCU's pointer-traversal primitives, including rcu_dereference() and +srcu_dereference(), order their load (which must be a pointer) against any +of the CPU's subsequent memory accesses whose address has been calculated +from the value loaded. There is said to be an *address dependency* +from the value returned by the rcu_dereference() or srcu_dereference() +to that subsequent memory access. + +A call to rcu_dereference() for a given RCU-protected pointer is +usually paired with a call to a call to rcu_assign_pointer() for that +same pointer in much the same way that a call to smp_load_acquire() is +paired with a call to smp_store_release(). Calls to rcu_dereference() +and rcu_assign_pointer are often buried in other APIs, for example, +the RCU list API members defined in include/linux/rculist.h. For more +information, please see the docbook headers in that file, the most +recent LWN article on the RCU API (https://lwn.net/Articles/777036/), +and of course the material in Documentation/RCU. + +If the pointer value is manipulated between the rcu_dereference() +that returned it and a later dereference(), please read +Documentation/RCU/rcu_dereference.rst. It can also be quite helpful to +review uses in the Linux kernel. + + +Control Dependencies +-------------------- + +A control dependency extends from a marked load (READ_ONCE() or stronger) +through an "if" condition to a marked store (WRITE_ONCE() or stronger) +that is executed only by one of the legs of that "if" statement. +Control dependencies are so named because they are mediated by +control-flow instructions such as comparisons and conditional branches. + +In short, you can use a control dependency to enforce ordering between +an READ_ONCE() and a WRITE_ONCE() when there is an "if" condition +between them. The canonical example is as follows: + + q = READ_ONCE(a); + if (q) + WRITE_ONCE(b, 1); + +In this case, all CPUs would see the read from "a" as happening before +the write to "b". + +However, control dependencies are easily destroyed by compiler +optimizations, so any use of control dependencies must take into account +all of the compilers used to build the Linux kernel. Please see the +"control-dependencies.txt" file for more information. + + +Unordered Accesses +================== + +Each of these two categories of unordered accesses has a section below: + +a. Unordered marked operations. + +b. Unmarked C-language accesses. + + +Unordered Marked Operations +--------------------------- + +Unordered operations to different variables are just that, unordered. +However, if a group of CPUs apply these operations to a single variable, +all the CPUs will agree on the operation order. Of course, the ordering +of unordered marked accesses can also be constrained using the mechanisms +described earlier in this document. + +These operations come in three categories: + +o Marked writes, such as WRITE_ONCE() and atomic_set(). These + primitives required the compiler to emit the corresponding store + instructions in the expected execution order, thus suppressing + a number of destructive optimizations. However, they provide no + hardware ordering guarantees, and in fact many CPUs will happily + reorder marked writes with each other or with other unordered + operations, unless these operations are to the same variable. + +o Marked reads, such as READ_ONCE() and atomic_read(). These + primitives required the compiler to emit the corresponding load + instructions in the expected execution order, thus suppressing + a number of destructive optimizations. However, they provide no + hardware ordering guarantees, and in fact many CPUs will happily + reorder marked reads with each other or with other unordered + operations, unless these operations are to the same variable. + +o Unordered RMW atomic operations. These are non-value-returning + RMW atomic operations whose names do not end in _acquire or + _release, and also value-returning RMW operations whose names + end in _relaxed. Examples include atomic_add(), atomic_or(), + and atomic64_fetch_xor_relaxed(). These operations do carry + out the specified RMW operation atomically, for example, five + concurrent atomic_inc() operations applied to a given variable + will reliably increase the value of that variable by five. + However, many CPUs will happily reorder these operations with + each other or with other unordered operations. + + This category of operations can be efficiently ordered using + smp_mb__before_atomic() and smp_mb__after_atomic(), as was + discussed in the "RMW Ordering Augmentation Barriers" section. + +In short, these operations can be freely reordered unless they are all +operating on a single variable or unless they are constrained by one of +the operations called out earlier in this document. + + +Unmarked C-Language Accesses +---------------------------- + +Unmarked C-language accesses are normal variable accesses to normal +variables, that is, to variables that are not "volatile" and are not +C11 atomic variables. These operations provide no ordering guarantees, +and further do not guarantee "atomic" access. For example, the compiler +might (and sometimes does) split a plain C-language store into multiple +smaller stores. A load from that same variable running on some other +CPU while such a store is executing might see a value that is a mashup +of the old value and the new value. + +Unmarked C-language accesses are unordered, and are also subject to +any number of compiler optimizations, many of which can break your +concurrent code. It is possible to used unmarked C-language accesses for +shared variables that are subject to concurrent access, but great care +is required on an ongoing basis. The compiler-constraining barrier() +primitive can be helpful, as can the various ordering primitives discussed +in this document. It nevertheless bears repeating that use of unmarked +C-language accesses requires careful attention to not just your code, +but to all the compilers that might be used to build it. Such compilers +might replace a series of loads with a single load, and might replace +a series of stores with a single store. Some compilers will even split +a single store into multiple smaller stores. + +But there are some ways of using unmarked C-language accesses for shared +variables without such worries: + +o Guard all accesses to a given variable by a particular lock, + so that there are never concurrent conflicting accesses to + that variable. (There are "conflicting accesses" when + (1) at least one of the concurrent accesses to a variable is an + unmarked C-language access and (2) when at least one of those + accesses is a write, whether marked or not.) + +o As above, but using other synchronization primitives such + as reader-writer locks or sequence locks. + +o Use locking or other means to ensure that all concurrent accesses + to a given variable are reads. + +o Restrict use of a given variable to statistics or heuristics + where the occasional bogus value can be tolerated. + +o Declare the accessed variables as C11 atomics. + https://lwn.net/Articles/691128/ + +o Declare the accessed variables as "volatile". + +If you need to live more dangerously, please do take the time to +understand the compilers. One place to start is these two LWN +articles: + +Who's afraid of a big bad optimizing compiler? + https://lwn.net/Articles/793253 +Calibrating your fear of big bad optimizing compilers + https://lwn.net/Articles/799218 + +Used properly, unmarked C-language accesses can reduce overhead on +fastpaths. However, the price is great care and continual attention +to your compiler as new versions come out and as new optimizations +are enabled. -- cgit v1.3.1 From 0a27ce6b6968866fa8e3bd70371d67752db7718f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 22 Oct 2020 15:16:08 -0700 Subject: tools/memory-model: Add a glossary of LKMM terms [ paulmck: Apply Alan Stern feedback. ] Signed-off-by: Paul E. McKenney --- tools/memory-model/Documentation/glossary.txt | 172 ++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 tools/memory-model/Documentation/glossary.txt (limited to 'tools') diff --git a/tools/memory-model/Documentation/glossary.txt b/tools/memory-model/Documentation/glossary.txt new file mode 100644 index 000000000000..79acb75d56ea --- /dev/null +++ b/tools/memory-model/Documentation/glossary.txt @@ -0,0 +1,172 @@ +This document contains brief definitions of LKMM-related terms. Like most +glossaries, it is not intended to be read front to back (except perhaps +as a way of confirming a diagnosis of OCD), but rather to be searched +for specific terms. + + +Address Dependency: When the address of a later memory access is computed + based on the value returned by an earlier load, an "address + dependency" extends from that load extending to the later access. + Address dependencies are quite common in RCU read-side critical + sections: + + 1 rcu_read_lock(); + 2 p = rcu_dereference(gp); + 3 do_something(p->a); + 4 rcu_read_unlock(); + + In this case, because the address of "p->a" on line 3 is computed + from the value returned by the rcu_dereference() on line 2, the + address dependency extends from that rcu_dereference() to that + "p->a". In rare cases, optimizing compilers can destroy address + dependencies. Please see Documentation/RCU/rcu_dereference.txt + for more information. + + See also "Control Dependency" and "Data Dependency". + +Acquire: With respect to a lock, acquiring that lock, for example, + using spin_lock(). With respect to a non-lock shared variable, + a special operation that includes a load and which orders that + load before later memory references running on that same CPU. + An example special acquire operation is smp_load_acquire(), + but atomic_read_acquire() and atomic_xchg_acquire() also include + acquire loads. + + When an acquire load returns the value stored by a release store + to that same variable, then all operations preceding that store + happen before any operations following that load acquire. + + See also "Relaxed" and "Release". + +Coherence (co): When one CPU's store to a given variable overwrites + either the value from another CPU's store or some later value, + there is said to be a coherence link from the second CPU to + the first. + + It is also possible to have a coherence link within a CPU, which + is a "coherence internal" (coi) link. The term "coherence + external" (coe) link is used when it is necessary to exclude + the coi case. + + See also "From-reads" and "Reads-from". + +Control Dependency: When a later store's execution depends on a test + of a value computed from a value returned by an earlier load, + a "control dependency" extends from that load to that store. + For example: + + 1 if (READ_ONCE(x)) + 2 WRITE_ONCE(y, 1); + + Here, the control dependency extends from the READ_ONCE() on + line 1 to the WRITE_ONCE() on line 2. Control dependencies are + fragile, and can be easily destroyed by optimizing compilers. + Please see control-dependencies.txt for more information. + + See also "Address Dependency" and "Data Dependency". + +Cycle: Memory-barrier pairing is restricted to a pair of CPUs, as the + name suggests. And in a great many cases, a pair of CPUs is all + that is required. In other cases, the notion of pairing must be + extended to additional CPUs, and the result is called a "cycle". + In a cycle, each CPU's ordering interacts with that of the next: + + CPU 0 CPU 1 CPU 2 + WRITE_ONCE(x, 1); WRITE_ONCE(y, 1); WRITE_ONCE(z, 1); + smp_mb(); smp_mb(); smp_mb(); + r0 = READ_ONCE(y); r1 = READ_ONCE(z); r2 = READ_ONCE(x); + + CPU 0's smp_mb() interacts with that of CPU 1, which interacts + with that of CPU 2, which in turn interacts with that of CPU 0 + to complete the cycle. Because of the smp_mb() calls between + each pair of memory accesses, the outcome where r0, r1, and r2 + are all equal to zero is forbidden by LKMM. + + See also "Pairing". + +Data Dependency: When the data written by a later store is computed based + on the value returned by an earlier load, a "data dependency" + extends from that load to that later store. For example: + + 1 r1 = READ_ONCE(x); + 2 WRITE_ONCE(y, r1 + 1); + + In this case, the data dependency extends from the READ_ONCE() + on line 1 to the WRITE_ONCE() on line 2. Data dependencies are + fragile and can be easily destroyed by optimizing compilers. + Because optimizing compilers put a great deal of effort into + working out what values integer variables might have, this is + especially true in cases where the dependency is carried through + an integer. + + See also "Address Dependency" and "Control Dependency". + +From-Reads (fr): When one CPU's store to a given variable happened + too late to affect the value returned by another CPU's + load from that same variable, there is said to be a from-reads + link from the load to the store. + + It is also possible to have a from-reads link within a CPU, which + is a "from-reads internal" (fri) link. The term "from-reads + external" (fre) link is used when it is necessary to exclude + the fri case. + + See also "Coherence" and "Reads-from". + +Fully Ordered: An operation such as smp_mb() that orders all of + its CPU's prior accesses with all of that CPU's subsequent + accesses, or a marked access such as atomic_add_return() + that orders all of its CPU's prior accesses, itself, and + all of its CPU's subsequent accesses. + +Marked Access: An access to a variable that uses an special function or + macro such as "r1 = READ_ONCE(x)" or "smp_store_release(&a, 1)". + + See also "Unmarked Access". + +Pairing: "Memory-barrier pairing" reflects the fact that synchronizing + data between two CPUs requires that both CPUs their accesses. + Memory barriers thus tend to come in pairs, one executed by + one of the CPUs and the other by the other CPU. Of course, + pairing also occurs with other types of operations, so that a + smp_store_release() pairs with an smp_load_acquire() that reads + the value stored. + + See also "Cycle". + +Reads-From (rf): When one CPU's load returns the value stored by some other + CPU, there is said to be a reads-from link from the second + CPU's store to the first CPU's load. Reads-from links have the + nice property that time must advance from the store to the load, + which means that algorithms using reads-from links can use lighter + weight ordering and synchronization compared to algorithms using + coherence and from-reads links. + + It is also possible to have a reads-from link within a CPU, which + is a "reads-from internal" (rfi) link. The term "reads-from + external" (rfe) link is used when it is necessary to exclude + the rfi case. + + See also Coherence" and "From-reads". + +Relaxed: A marked access that does not imply ordering, for example, a + READ_ONCE(), WRITE_ONCE(), a non-value-returning read-modify-write + operation, or a value-returning read-modify-write operation whose + name ends in "_relaxed". + + See also "Acquire" and "Release". + +Release: With respect to a lock, releasing that lock, for example, + using spin_unlock(). With respect to a non-lock shared variable, + a special operation that includes a store and which orders that + store after earlier memory references that ran on that same CPU. + An example special release store is smp_store_release(), but + atomic_set_release() and atomic_cmpxchg_release() also include + release stores. + + See also "Acquire" and "Relaxed". + +Unmarked Access: An access to a variable that uses normal C-language + syntax, for example, "a = b[2]"; + + See also "Marked Access". -- cgit v1.3.1 From 1947bfcf81a905e84a58b423063e81034a90efed Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Nov 2020 13:20:56 -0800 Subject: tools/memory-model: Add types to litmus tests This commit adds type information for global variables in the litmus tests in order to allow easier use with klitmus7. Signed-off-by: Paul E. McKenney --- tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus | 4 +++- tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus | 4 +++- tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus | 4 +++- tools/memory-model/litmus-tests/CoWW+poonceonce.litmus | 4 +++- .../litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus | 5 ++++- tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus | 5 ++++- .../litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus | 7 ++++++- tools/memory-model/litmus-tests/ISA2+poonceonces.litmus | 6 +++++- .../ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus | 6 +++++- .../litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus | 5 ++++- .../litmus-tests/LB+poacquireonce+pooncerelease.litmus | 5 ++++- tools/memory-model/litmus-tests/LB+poonceonces.litmus | 5 ++++- .../litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus | 5 ++++- tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus | 5 +++-- .../litmus-tests/MP+polockmbonce+poacquiresilsil.litmus | 2 ++ .../memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus | 2 ++ tools/memory-model/litmus-tests/MP+polocks.litmus | 6 +++++- tools/memory-model/litmus-tests/MP+poonceonces.litmus | 5 ++++- .../litmus-tests/MP+pooncerelease+poacquireonce.litmus | 5 ++++- tools/memory-model/litmus-tests/MP+porevlocks.litmus | 6 +++++- tools/memory-model/litmus-tests/R+fencembonceonces.litmus | 5 ++++- tools/memory-model/litmus-tests/R+poonceonces.litmus | 5 ++++- .../litmus-tests/S+fencewmbonceonce+poacquireonce.litmus | 5 ++++- tools/memory-model/litmus-tests/S+poonceonces.litmus | 5 ++++- tools/memory-model/litmus-tests/SB+fencembonceonces.litmus | 5 ++++- tools/memory-model/litmus-tests/SB+poonceonces.litmus | 5 ++++- tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus | 5 ++++- tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus | 5 ++++- .../litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus | 5 ++++- .../litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus | 7 ++++++- .../litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus | 7 ++++++- .../Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus | 6 +++++- 32 files changed, 130 insertions(+), 31 deletions(-) (limited to 'tools') diff --git a/tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus b/tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus index 967f9f2a6226..772544f03fb5 100644 --- a/tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus +++ b/tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus @@ -7,7 +7,9 @@ C CoRR+poonceonce+Once * reads from the same variable are ordered. *) -{} +{ + int x; +} P0(int *x) { diff --git a/tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus b/tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus index 4635739f3974..5faae98f7ffb 100644 --- a/tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus +++ b/tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus @@ -7,7 +7,9 @@ C CoRW+poonceonce+Once * a given variable and a later write to that same variable are ordered. *) -{} +{ + int x; +} P0(int *x) { diff --git a/tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus b/tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus index bb068c92d8da..77c9cc9f8dc6 100644 --- a/tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus +++ b/tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus @@ -7,7 +7,9 @@ C CoWR+poonceonce+Once * given variable and a later read from that same variable are ordered. *) -{} +{ + int x; +} P0(int *x) { diff --git a/tools/memory-model/litmus-tests/CoWW+poonceonce.litmus b/tools/memory-model/litmus-tests/CoWW+poonceonce.litmus index 0d9f0a958799..85ef746f511a 100644 --- a/tools/memory-model/litmus-tests/CoWW+poonceonce.litmus +++ b/tools/memory-model/litmus-tests/CoWW+poonceonce.litmus @@ -7,7 +7,9 @@ C CoWW+poonceonce * writes to the same variable are ordered. *) -{} +{ + int x; +} P0(int *x) { diff --git a/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus b/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus index e729d2776e89..87aa900125ab 100644 --- a/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus +++ b/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus @@ -10,7 +10,10 @@ C IRIW+fencembonceonces+OnceOnce * process? This litmus test exercises LKMM's "propagation" rule. *) -{} +{ + int x; + int y; +} P0(int *x) { diff --git a/tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus b/tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus index 4b54dd6a6cd9..f84022dca555 100644 --- a/tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus +++ b/tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus @@ -10,7 +10,10 @@ C IRIW+poonceonces+OnceOnce * different process? *) -{} +{ + int x; + int y; +} P0(int *x) { diff --git a/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus b/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus index 094d58df7789..398f624daa77 100644 --- a/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus +++ b/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus @@ -7,7 +7,12 @@ C ISA2+pooncelock+pooncelock+pombonce * (in P0() and P1()) is visible to external process P2(). *) -{} +{ + spinlock_t mylock; + int x; + int y; + int z; +} P0(int *x, int *y, spinlock_t *mylock) { diff --git a/tools/memory-model/litmus-tests/ISA2+poonceonces.litmus b/tools/memory-model/litmus-tests/ISA2+poonceonces.litmus index b321aa6f4ea5..212a432ba16b 100644 --- a/tools/memory-model/litmus-tests/ISA2+poonceonces.litmus +++ b/tools/memory-model/litmus-tests/ISA2+poonceonces.litmus @@ -9,7 +9,11 @@ C ISA2+poonceonces * of the smp_load_acquire() invocations are replaced by READ_ONCE()? *) -{} +{ + int x; + int y; + int z; +} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus b/tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus index 025b0462ec9b..7afd85672ccd 100644 --- a/tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus +++ b/tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus @@ -11,7 +11,11 @@ C ISA2+pooncerelease+poacquirerelease+poacquireonce * (AKA non-rf) link, so release-acquire is all that is needed. *) -{} +{ + int x; + int y; + int z; +} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus b/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus index 4727f5aaf03b..c8a93c7ee556 100644 --- a/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus +++ b/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus @@ -11,7 +11,10 @@ C LB+fencembonceonce+ctrlonceonce * another control dependency and order would still be maintained.) *) -{} +{ + int x; + int y; +} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus b/tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus index 07b9904b0e49..2fa029568fa1 100644 --- a/tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus +++ b/tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus @@ -8,7 +8,10 @@ C LB+poacquireonce+pooncerelease * to the other? *) -{} +{ + int x; + int y; +} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/LB+poonceonces.litmus b/tools/memory-model/litmus-tests/LB+poonceonces.litmus index 74c49cb3c37b..2107306e8625 100644 --- a/tools/memory-model/litmus-tests/LB+poonceonces.litmus +++ b/tools/memory-model/litmus-tests/LB+poonceonces.litmus @@ -7,7 +7,10 @@ C LB+poonceonces * be prevented even with no explicit ordering? *) -{} +{ + int x; + int y; +} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus index a273da9faa6d..e04b71b0ce2b 100644 --- a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus +++ b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus @@ -8,7 +8,10 @@ C MP+fencewmbonceonce+fencermbonceonce * is usually better to use smp_store_release() and smp_load_acquire(). *) -{} +{ + int x; + int y; +} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus index 97731b4bbdd8..18df682b08b2 100644 --- a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus +++ b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus @@ -10,8 +10,9 @@ C MP+onceassign+derefonce *) { -y=z; -z=0; + int x; + int *y=z; + int z=0; } P0(int *x, int **y) diff --git a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus index 50f4d62bbf0e..b1b1266fb49a 100644 --- a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus +++ b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus @@ -11,6 +11,8 @@ C MP+polockmbonce+poacquiresilsil *) { + spinlock_t lo; + int x; } P0(spinlock_t *lo, int *x) diff --git a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus index abf81e7a0895..867c75d8b960 100644 --- a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus +++ b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus @@ -11,6 +11,8 @@ C MP+polockonce+poacquiresilsil *) { + spinlock_t lo; + int x; } P0(spinlock_t *lo, int *x) diff --git a/tools/memory-model/litmus-tests/MP+polocks.litmus b/tools/memory-model/litmus-tests/MP+polocks.litmus index 712a4fcdf6ce..63e0f67c9b9d 100644 --- a/tools/memory-model/litmus-tests/MP+polocks.litmus +++ b/tools/memory-model/litmus-tests/MP+polocks.litmus @@ -11,7 +11,11 @@ C MP+polocks * to see all prior accesses by those other CPUs. *) -{} +{ + spinlock_t mylock; + int x; + int y; +} P0(int *x, int *y, spinlock_t *mylock) { diff --git a/tools/memory-model/litmus-tests/MP+poonceonces.litmus b/tools/memory-model/litmus-tests/MP+poonceonces.litmus index 172f0145301c..68180a403e5c 100644 --- a/tools/memory-model/litmus-tests/MP+poonceonces.litmus +++ b/tools/memory-model/litmus-tests/MP+poonceonces.litmus @@ -7,7 +7,10 @@ C MP+poonceonces * no ordering at all? *) -{} +{ + int x; + int y; +} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus index d52c68429722..19f3e6874b50 100644 --- a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus +++ b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus @@ -8,7 +8,10 @@ C MP+pooncerelease+poacquireonce * pattern. *) -{} +{ + int x; + int y; +} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/MP+porevlocks.litmus b/tools/memory-model/litmus-tests/MP+porevlocks.litmus index 72c9276b363e..4ac189adf41e 100644 --- a/tools/memory-model/litmus-tests/MP+porevlocks.litmus +++ b/tools/memory-model/litmus-tests/MP+porevlocks.litmus @@ -11,7 +11,11 @@ C MP+porevlocks * see all prior accesses by those other CPUs. *) -{} +{ + spinlock_t mylock; + int x; + int y; +} P0(int *x, int *y, spinlock_t *mylock) { diff --git a/tools/memory-model/litmus-tests/R+fencembonceonces.litmus b/tools/memory-model/litmus-tests/R+fencembonceonces.litmus index 222a0b850b4a..af9463b39b4a 100644 --- a/tools/memory-model/litmus-tests/R+fencembonceonces.litmus +++ b/tools/memory-model/litmus-tests/R+fencembonceonces.litmus @@ -9,7 +9,10 @@ C R+fencembonceonces * cause the resulting test to be allowed. *) -{} +{ + int x; + int y; +} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/R+poonceonces.litmus b/tools/memory-model/litmus-tests/R+poonceonces.litmus index 5386f128a131..bcd5574e304a 100644 --- a/tools/memory-model/litmus-tests/R+poonceonces.litmus +++ b/tools/memory-model/litmus-tests/R+poonceonces.litmus @@ -8,7 +8,10 @@ C R+poonceonces * store propagation delays. *) -{} +{ + int x; + int y; +} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus b/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus index 18479823cd6c..c36341d1aed6 100644 --- a/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus +++ b/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus @@ -7,7 +7,10 @@ C S+fencewmbonceonce+poacquireonce * store against a subsequent store? *) -{} +{ + int x; + int y; +} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/S+poonceonces.litmus b/tools/memory-model/litmus-tests/S+poonceonces.litmus index 8c9c2f81a580..7775c23143a0 100644 --- a/tools/memory-model/litmus-tests/S+poonceonces.litmus +++ b/tools/memory-model/litmus-tests/S+poonceonces.litmus @@ -9,7 +9,10 @@ C S+poonceonces * READ_ONCE(), is ordering preserved? *) -{} +{ + int x; + int y; +} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus b/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus index ed5fff18d223..833cdfeb7c09 100644 --- a/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus +++ b/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus @@ -9,7 +9,10 @@ C SB+fencembonceonces * suffice, but not much else.) *) -{} +{ + int x; + int y; +} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/SB+poonceonces.litmus b/tools/memory-model/litmus-tests/SB+poonceonces.litmus index 10d550730b25..c92211ecbfdf 100644 --- a/tools/memory-model/litmus-tests/SB+poonceonces.litmus +++ b/tools/memory-model/litmus-tests/SB+poonceonces.litmus @@ -8,7 +8,10 @@ C SB+poonceonces * variable that the preceding process reads. *) -{} +{ + int x; + int y; +} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus b/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus index 04a16603660b..84344b455eb7 100644 --- a/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus +++ b/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus @@ -6,7 +6,10 @@ C SB+rfionceonce-poonceonces * This litmus test demonstrates that LKMM is not fully multicopy atomic. *) -{} +{ + int x; + int y; +} P0(int *x, int *y) { diff --git a/tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus b/tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus index 6a2bc12a1af1..431494708611 100644 --- a/tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus +++ b/tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus @@ -8,7 +8,10 @@ C WRC+poonceonces+Once * test has no ordering at all. *) -{} +{ + int x; + int y; +} P0(int *x) { diff --git a/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus b/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus index e9947250d7de..554999c64db5 100644 --- a/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus +++ b/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus @@ -10,7 +10,10 @@ C WRC+pooncerelease+fencermbonceonce+Once * is A-cumulative in LKMM. *) -{} +{ + int x; + int y; +} P0(int *x) { diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus index 415248fb6699..265a95ffef13 100644 --- a/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus +++ b/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus @@ -9,7 +9,12 @@ C Z6.0+pooncelock+poonceLock+pombonce * by CPUs not holding that lock. *) -{} +{ + spinlock_t mylock; + int x; + int y; + int z; +} P0(int *x, int *y, spinlock_t *mylock) { diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus index 10a2aa04cd07..0c9aea8e80df 100644 --- a/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus +++ b/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus @@ -8,7 +8,12 @@ C Z6.0+pooncelock+pooncelock+pombonce * seen as ordered by a third process not holding that lock. *) -{} +{ + spinlock_t mylock; + int x; + int y; + int z; +} P0(int *x, int *y, spinlock_t *mylock) { diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus index 88e70b87a683..661f9aaa5791 100644 --- a/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus +++ b/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus @@ -14,7 +14,11 @@ C Z6.0+pooncerelease+poacquirerelease+fencembonceonce * involving locking.) *) -{} +{ + int x; + int y; + int z; +} P0(int *x, int *y) { -- cgit v1.3.1 From acc4bdc55dcb7d7fe0be736999572a55e121873f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Nov 2020 13:30:11 -0800 Subject: tools/memory-model: Use "buf" and "flag" for message-passing tests The use of "x" and "y" for message-passing tests is fine for people familiar with memory models and litmus-test nomenclature, but is a bit obtuse for others. This commit therefore substitutes "buf" for "x" and "flag" for "y" for the MP tests. There are a few special-case MP tests that use locks and these are unchanged. There is another MP test that uses pointers, and this is changed to name the pointer "p". Reported-by: Johannes Weiner Signed-off-by: Paul E. McKenney --- .../MP+fencewmbonceonce+fencermbonceonce.litmus | 16 ++++++++-------- .../litmus-tests/MP+onceassign+derefonce.litmus | 12 ++++++------ tools/memory-model/litmus-tests/MP+polocks.litmus | 16 ++++++++-------- tools/memory-model/litmus-tests/MP+poonceonces.litmus | 16 ++++++++-------- .../litmus-tests/MP+pooncerelease+poacquireonce.litmus | 16 ++++++++-------- tools/memory-model/litmus-tests/MP+porevlocks.litmus | 16 ++++++++-------- 6 files changed, 46 insertions(+), 46 deletions(-) (limited to 'tools') diff --git a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus index e04b71b0ce2b..f15e501849dd 100644 --- a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus +++ b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus @@ -9,25 +9,25 @@ C MP+fencewmbonceonce+fencermbonceonce *) { - int x; - int y; + int buf; + int flag; } -P0(int *x, int *y) +P0(int *buf, int *flag) { - WRITE_ONCE(*x, 1); + WRITE_ONCE(*buf, 1); smp_wmb(); - WRITE_ONCE(*y, 1); + WRITE_ONCE(*flag, 1); } -P1(int *x, int *y) +P1(int *buf, int *flag) { int r0; int r1; - r0 = READ_ONCE(*y); + r0 = READ_ONCE(*flag); smp_rmb(); - r1 = READ_ONCE(*x); + r1 = READ_ONCE(*buf); } exists (1:r0=1 /\ 1:r1=0) diff --git a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus index 18df682b08b2..ed8ee9bde0c9 100644 --- a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus +++ b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus @@ -10,24 +10,24 @@ C MP+onceassign+derefonce *) { + int *p=y; int x; - int *y=z; - int z=0; + int y=0; } -P0(int *x, int **y) +P0(int *x, int **p) { WRITE_ONCE(*x, 1); - rcu_assign_pointer(*y, x); + rcu_assign_pointer(*p, x); } -P1(int *x, int **y) +P1(int *x, int **p) { int *r0; int r1; rcu_read_lock(); - r0 = rcu_dereference(*y); + r0 = rcu_dereference(*p); r1 = READ_ONCE(*r0); rcu_read_unlock(); } diff --git a/tools/memory-model/litmus-tests/MP+polocks.litmus b/tools/memory-model/litmus-tests/MP+polocks.litmus index 63e0f67c9b9d..4b0c2edcc029 100644 --- a/tools/memory-model/litmus-tests/MP+polocks.litmus +++ b/tools/memory-model/litmus-tests/MP+polocks.litmus @@ -13,27 +13,27 @@ C MP+polocks { spinlock_t mylock; - int x; - int y; + int buf; + int flag; } -P0(int *x, int *y, spinlock_t *mylock) +P0(int *buf, int *flag, spinlock_t *mylock) { - WRITE_ONCE(*x, 1); + WRITE_ONCE(*buf, 1); spin_lock(mylock); - WRITE_ONCE(*y, 1); + WRITE_ONCE(*flag, 1); spin_unlock(mylock); } -P1(int *x, int *y, spinlock_t *mylock) +P1(int *buf, int *flag, spinlock_t *mylock) { int r0; int r1; spin_lock(mylock); - r0 = READ_ONCE(*y); + r0 = READ_ONCE(*flag); spin_unlock(mylock); - r1 = READ_ONCE(*x); + r1 = READ_ONCE(*buf); } exists (1:r0=1 /\ 1:r1=0) diff --git a/tools/memory-model/litmus-tests/MP+poonceonces.litmus b/tools/memory-model/litmus-tests/MP+poonceonces.litmus index 68180a403e5c..3010bbaec46c 100644 --- a/tools/memory-model/litmus-tests/MP+poonceonces.litmus +++ b/tools/memory-model/litmus-tests/MP+poonceonces.litmus @@ -8,23 +8,23 @@ C MP+poonceonces *) { - int x; - int y; + int buf; + int flag; } -P0(int *x, int *y) +P0(int *buf, int *flag) { - WRITE_ONCE(*x, 1); - WRITE_ONCE(*y, 1); + WRITE_ONCE(*buf, 1); + WRITE_ONCE(*flag, 1); } -P1(int *x, int *y) +P1(int *buf, int *flag) { int r0; int r1; - r0 = READ_ONCE(*y); - r1 = READ_ONCE(*x); + r0 = READ_ONCE(*flag); + r1 = READ_ONCE(*buf); } exists (1:r0=1 /\ 1:r1=0) diff --git a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus index 19f3e6874b50..21e825d5dea6 100644 --- a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus +++ b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus @@ -9,23 +9,23 @@ C MP+pooncerelease+poacquireonce *) { - int x; - int y; + int buf; + int flag; } -P0(int *x, int *y) +P0(int *buf, int *flag) { - WRITE_ONCE(*x, 1); - smp_store_release(y, 1); + WRITE_ONCE(*buf, 1); + smp_store_release(flag, 1); } -P1(int *x, int *y) +P1(int *buf, int *flag) { int r0; int r1; - r0 = smp_load_acquire(y); - r1 = READ_ONCE(*x); + r0 = smp_load_acquire(flag); + r1 = READ_ONCE(*buf); } exists (1:r0=1 /\ 1:r1=0) diff --git a/tools/memory-model/litmus-tests/MP+porevlocks.litmus b/tools/memory-model/litmus-tests/MP+porevlocks.litmus index 4ac189adf41e..9691d55b4e21 100644 --- a/tools/memory-model/litmus-tests/MP+porevlocks.litmus +++ b/tools/memory-model/litmus-tests/MP+porevlocks.litmus @@ -13,27 +13,27 @@ C MP+porevlocks { spinlock_t mylock; - int x; - int y; + int buf; + int flag; } -P0(int *x, int *y, spinlock_t *mylock) +P0(int *buf, int *flag, spinlock_t *mylock) { int r0; int r1; - r0 = READ_ONCE(*y); + r0 = READ_ONCE(*flag); spin_lock(mylock); - r1 = READ_ONCE(*x); + r1 = READ_ONCE(*buf); spin_unlock(mylock); } -P1(int *x, int *y, spinlock_t *mylock) +P1(int *buf, int *flag, spinlock_t *mylock) { spin_lock(mylock); - WRITE_ONCE(*x, 1); + WRITE_ONCE(*buf, 1); spin_unlock(mylock); - WRITE_ONCE(*y, 1); + WRITE_ONCE(*flag, 1); } exists (0:r0=1 /\ 0:r1=0) -- cgit v1.3.1 From b6ff30849ca723b78306514246b98ca5645d92f5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Nov 2020 13:39:28 -0800 Subject: tools/memory-model: Label MP tests' producers and consumers This commit adds comments that label the MP tests' producer and consumer processes, and also that label the "exists" clause as the bad outcome. Reported-by: Johannes Weiner Signed-off-by: Paul E. McKenney --- .../litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus | 6 +++--- tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus | 6 +++--- .../litmus-tests/MP+polockmbonce+poacquiresilsil.litmus | 6 +++--- .../memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus | 6 +++--- tools/memory-model/litmus-tests/MP+polocks.litmus | 6 +++--- tools/memory-model/litmus-tests/MP+poonceonces.litmus | 6 +++--- .../memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus | 6 +++--- tools/memory-model/litmus-tests/MP+porevlocks.litmus | 6 +++--- 8 files changed, 24 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus index f15e501849dd..c5c168d92973 100644 --- a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus +++ b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus @@ -13,14 +13,14 @@ C MP+fencewmbonceonce+fencermbonceonce int flag; } -P0(int *buf, int *flag) +P0(int *buf, int *flag) // Producer { WRITE_ONCE(*buf, 1); smp_wmb(); WRITE_ONCE(*flag, 1); } -P1(int *buf, int *flag) +P1(int *buf, int *flag) // Consumer { int r0; int r1; @@ -30,4 +30,4 @@ P1(int *buf, int *flag) r1 = READ_ONCE(*buf); } -exists (1:r0=1 /\ 1:r1=0) +exists (1:r0=1 /\ 1:r1=0) (* Bad outcome. *) diff --git a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus index ed8ee9bde0c9..20ff62649f1e 100644 --- a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus +++ b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus @@ -15,13 +15,13 @@ C MP+onceassign+derefonce int y=0; } -P0(int *x, int **p) +P0(int *x, int **p) // Producer { WRITE_ONCE(*x, 1); rcu_assign_pointer(*p, x); } -P1(int *x, int **p) +P1(int *x, int **p) // Consumer { int *r0; int r1; @@ -32,4 +32,4 @@ P1(int *x, int **p) rcu_read_unlock(); } -exists (1:r0=x /\ 1:r1=0) +exists (1:r0=x /\ 1:r1=0) (* Bad outcome. *) diff --git a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus index b1b1266fb49a..153917ad5dc9 100644 --- a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus +++ b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus @@ -15,7 +15,7 @@ C MP+polockmbonce+poacquiresilsil int x; } -P0(spinlock_t *lo, int *x) +P0(spinlock_t *lo, int *x) // Producer { spin_lock(lo); smp_mb__after_spinlock(); @@ -23,7 +23,7 @@ P0(spinlock_t *lo, int *x) spin_unlock(lo); } -P1(spinlock_t *lo, int *x) +P1(spinlock_t *lo, int *x) // Consumer { int r1; int r2; @@ -34,4 +34,4 @@ P1(spinlock_t *lo, int *x) r3 = spin_is_locked(lo); } -exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1) +exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1) (* Bad outcome. *) diff --git a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus index 867c75d8b960..aad64397bb8c 100644 --- a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus +++ b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus @@ -15,14 +15,14 @@ C MP+polockonce+poacquiresilsil int x; } -P0(spinlock_t *lo, int *x) +P0(spinlock_t *lo, int *x) // Producer { spin_lock(lo); WRITE_ONCE(*x, 1); spin_unlock(lo); } -P1(spinlock_t *lo, int *x) +P1(spinlock_t *lo, int *x) // Consumer { int r1; int r2; @@ -33,4 +33,4 @@ P1(spinlock_t *lo, int *x) r3 = spin_is_locked(lo); } -exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1) +exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1) (* Bad outcome. *) diff --git a/tools/memory-model/litmus-tests/MP+polocks.litmus b/tools/memory-model/litmus-tests/MP+polocks.litmus index 4b0c2edcc029..21cbca6f3be4 100644 --- a/tools/memory-model/litmus-tests/MP+polocks.litmus +++ b/tools/memory-model/litmus-tests/MP+polocks.litmus @@ -17,7 +17,7 @@ C MP+polocks int flag; } -P0(int *buf, int *flag, spinlock_t *mylock) +P0(int *buf, int *flag, spinlock_t *mylock) // Producer { WRITE_ONCE(*buf, 1); spin_lock(mylock); @@ -25,7 +25,7 @@ P0(int *buf, int *flag, spinlock_t *mylock) spin_unlock(mylock); } -P1(int *buf, int *flag, spinlock_t *mylock) +P1(int *buf, int *flag, spinlock_t *mylock) // Consumer { int r0; int r1; @@ -36,4 +36,4 @@ P1(int *buf, int *flag, spinlock_t *mylock) r1 = READ_ONCE(*buf); } -exists (1:r0=1 /\ 1:r1=0) +exists (1:r0=1 /\ 1:r1=0) (* Bad outcome. *) diff --git a/tools/memory-model/litmus-tests/MP+poonceonces.litmus b/tools/memory-model/litmus-tests/MP+poonceonces.litmus index 3010bbaec46c..9f9769d647c7 100644 --- a/tools/memory-model/litmus-tests/MP+poonceonces.litmus +++ b/tools/memory-model/litmus-tests/MP+poonceonces.litmus @@ -12,13 +12,13 @@ C MP+poonceonces int flag; } -P0(int *buf, int *flag) +P0(int *buf, int *flag) // Producer { WRITE_ONCE(*buf, 1); WRITE_ONCE(*flag, 1); } -P1(int *buf, int *flag) +P1(int *buf, int *flag) // Consumer { int r0; int r1; @@ -27,4 +27,4 @@ P1(int *buf, int *flag) r1 = READ_ONCE(*buf); } -exists (1:r0=1 /\ 1:r1=0) +exists (1:r0=1 /\ 1:r1=0) (* Bad outcome. *) diff --git a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus index 21e825d5dea6..cbe28e733443 100644 --- a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus +++ b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus @@ -13,13 +13,13 @@ C MP+pooncerelease+poacquireonce int flag; } -P0(int *buf, int *flag) +P0(int *buf, int *flag) // Producer { WRITE_ONCE(*buf, 1); smp_store_release(flag, 1); } -P1(int *buf, int *flag) +P1(int *buf, int *flag) // Consumer { int r0; int r1; @@ -28,4 +28,4 @@ P1(int *buf, int *flag) r1 = READ_ONCE(*buf); } -exists (1:r0=1 /\ 1:r1=0) +exists (1:r0=1 /\ 1:r1=0) (* Bad outcome. *) diff --git a/tools/memory-model/litmus-tests/MP+porevlocks.litmus b/tools/memory-model/litmus-tests/MP+porevlocks.litmus index 9691d55b4e21..012041bd4feb 100644 --- a/tools/memory-model/litmus-tests/MP+porevlocks.litmus +++ b/tools/memory-model/litmus-tests/MP+porevlocks.litmus @@ -17,7 +17,7 @@ C MP+porevlocks int flag; } -P0(int *buf, int *flag, spinlock_t *mylock) +P0(int *buf, int *flag, spinlock_t *mylock) // Consumer { int r0; int r1; @@ -28,7 +28,7 @@ P0(int *buf, int *flag, spinlock_t *mylock) spin_unlock(mylock); } -P1(int *buf, int *flag, spinlock_t *mylock) +P1(int *buf, int *flag, spinlock_t *mylock) // Producer { spin_lock(mylock); WRITE_ONCE(*buf, 1); @@ -36,4 +36,4 @@ P1(int *buf, int *flag, spinlock_t *mylock) WRITE_ONCE(*flag, 1); } -exists (0:r0=1 /\ 0:r1=0) +exists (0:r0=1 /\ 0:r1=0) (* Bad outcome. *) -- cgit v1.3.1 From df11f7dd5834146defa448acba097e8d7703cc42 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 12 Oct 2020 12:47:13 -0700 Subject: selftests: kvm: Fix the segment descriptor layout to match the actual layout Fix the layout of 'struct desc64' to match the layout described in the SDM Vol 3, Chapter 3 "Protected-Mode Memory Management", section 3.4.5 "Segment Descriptors", Figure 3-8 "Segment Descriptor". The test added later in this series relies on this and crashes if this layout is not correct. Signed-off-by: Aaron Lewis Reviewed-by: Alexander Graf Message-Id: <20201012194716.3950330-2-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/x86_64/processor.h | 2 +- tools/testing/selftests/kvm/lib/x86_64/processor.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 82b7fe16a824..0a65e7bb5249 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -59,7 +59,7 @@ struct gpr64_regs { struct desc64 { uint16_t limit0; uint16_t base0; - unsigned base1:8, s:1, type:4, dpl:2, p:1; + unsigned base1:8, type:4, s:1, dpl:2, p:1; unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8; uint32_t base3; uint32_t zero1; diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index f6eb34eaa0d2..1ccf6c9b3476 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -392,11 +392,12 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) desc->limit0 = segp->limit & 0xFFFF; desc->base0 = segp->base & 0xFFFF; desc->base1 = segp->base >> 16; - desc->s = segp->s; desc->type = segp->type; + desc->s = segp->s; desc->dpl = segp->dpl; desc->p = segp->present; desc->limit1 = segp->limit >> 16; + desc->avl = segp->avl; desc->l = segp->l; desc->db = segp->db; desc->g = segp->g; -- cgit v1.3.1 From 85f2a4320ef27ce74b9da0631460561028c48756 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 12 Oct 2020 12:47:14 -0700 Subject: selftests: kvm: Clear uc so UCALL_NONE is being properly reported Ensure the out value 'uc' in get_ucall() is properly reporting UCALL_NONE if the call fails. The return value will be correctly reported, however, the out parameter 'uc' will not be. Clear the struct to ensure the correct value is being reported in the out parameter. Signed-off-by: Aaron Lewis Reviewed-by: Andrew Jones Reviewed-by: Alexander Graf Message-Id: <20201012194716.3950330-3-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/aarch64/ucall.c | 3 +++ tools/testing/selftests/kvm/lib/s390x/ucall.c | 3 +++ tools/testing/selftests/kvm/lib/x86_64/ucall.c | 3 +++ 3 files changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index c8e0ec20d3bf..2f37b90ee1a9 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -94,6 +94,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) struct kvm_run *run = vcpu_state(vm, vcpu_id); struct ucall ucall = {}; + if (uc) + memset(uc, 0, sizeof(*uc)); + if (run->exit_reason == KVM_EXIT_MMIO && run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) { vm_vaddr_t gva; diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c index fd589dc9bfab..9d3b0f15249a 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c @@ -38,6 +38,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) struct kvm_run *run = vcpu_state(vm, vcpu_id); struct ucall ucall = {}; + if (uc) + memset(uc, 0, sizeof(*uc)); + if (run->exit_reason == KVM_EXIT_S390_SIEIC && run->s390_sieic.icptcode == 4 && (run->s390_sieic.ipa >> 8) == 0x83 && /* 0x83 means DIAGNOSE */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index da4d89ad5419..a3489973e290 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -40,6 +40,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) struct kvm_run *run = vcpu_state(vm, vcpu_id); struct ucall ucall = {}; + if (uc) + memset(uc, 0, sizeof(*uc)); + if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { struct kvm_regs regs; -- cgit v1.3.1 From 29faeb9632012d6c3fa4aa33c3d589b9ff18b206 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 12 Oct 2020 12:47:15 -0700 Subject: selftests: kvm: Add exception handling to selftests Add the infrastructure needed to enable exception handling in selftests. This allows any of the exception and interrupt vectors to be overridden in the guest. Signed-off-by: Aaron Lewis Reviewed-by: Alexander Graf Message-Id: <20201012194716.3950330-4-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 19 ++-- tools/testing/selftests/kvm/include/kvm_util.h | 2 + .../selftests/kvm/include/x86_64/processor.h | 24 +++++ .../testing/selftests/kvm/lib/aarch64/processor.c | 4 + tools/testing/selftests/kvm/lib/kvm_util.c | 3 + .../testing/selftests/kvm/lib/kvm_util_internal.h | 2 + tools/testing/selftests/kvm/lib/s390x/processor.c | 4 + tools/testing/selftests/kvm/lib/x86_64/handlers.S | 81 +++++++++++++++ tools/testing/selftests/kvm/lib/x86_64/processor.c | 114 ++++++++++++++++++++- 9 files changed, 244 insertions(+), 9 deletions(-) create mode 100644 tools/testing/selftests/kvm/lib/x86_64/handlers.S (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 30afbad36cd5..b0f1fcab79f5 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -34,7 +34,7 @@ ifeq ($(ARCH),s390) endif LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c -LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c +LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c @@ -111,14 +111,21 @@ LDFLAGS += -pthread $(no-pie-option) $(pgste-option) include ../lib.mk STATIC_LIBS := $(OUTPUT)/libkvm.a -LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM)) -EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) cscope.* +LIBKVM_C := $(filter %.c,$(LIBKVM)) +LIBKVM_S := $(filter %.S,$(LIBKVM)) +LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) +LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) +EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.* + +x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)))) +$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ -x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ)))) -$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c +$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ -$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ) +LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) +$(OUTPUT)/libkvm.a: $(LIBKVM_OBJS) $(AR) crs $@ $^ x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 919e161dd289..356930690bea 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -294,6 +294,8 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); memcpy(&(g), _p, sizeof(g)); \ }) +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid); + /* Common ucalls */ enum { UCALL_NONE, diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 0a65e7bb5249..02530dc6339b 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -36,6 +36,8 @@ #define X86_CR4_SMAP (1ul << 21) #define X86_CR4_PKE (1ul << 22) +#define UNEXPECTED_VECTOR_PORT 0xfff0u + /* General Registers in 64-Bit Mode */ struct gpr64_regs { u64 rax; @@ -239,6 +241,11 @@ static inline struct desc_ptr get_idt(void) return idt; } +static inline void outl(uint16_t port, uint32_t value) +{ + __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value)); +} + #define SET_XMM(__var, __xmm) \ asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm) @@ -338,6 +345,23 @@ uint32_t kvm_get_cpuid_max_basic(void); uint32_t kvm_get_cpuid_max_extended(void); void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); +struct ex_regs { + uint64_t rax, rcx, rdx, rbx; + uint64_t rbp, rsi, rdi; + uint64_t r8, r9, r10, r11; + uint64_t r12, r13, r14, r15; + uint64_t vector; + uint64_t error_code; + uint64_t rip; + uint64_t cs; + uint64_t rflags; +}; + +void vm_init_descriptor_tables(struct kvm_vm *vm); +void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); +void vm_handle_exception(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)); + /* * Basic CPU control in CR0 */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 2afa6618b396..d6c32c328e9a 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -350,3 +350,7 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) va_end(ap); } + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 3327cebc1095..be230f3728d5 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1204,6 +1204,9 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) do { rc = ioctl(vcpu->fd, KVM_RUN, NULL); } while (rc == -1 && errno == EINTR); + + assert_on_unhandled_exception(vm, vcpuid); + return rc; } diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index 2ef446520748..f07d383d03a1 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -50,6 +50,8 @@ struct kvm_vm { vm_paddr_t pgd; vm_vaddr_t gdt; vm_vaddr_t tss; + vm_vaddr_t idt; + vm_vaddr_t handlers; }; struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid); diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index a88c5d665725..7349bb2e1a24 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -241,3 +241,7 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n", indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr); } + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S new file mode 100644 index 000000000000..aaf7bc7d2ce1 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86_64/handlers.S @@ -0,0 +1,81 @@ +handle_exception: + push %r15 + push %r14 + push %r13 + push %r12 + push %r11 + push %r10 + push %r9 + push %r8 + + push %rdi + push %rsi + push %rbp + push %rbx + push %rdx + push %rcx + push %rax + mov %rsp, %rdi + + call route_exception + + pop %rax + pop %rcx + pop %rdx + pop %rbx + pop %rbp + pop %rsi + pop %rdi + pop %r8 + pop %r9 + pop %r10 + pop %r11 + pop %r12 + pop %r13 + pop %r14 + pop %r15 + + /* Discard vector and error code. */ + add $16, %rsp + iretq + +/* + * Build the handle_exception wrappers which push the vector/error code on the + * stack and an array of pointers to those wrappers. + */ +.pushsection .rodata +.globl idt_handlers +idt_handlers: +.popsection + +.macro HANDLERS has_error from to + vector = \from + .rept \to - \from + 1 + .align 8 + + /* Fetch current address and append it to idt_handlers. */ + current_handler = . +.pushsection .rodata +.quad current_handler +.popsection + + .if ! \has_error + pushq $0 + .endif + pushq $vector + jmp handle_exception + vector = vector + 1 + .endr +.endm + +.global idt_handler_code +idt_handler_code: + HANDLERS has_error=0 from=0 to=7 + HANDLERS has_error=1 from=8 to=8 + HANDLERS has_error=0 from=9 to=9 + HANDLERS has_error=1 from=10 to=14 + HANDLERS has_error=0 from=15 to=16 + HANDLERS has_error=1 from=17 to=17 + HANDLERS has_error=0 from=18 to=255 + +.section .note.GNU-stack, "", %progbits diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 1ccf6c9b3476..66228297ac03 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -12,9 +12,18 @@ #include "../kvm_util_internal.h" #include "processor.h" +#ifndef NUM_INTERRUPTS +#define NUM_INTERRUPTS 256 +#endif + +#define DEFAULT_CODE_SELECTOR 0x8 +#define DEFAULT_DATA_SELECTOR 0x10 + /* Minimum physical address used for virtual translation tables. */ #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 +vm_vaddr_t exception_handlers; + /* Virtual translation table structure declarations */ struct pageMapL4Entry { uint64_t present:1; @@ -557,9 +566,9 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); kvm_seg_set_unusable(&sregs.ldt); - kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs); - kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds); - kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es); + kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs); + kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds); + kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es); kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot); break; @@ -1119,3 +1128,102 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) *va_bits = (entry->eax >> 8) & 0xff; } } + +struct idt_entry { + uint16_t offset0; + uint16_t selector; + uint16_t ist : 3; + uint16_t : 5; + uint16_t type : 4; + uint16_t : 1; + uint16_t dpl : 2; + uint16_t p : 1; + uint16_t offset1; + uint32_t offset2; uint32_t reserved; +}; + +static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, + int dpl, unsigned short selector) +{ + struct idt_entry *base = + (struct idt_entry *)addr_gva2hva(vm, vm->idt); + struct idt_entry *e = &base[vector]; + + memset(e, 0, sizeof(*e)); + e->offset0 = addr; + e->selector = selector; + e->ist = 0; + e->type = 14; + e->dpl = dpl; + e->p = 1; + e->offset1 = addr >> 16; + e->offset2 = addr >> 32; +} + +void kvm_exit_unexpected_vector(uint32_t value) +{ + outl(UNEXPECTED_VECTOR_PORT, value); +} + +void route_exception(struct ex_regs *regs) +{ + typedef void(*handler)(struct ex_regs *); + handler *handlers = (handler *)exception_handlers; + + if (handlers && handlers[regs->vector]) { + handlers[regs->vector](regs); + return; + } + + kvm_exit_unexpected_vector(regs->vector); +} + +void vm_init_descriptor_tables(struct kvm_vm *vm) +{ + extern void *idt_handlers; + int i; + + vm->idt = vm_vaddr_alloc(vm, getpagesize(), 0x2000, 0, 0); + vm->handlers = vm_vaddr_alloc(vm, 256 * sizeof(void *), 0x2000, 0, 0); + /* Handlers have the same address in both address spaces.*/ + for (i = 0; i < NUM_INTERRUPTS; i++) + set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, + DEFAULT_CODE_SELECTOR); +} + +void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct kvm_sregs sregs; + + vcpu_sregs_get(vm, vcpuid, &sregs); + sregs.idt.base = vm->idt; + sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1; + sregs.gdt.base = vm->gdt; + sregs.gdt.limit = getpagesize() - 1; + kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs); + vcpu_sregs_set(vm, vcpuid, &sregs); + *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; +} + +void vm_handle_exception(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)) +{ + vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); + + handlers[vector] = (vm_vaddr_t)handler; +} + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ + if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO + && vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT + && vcpu_state(vm, vcpuid)->io.size == 4) { + /* Grab pointer to io data */ + uint32_t *data = (void *)vcpu_state(vm, vcpuid) + + vcpu_state(vm, vcpuid)->io.data_offset; + + TEST_ASSERT(false, + "Unexpected vectored event in guest (vector:0x%x)", + *data); + } +} -- cgit v1.3.1 From ac4a4d6de22e674cd6e3fe57199a15383496aad2 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 27 Oct 2020 16:10:44 -0700 Subject: selftests: kvm: test enforcement of paravirtual cpuid features Add a set of tests that ensure the guest cannot access paravirtual msrs and hypercalls that have been disabled in the KVM_CPUID_FEATURES leaf. Expect a #GP in the case of msr accesses and -KVM_ENOSYS from hypercalls. Cc: Jim Mattson Signed-off-by: Oliver Upton Reviewed-by: Peter Shier Reviewed-by: Aaron Lewis Message-Id: <20201027231044.655110-7-oupton@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/include/kvm_util.h | 3 + .../selftests/kvm/include/x86_64/processor.h | 12 ++ tools/testing/selftests/kvm/lib/kvm_util.c | 28 +++ tools/testing/selftests/kvm/lib/x86_64/processor.c | 29 +++ tools/testing/selftests/kvm/x86_64/kvm_pv_test.c | 234 +++++++++++++++++++++ 7 files changed, 308 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/kvm_pv_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index d2c2d6205008..22973aa75eda 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -5,6 +5,7 @@ /x86_64/cr4_cpuid_sync_test /x86_64/debug_regs /x86_64/evmcs_test +/x86_64/kvm_pv_test /x86_64/hyperv_cpuid /x86_64/mmio_warning_test /x86_64/platform_info_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index b0f1fcab79f5..44ca6badb544 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -41,6 +41,7 @@ LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid +TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 356930690bea..77f53dbc34ff 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -63,6 +63,9 @@ enum vm_mem_backing_src_type { int kvm_check_cap(long cap); int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); +int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_enable_cap *cap); +void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 02530dc6339b..8e61340b3911 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -362,6 +362,18 @@ void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); void vm_handle_exception(struct kvm_vm *vm, int vector, void (*handler)(struct ex_regs *)); +/* + * set_cpuid() - overwrites a matching cpuid entry with the provided value. + * matches based on ent->function && ent->index. returns true + * if a match was found and successfully overwritten. + * @cpuid: the kvm cpuid list to modify. + * @ent: cpuid entry to insert + */ +bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent); + +uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t a3); + /* * Basic CPU control in CR0 */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index be230f3728d5..91e547031d8c 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -86,6 +86,34 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap) return ret; } +/* VCPU Enable Capability + * + * Input Args: + * vm - Virtual Machine + * vcpu_id - VCPU + * cap - Capability + * + * Output Args: None + * + * Return: On success, 0. On failure a TEST_ASSERT failure is produced. + * + * Enables a capability (KVM_CAP_*) on the VCPU. + */ +int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_enable_cap *cap) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpu_id); + int r; + + TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id); + + r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap); + TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n" + " rc: %i, errno: %i", r, errno); + + return r; +} + static void vm_open(struct kvm_vm *vm, int perm) { vm->kvm_fd = open(KVM_DEV_PATH, perm); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 66228297ac03..d10c5c05bdf0 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1227,3 +1227,32 @@ void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) *data); } } + +bool set_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 *ent) +{ + int i; + + for (i = 0; i < cpuid->nent; i++) { + struct kvm_cpuid_entry2 *cur = &cpuid->entries[i]; + + if (cur->function != ent->function || cur->index != ent->index) + continue; + + memcpy(cur, ent, sizeof(struct kvm_cpuid_entry2)); + return true; + } + + return false; +} + +uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t a3) +{ + uint64_t r; + + asm volatile("vmcall" + : "=a"(r) + : "b"(a0), "c"(a1), "d"(a2), "S"(a3)); + return r; +} diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c new file mode 100644 index 000000000000..b10a27485bad --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020, Google LLC. + * + * Tests for KVM paravirtual feature disablement + */ +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +extern unsigned char rdmsr_start; +extern unsigned char rdmsr_end; + +static u64 do_rdmsr(u32 idx) +{ + u32 lo, hi; + + asm volatile("rdmsr_start: rdmsr;" + "rdmsr_end:" + : "=a"(lo), "=c"(hi) + : "c"(idx)); + + return (((u64) hi) << 32) | lo; +} + +extern unsigned char wrmsr_start; +extern unsigned char wrmsr_end; + +static void do_wrmsr(u32 idx, u64 val) +{ + u32 lo, hi; + + lo = val; + hi = val >> 32; + + asm volatile("wrmsr_start: wrmsr;" + "wrmsr_end:" + : : "a"(lo), "c"(idx), "d"(hi)); +} + +static int nr_gp; + +static void guest_gp_handler(struct ex_regs *regs) +{ + unsigned char *rip = (unsigned char *)regs->rip; + bool r, w; + + r = rip == &rdmsr_start; + w = rip == &wrmsr_start; + GUEST_ASSERT(r || w); + + nr_gp++; + + if (r) + regs->rip = (uint64_t)&rdmsr_end; + else + regs->rip = (uint64_t)&wrmsr_end; +} + +struct msr_data { + uint32_t idx; + const char *name; +}; + +#define TEST_MSR(msr) { .idx = msr, .name = #msr } +#define UCALL_PR_MSR 0xdeadbeef +#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr) + +/* + * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or + * written, as the KVM_CPUID_FEATURES leaf is cleared. + */ +static struct msr_data msrs_to_test[] = { + TEST_MSR(MSR_KVM_SYSTEM_TIME), + TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW), + TEST_MSR(MSR_KVM_WALL_CLOCK), + TEST_MSR(MSR_KVM_WALL_CLOCK_NEW), + TEST_MSR(MSR_KVM_ASYNC_PF_EN), + TEST_MSR(MSR_KVM_STEAL_TIME), + TEST_MSR(MSR_KVM_PV_EOI_EN), + TEST_MSR(MSR_KVM_POLL_CONTROL), + TEST_MSR(MSR_KVM_ASYNC_PF_INT), + TEST_MSR(MSR_KVM_ASYNC_PF_ACK), +}; + +static void test_msr(struct msr_data *msr) +{ + PR_MSR(msr); + do_rdmsr(msr->idx); + GUEST_ASSERT(READ_ONCE(nr_gp) == 1); + + nr_gp = 0; + do_wrmsr(msr->idx, 0); + GUEST_ASSERT(READ_ONCE(nr_gp) == 1); + nr_gp = 0; +} + +struct hcall_data { + uint64_t nr; + const char *name; +}; + +#define TEST_HCALL(hc) { .nr = hc, .name = #hc } +#define UCALL_PR_HCALL 0xdeadc0de +#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc) + +/* + * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding + * features have been cleared in KVM_CPUID_FEATURES. + */ +static struct hcall_data hcalls_to_test[] = { + TEST_HCALL(KVM_HC_KICK_CPU), + TEST_HCALL(KVM_HC_SEND_IPI), + TEST_HCALL(KVM_HC_SCHED_YIELD), +}; + +static void test_hcall(struct hcall_data *hc) +{ + uint64_t r; + + PR_HCALL(hc); + r = kvm_hypercall(hc->nr, 0, 0, 0, 0); + GUEST_ASSERT(r == -KVM_ENOSYS); +} + +static void guest_main(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) { + test_msr(&msrs_to_test[i]); + } + + for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) { + test_hcall(&hcalls_to_test[i]); + } + + GUEST_DONE(); +} + +static void clear_kvm_cpuid_features(struct kvm_cpuid2 *cpuid) +{ + struct kvm_cpuid_entry2 ent = {0}; + + ent.function = KVM_CPUID_FEATURES; + TEST_ASSERT(set_cpuid(cpuid, &ent), + "failed to clear KVM_CPUID_FEATURES leaf"); +} + +static void pr_msr(struct ucall *uc) +{ + struct msr_data *msr = (struct msr_data *)uc->args[0]; + + pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx); +} + +static void pr_hcall(struct ucall *uc) +{ + struct hcall_data *hc = (struct hcall_data *)uc->args[0]; + + pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr); +} + +static void handle_abort(struct ucall *uc) +{ + TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0], + __FILE__, uc->args[1]); +} + +#define VCPU_ID 0 + +static void enter_guest(struct kvm_vm *vm) +{ + struct kvm_run *run; + struct ucall uc; + int r; + + run = vcpu_state(vm, VCPU_ID); + + while (true) { + r = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(!r, "vcpu_run failed: %d\n", r); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "unexpected exit reason: %u (%s)", + run->exit_reason, exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_PR_MSR: + pr_msr(&uc); + break; + case UCALL_PR_HCALL: + pr_hcall(&uc); + break; + case UCALL_ABORT: + handle_abort(&uc); + return; + case UCALL_DONE: + return; + } + } +} + +int main(void) +{ + struct kvm_enable_cap cap = {0}; + struct kvm_cpuid2 *best; + struct kvm_vm *vm; + + if (!kvm_check_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)) { + pr_info("will skip kvm paravirt restriction tests.\n"); + return 0; + } + + vm = vm_create_default(VCPU_ID, 0, guest_main); + + cap.cap = KVM_CAP_ENFORCE_PV_FEATURE_CPUID; + cap.args[0] = 1; + vcpu_enable_cap(vm, VCPU_ID, &cap); + + best = kvm_get_supported_cpuid(); + clear_kvm_cpuid_features(best); + vcpu_set_cpuid(vm, VCPU_ID, best); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_handle_exception(vm, GP_VECTOR, guest_gp_handler); + + enter_guest(vm); + kvm_vm_free(vm); +} -- cgit v1.3.1 From fd02029a9e019e941835e110651486e2d77d3f84 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 29 Oct 2020 21:17:01 +0100 Subject: KVM: selftests: Add aarch64 get-reg-list test Check for KVM_GET_REG_LIST regressions. The blessed list was created by running on v4.15 with the --core-reg-fixup option. The following script was also used in order to annotate system registers with their names when possible. When new system registers are added the names can just be added manually using the same grep. while read reg; do if [[ ! $reg =~ ARM64_SYS_REG ]]; then printf "\t$reg\n" continue fi encoding=$(echo "$reg" | sed "s/ARM64_SYS_REG(//;s/),//") if ! name=$(grep "$encoding" ../../../../arch/arm64/include/asm/sysreg.h); then printf "\t$reg\n" continue fi name=$(echo "$name" | sed "s/.*SYS_//;s/[\t ]*sys_reg($encoding)$//") printf "\t$reg\t/* $name */\n" done < <(aarch64/get-reg-list --core-reg-fixup --list) Signed-off-by: Andrew Jones Message-Id: <20201029201703.102716-3-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/aarch64/get-reg-list.c | 671 +++++++++++++++++++++ tools/testing/selftests/kvm/include/kvm_util.h | 1 + tools/testing/selftests/kvm/lib/kvm_util.c | 29 + 5 files changed, 703 insertions(+) create mode 100644 tools/testing/selftests/kvm/aarch64/get-reg-list.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 22973aa75eda..2034765862a2 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only +/aarch64/get-reg-list /s390x/memop /s390x/resets /s390x/sync_regs_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 44ca6badb544..771455ae4a1b 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -66,6 +66,7 @@ TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time +TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += clear_dirty_log_test TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c new file mode 100644 index 000000000000..3ff097f6886e --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -0,0 +1,671 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Check for KVM_GET_REG_LIST regressions. + * + * Copyright (C) 2020, Red Hat, Inc. + * + * When attempting to migrate from a host with an older kernel to a host + * with a newer kernel we allow the newer kernel on the destination to + * list new registers with get-reg-list. We assume they'll be unused, at + * least until the guest reboots, and so they're relatively harmless. + * However, if the destination host with the newer kernel is missing + * registers which the source host with the older kernel has, then that's + * a regression in get-reg-list. This test checks for that regression by + * checking the current list against a blessed list. We should never have + * missing registers, but if new ones appear then they can probably be + * added to the blessed list. A completely new blessed list can be created + * by running the test with the --list command line argument. + * + * Note, the blessed list should be created from the oldest possible + * kernel. We can't go older than v4.15, though, because that's the first + * release to expose the ID system registers in KVM_GET_REG_LIST, see + * commit 93390c0a1b20 ("arm64: KVM: Hide unsupported AArch64 CPU features + * from guests"). Also, one must use the --core-reg-fixup command line + * option when running on an older kernel that doesn't include df205b5c6328 + * ("KVM: arm64: Filter out invalid core register IDs in KVM_GET_REG_LIST") + */ +#include +#include +#include +#include "kvm_util.h" +#include "test_util.h" + +#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK) + +#define for_each_reg(i) \ + for ((i) = 0; (i) < reg_list->n; ++(i)) + +#define for_each_missing_reg(i) \ + for ((i) = 0; (i) < blessed_n; ++(i)) \ + if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i])) + +#define for_each_new_reg(i) \ + for ((i) = 0; (i) < reg_list->n; ++(i)) \ + if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i])) + + +static struct kvm_reg_list *reg_list; + +static __u64 blessed_reg[]; +static __u64 blessed_n; + +static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg) +{ + int i; + + for (i = 0; i < nr_regs; ++i) + if (reg == regs[i]) + return true; + return false; +} + +static const char *str_with_index(const char *template, __u64 index) +{ + char *str, *p; + int n; + + str = strdup(template); + p = strstr(str, "##"); + n = sprintf(p, "%lld", index); + strcat(p + n, strstr(template, "##") + 2); + + return (const char *)str; +} + +#define CORE_REGS_XX_NR_WORDS 2 +#define CORE_SPSR_XX_NR_WORDS 2 +#define CORE_FPREGS_XX_NR_WORDS 4 + +static const char *core_id_to_str(__u64 id) +{ + __u64 core_off = id & ~REG_MASK, idx; + + /* + * core_off is the offset into struct kvm_regs + */ + switch (core_off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS; + TEST_ASSERT(idx < 31, "Unexpected regs.regs index: %lld", idx); + return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx); + case KVM_REG_ARM_CORE_REG(regs.sp): + return "KVM_REG_ARM_CORE_REG(regs.sp)"; + case KVM_REG_ARM_CORE_REG(regs.pc): + return "KVM_REG_ARM_CORE_REG(regs.pc)"; + case KVM_REG_ARM_CORE_REG(regs.pstate): + return "KVM_REG_ARM_CORE_REG(regs.pstate)"; + case KVM_REG_ARM_CORE_REG(sp_el1): + return "KVM_REG_ARM_CORE_REG(sp_el1)"; + case KVM_REG_ARM_CORE_REG(elr_el1): + return "KVM_REG_ARM_CORE_REG(elr_el1)"; + case KVM_REG_ARM_CORE_REG(spsr[0]) ... + KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): + idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS; + TEST_ASSERT(idx < KVM_NR_SPSR, "Unexpected spsr index: %lld", idx); + return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx); + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS; + TEST_ASSERT(idx < 32, "Unexpected fp_regs.vregs index: %lld", idx); + return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx); + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)"; + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)"; + } + + TEST_FAIL("Unknown core reg id: 0x%llx", id); + return NULL; +} + +static void print_reg(__u64 id) +{ + unsigned op0, op1, crn, crm, op2; + const char *reg_size = NULL; + + TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64, + "KVM_REG_ARM64 missing in reg id: 0x%llx", id); + + switch (id & KVM_REG_SIZE_MASK) { + case KVM_REG_SIZE_U8: + reg_size = "KVM_REG_SIZE_U8"; + break; + case KVM_REG_SIZE_U16: + reg_size = "KVM_REG_SIZE_U16"; + break; + case KVM_REG_SIZE_U32: + reg_size = "KVM_REG_SIZE_U32"; + break; + case KVM_REG_SIZE_U64: + reg_size = "KVM_REG_SIZE_U64"; + break; + case KVM_REG_SIZE_U128: + reg_size = "KVM_REG_SIZE_U128"; + break; + case KVM_REG_SIZE_U256: + reg_size = "KVM_REG_SIZE_U256"; + break; + case KVM_REG_SIZE_U512: + reg_size = "KVM_REG_SIZE_U512"; + break; + case KVM_REG_SIZE_U1024: + reg_size = "KVM_REG_SIZE_U1024"; + break; + case KVM_REG_SIZE_U2048: + reg_size = "KVM_REG_SIZE_U2048"; + break; + default: + TEST_FAIL("Unexpected reg size: 0x%llx in reg id: 0x%llx", + (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); + } + + switch (id & KVM_REG_ARM_COPROC_MASK) { + case KVM_REG_ARM_CORE: + printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(id)); + break; + case KVM_REG_ARM_DEMUX: + TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)), + "Unexpected bits set in DEMUX reg id: 0x%llx", id); + printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n", + reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK); + break; + case KVM_REG_ARM64_SYSREG: + op0 = (id & KVM_REG_ARM64_SYSREG_OP0_MASK) >> KVM_REG_ARM64_SYSREG_OP0_SHIFT; + op1 = (id & KVM_REG_ARM64_SYSREG_OP1_MASK) >> KVM_REG_ARM64_SYSREG_OP1_SHIFT; + crn = (id & KVM_REG_ARM64_SYSREG_CRN_MASK) >> KVM_REG_ARM64_SYSREG_CRN_SHIFT; + crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT; + op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT; + TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2), + "Unexpected bits set in SYSREG reg id: 0x%llx", id); + printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2); + break; + case KVM_REG_ARM_FW: + TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff), + "Unexpected bits set in FW reg id: 0x%llx", id); + printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff); + break; + case KVM_REG_ARM64_SVE: + TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id); + break; + default: + TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx", + (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id); + } +} + +/* + * Older kernels listed each 32-bit word of CORE registers separately. + * For 64 and 128-bit registers we need to ignore the extra words. We + * also need to fixup the sizes, because the older kernels stated all + * registers were 64-bit, even when they weren't. + */ +static void core_reg_fixup(void) +{ + struct kvm_reg_list *tmp; + __u64 id, core_off; + int i; + + tmp = calloc(1, sizeof(*tmp) + reg_list->n * sizeof(__u64)); + + for (i = 0; i < reg_list->n; ++i) { + id = reg_list->reg[i]; + + if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM_CORE) { + tmp->reg[tmp->n++] = id; + continue; + } + + core_off = id & ~REG_MASK; + + switch (core_off) { + case 0x52: case 0xd2: case 0xd6: + /* + * These offsets are pointing at padding. + * We need to ignore them too. + */ + continue; + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + if (core_off & 3) + continue; + id &= ~KVM_REG_SIZE_MASK; + id |= KVM_REG_SIZE_U128; + tmp->reg[tmp->n++] = id; + continue; + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + id &= ~KVM_REG_SIZE_MASK; + id |= KVM_REG_SIZE_U32; + tmp->reg[tmp->n++] = id; + continue; + default: + if (core_off & 1) + continue; + tmp->reg[tmp->n++] = id; + break; + } + } + + free(reg_list); + reg_list = tmp; +} + +int main(int ac, char **av) +{ + int new_regs = 0, missing_regs = 0, i; + int failed_get = 0, failed_set = 0; + bool print_list = false, fixup_core_regs = false; + struct kvm_vm *vm; + + for (i = 1; i < ac; ++i) { + if (strcmp(av[i], "--core-reg-fixup") == 0) + fixup_core_regs = true; + else if (strcmp(av[i], "--list") == 0) + print_list = true; + else + fprintf(stderr, "Ignoring unknown option: %s\n", av[i]); + } + + vm = vm_create_default(0, 0, NULL); + reg_list = vcpu_get_reg_list(vm, 0); + + if (fixup_core_regs) + core_reg_fixup(); + + if (print_list) { + putchar('\n'); + for_each_reg(i) + print_reg(reg_list->reg[i]); + putchar('\n'); + return 0; + } + + /* + * We only test that we can get the register and then write back the + * same value. Some registers may allow other values to be written + * back, but others only allow some bits to be changed, and at least + * for ID registers set will fail if the value does not exactly match + * what was returned by get. If registers that allow other values to + * be written need to have the other values tested, then we should + * create a new set of tests for those in a new independent test + * executable. + */ + for_each_reg(i) { + uint8_t addr[2048 / 8]; + struct kvm_one_reg reg = { + .id = reg_list->reg[i], + .addr = (__u64)&addr, + }; + int ret; + + ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, ®); + if (ret) { + puts("Failed to get "); + print_reg(reg.id); + putchar('\n'); + ++failed_get; + } + + ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); + if (ret) { + puts("Failed to set "); + print_reg(reg.id); + putchar('\n'); + ++failed_set; + } + } + + for_each_new_reg(i) + ++new_regs; + + for_each_missing_reg(i) + ++missing_regs; + + if (new_regs || missing_regs) { + printf("Number blessed registers: %5lld\n", blessed_n); + printf("Number registers: %5lld\n", reg_list->n); + } + + if (new_regs) { + printf("\nThere are %d new registers.\n" + "Consider adding them to the blessed reg " + "list with the following lines:\n\n", new_regs); + for_each_new_reg(i) + print_reg(reg_list->reg[i]); + putchar('\n'); + } + + if (missing_regs) { + printf("\nThere are %d missing registers.\n" + "The following lines are missing registers:\n\n", missing_regs); + for_each_missing_reg(i) + print_reg(blessed_reg[i]); + putchar('\n'); + } + + TEST_ASSERT(!missing_regs && !failed_get && !failed_set, + "There are %d missing registers; %d registers failed get; %d registers failed set", + missing_regs, failed_get, failed_set); + + return 0; +} + +/* + * The current blessed list was primed with the output of kernel version + * v4.15 with --core-reg-fixup and then later updated with new registers. + */ +static __u64 blessed_reg[] = { + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[5]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[6]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[7]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[8]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[9]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[10]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[11]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[12]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[13]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[14]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[15]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[16]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[17]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[18]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[19]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[20]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[21]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[22]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[23]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[24]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[25]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[26]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[27]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[28]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[29]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[30]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.sp), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pstate), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(elr_el1), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]), + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr), + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr), + KVM_REG_ARM_FW_REG(0), + KVM_REG_ARM_FW_REG(1), + KVM_REG_ARM_FW_REG(2), + ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */ + ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */ + ARM64_SYS_REG(3, 3, 14, 0, 2), + ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */ + ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */ + ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */ + ARM64_SYS_REG(3, 1, 0, 0, 7), /* AIDR_EL1 */ + ARM64_SYS_REG(3, 3, 0, 0, 1), /* CTR_EL0 */ + ARM64_SYS_REG(2, 0, 0, 0, 4), + ARM64_SYS_REG(2, 0, 0, 0, 5), + ARM64_SYS_REG(2, 0, 0, 0, 6), + ARM64_SYS_REG(2, 0, 0, 0, 7), + ARM64_SYS_REG(2, 0, 0, 1, 4), + ARM64_SYS_REG(2, 0, 0, 1, 5), + ARM64_SYS_REG(2, 0, 0, 1, 6), + ARM64_SYS_REG(2, 0, 0, 1, 7), + ARM64_SYS_REG(2, 0, 0, 2, 0), /* MDCCINT_EL1 */ + ARM64_SYS_REG(2, 0, 0, 2, 2), /* MDSCR_EL1 */ + ARM64_SYS_REG(2, 0, 0, 2, 4), + ARM64_SYS_REG(2, 0, 0, 2, 5), + ARM64_SYS_REG(2, 0, 0, 2, 6), + ARM64_SYS_REG(2, 0, 0, 2, 7), + ARM64_SYS_REG(2, 0, 0, 3, 4), + ARM64_SYS_REG(2, 0, 0, 3, 5), + ARM64_SYS_REG(2, 0, 0, 3, 6), + ARM64_SYS_REG(2, 0, 0, 3, 7), + ARM64_SYS_REG(2, 0, 0, 4, 4), + ARM64_SYS_REG(2, 0, 0, 4, 5), + ARM64_SYS_REG(2, 0, 0, 4, 6), + ARM64_SYS_REG(2, 0, 0, 4, 7), + ARM64_SYS_REG(2, 0, 0, 5, 4), + ARM64_SYS_REG(2, 0, 0, 5, 5), + ARM64_SYS_REG(2, 0, 0, 5, 6), + ARM64_SYS_REG(2, 0, 0, 5, 7), + ARM64_SYS_REG(2, 0, 0, 6, 4), + ARM64_SYS_REG(2, 0, 0, 6, 5), + ARM64_SYS_REG(2, 0, 0, 6, 6), + ARM64_SYS_REG(2, 0, 0, 6, 7), + ARM64_SYS_REG(2, 0, 0, 7, 4), + ARM64_SYS_REG(2, 0, 0, 7, 5), + ARM64_SYS_REG(2, 0, 0, 7, 6), + ARM64_SYS_REG(2, 0, 0, 7, 7), + ARM64_SYS_REG(2, 0, 0, 8, 4), + ARM64_SYS_REG(2, 0, 0, 8, 5), + ARM64_SYS_REG(2, 0, 0, 8, 6), + ARM64_SYS_REG(2, 0, 0, 8, 7), + ARM64_SYS_REG(2, 0, 0, 9, 4), + ARM64_SYS_REG(2, 0, 0, 9, 5), + ARM64_SYS_REG(2, 0, 0, 9, 6), + ARM64_SYS_REG(2, 0, 0, 9, 7), + ARM64_SYS_REG(2, 0, 0, 10, 4), + ARM64_SYS_REG(2, 0, 0, 10, 5), + ARM64_SYS_REG(2, 0, 0, 10, 6), + ARM64_SYS_REG(2, 0, 0, 10, 7), + ARM64_SYS_REG(2, 0, 0, 11, 4), + ARM64_SYS_REG(2, 0, 0, 11, 5), + ARM64_SYS_REG(2, 0, 0, 11, 6), + ARM64_SYS_REG(2, 0, 0, 11, 7), + ARM64_SYS_REG(2, 0, 0, 12, 4), + ARM64_SYS_REG(2, 0, 0, 12, 5), + ARM64_SYS_REG(2, 0, 0, 12, 6), + ARM64_SYS_REG(2, 0, 0, 12, 7), + ARM64_SYS_REG(2, 0, 0, 13, 4), + ARM64_SYS_REG(2, 0, 0, 13, 5), + ARM64_SYS_REG(2, 0, 0, 13, 6), + ARM64_SYS_REG(2, 0, 0, 13, 7), + ARM64_SYS_REG(2, 0, 0, 14, 4), + ARM64_SYS_REG(2, 0, 0, 14, 5), + ARM64_SYS_REG(2, 0, 0, 14, 6), + ARM64_SYS_REG(2, 0, 0, 14, 7), + ARM64_SYS_REG(2, 0, 0, 15, 4), + ARM64_SYS_REG(2, 0, 0, 15, 5), + ARM64_SYS_REG(2, 0, 0, 15, 6), + ARM64_SYS_REG(2, 0, 0, 15, 7), + ARM64_SYS_REG(2, 4, 0, 7, 0), /* DBGVCR32_EL2 */ + ARM64_SYS_REG(3, 0, 0, 0, 5), /* MPIDR_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 0), /* ID_PFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 1), /* ID_PFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 2), /* ID_DFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 3), /* ID_AFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 4), /* ID_MMFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 5), /* ID_MMFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 6), /* ID_MMFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 7), /* ID_MMFR3_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 0), /* ID_ISAR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 1), /* ID_ISAR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 2), /* ID_ISAR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 3), /* ID_ISAR3_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 4), /* ID_ISAR4_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 5), /* ID_ISAR5_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 6), /* ID_MMFR4_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 7), /* ID_ISAR6_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 0), /* MVFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 1), /* MVFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 2), /* MVFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 3), + ARM64_SYS_REG(3, 0, 0, 3, 4), /* ID_PFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 5), /* ID_DFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 6), /* ID_MMFR5_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 7), + ARM64_SYS_REG(3, 0, 0, 4, 0), /* ID_AA64PFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 1), /* ID_AA64PFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 2), + ARM64_SYS_REG(3, 0, 0, 4, 3), + ARM64_SYS_REG(3, 0, 0, 4, 4), /* ID_AA64ZFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 5), + ARM64_SYS_REG(3, 0, 0, 4, 6), + ARM64_SYS_REG(3, 0, 0, 4, 7), + ARM64_SYS_REG(3, 0, 0, 5, 0), /* ID_AA64DFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 1), /* ID_AA64DFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 2), + ARM64_SYS_REG(3, 0, 0, 5, 3), + ARM64_SYS_REG(3, 0, 0, 5, 4), /* ID_AA64AFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 5), /* ID_AA64AFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 6), + ARM64_SYS_REG(3, 0, 0, 5, 7), + ARM64_SYS_REG(3, 0, 0, 6, 0), /* ID_AA64ISAR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 6, 1), /* ID_AA64ISAR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 6, 2), + ARM64_SYS_REG(3, 0, 0, 6, 3), + ARM64_SYS_REG(3, 0, 0, 6, 4), + ARM64_SYS_REG(3, 0, 0, 6, 5), + ARM64_SYS_REG(3, 0, 0, 6, 6), + ARM64_SYS_REG(3, 0, 0, 6, 7), + ARM64_SYS_REG(3, 0, 0, 7, 0), /* ID_AA64MMFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 1), /* ID_AA64MMFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 2), /* ID_AA64MMFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), + ARM64_SYS_REG(3, 0, 0, 7, 4), + ARM64_SYS_REG(3, 0, 0, 7, 5), + ARM64_SYS_REG(3, 0, 0, 7, 6), + ARM64_SYS_REG(3, 0, 0, 7, 7), + ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */ + ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */ + ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */ + ARM64_SYS_REG(3, 0, 5, 1, 0), /* AFSR0_EL1 */ + ARM64_SYS_REG(3, 0, 5, 1, 1), /* AFSR1_EL1 */ + ARM64_SYS_REG(3, 0, 5, 2, 0), /* ESR_EL1 */ + ARM64_SYS_REG(3, 0, 6, 0, 0), /* FAR_EL1 */ + ARM64_SYS_REG(3, 0, 7, 4, 0), /* PAR_EL1 */ + ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */ + ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */ + ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */ + ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */ + ARM64_SYS_REG(3, 0, 13, 0, 1), /* CONTEXTIDR_EL1 */ + ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */ + ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */ + ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */ + ARM64_SYS_REG(3, 3, 9, 12, 0), /* PMCR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 1), /* PMCNTENSET_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 2), /* PMCNTENCLR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 3), /* PMOVSCLR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 4), /* PMSWINC_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 5), /* PMSELR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 13, 0), /* PMCCNTR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 14, 0), /* PMUSERENR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 14, 3), /* PMOVSSET_EL0 */ + ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */ + ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */ + ARM64_SYS_REG(3, 3, 14, 8, 0), + ARM64_SYS_REG(3, 3, 14, 8, 1), + ARM64_SYS_REG(3, 3, 14, 8, 2), + ARM64_SYS_REG(3, 3, 14, 8, 3), + ARM64_SYS_REG(3, 3, 14, 8, 4), + ARM64_SYS_REG(3, 3, 14, 8, 5), + ARM64_SYS_REG(3, 3, 14, 8, 6), + ARM64_SYS_REG(3, 3, 14, 8, 7), + ARM64_SYS_REG(3, 3, 14, 9, 0), + ARM64_SYS_REG(3, 3, 14, 9, 1), + ARM64_SYS_REG(3, 3, 14, 9, 2), + ARM64_SYS_REG(3, 3, 14, 9, 3), + ARM64_SYS_REG(3, 3, 14, 9, 4), + ARM64_SYS_REG(3, 3, 14, 9, 5), + ARM64_SYS_REG(3, 3, 14, 9, 6), + ARM64_SYS_REG(3, 3, 14, 9, 7), + ARM64_SYS_REG(3, 3, 14, 10, 0), + ARM64_SYS_REG(3, 3, 14, 10, 1), + ARM64_SYS_REG(3, 3, 14, 10, 2), + ARM64_SYS_REG(3, 3, 14, 10, 3), + ARM64_SYS_REG(3, 3, 14, 10, 4), + ARM64_SYS_REG(3, 3, 14, 10, 5), + ARM64_SYS_REG(3, 3, 14, 10, 6), + ARM64_SYS_REG(3, 3, 14, 10, 7), + ARM64_SYS_REG(3, 3, 14, 11, 0), + ARM64_SYS_REG(3, 3, 14, 11, 1), + ARM64_SYS_REG(3, 3, 14, 11, 2), + ARM64_SYS_REG(3, 3, 14, 11, 3), + ARM64_SYS_REG(3, 3, 14, 11, 4), + ARM64_SYS_REG(3, 3, 14, 11, 5), + ARM64_SYS_REG(3, 3, 14, 11, 6), + ARM64_SYS_REG(3, 3, 14, 12, 0), + ARM64_SYS_REG(3, 3, 14, 12, 1), + ARM64_SYS_REG(3, 3, 14, 12, 2), + ARM64_SYS_REG(3, 3, 14, 12, 3), + ARM64_SYS_REG(3, 3, 14, 12, 4), + ARM64_SYS_REG(3, 3, 14, 12, 5), + ARM64_SYS_REG(3, 3, 14, 12, 6), + ARM64_SYS_REG(3, 3, 14, 12, 7), + ARM64_SYS_REG(3, 3, 14, 13, 0), + ARM64_SYS_REG(3, 3, 14, 13, 1), + ARM64_SYS_REG(3, 3, 14, 13, 2), + ARM64_SYS_REG(3, 3, 14, 13, 3), + ARM64_SYS_REG(3, 3, 14, 13, 4), + ARM64_SYS_REG(3, 3, 14, 13, 5), + ARM64_SYS_REG(3, 3, 14, 13, 6), + ARM64_SYS_REG(3, 3, 14, 13, 7), + ARM64_SYS_REG(3, 3, 14, 14, 0), + ARM64_SYS_REG(3, 3, 14, 14, 1), + ARM64_SYS_REG(3, 3, 14, 14, 2), + ARM64_SYS_REG(3, 3, 14, 14, 3), + ARM64_SYS_REG(3, 3, 14, 14, 4), + ARM64_SYS_REG(3, 3, 14, 14, 5), + ARM64_SYS_REG(3, 3, 14, 14, 6), + ARM64_SYS_REG(3, 3, 14, 14, 7), + ARM64_SYS_REG(3, 3, 14, 15, 0), + ARM64_SYS_REG(3, 3, 14, 15, 1), + ARM64_SYS_REG(3, 3, 14, 15, 2), + ARM64_SYS_REG(3, 3, 14, 15, 3), + ARM64_SYS_REG(3, 3, 14, 15, 4), + ARM64_SYS_REG(3, 3, 14, 15, 5), + ARM64_SYS_REG(3, 3, 14, 15, 6), + ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */ + ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */ + ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */ + ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */ + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 0, + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1, + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2, +}; +static __u64 blessed_n = ARRAY_SIZE(blessed_reg); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 77f53dbc34ff..feb949a92055 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -152,6 +152,7 @@ void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_guest_debug *debug); void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_mp_state *mp_state); +struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 91e547031d8c..7669cb7c86e3 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1291,6 +1291,35 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, "rc: %i errno: %i", ret, errno); } +/* + * VM VCPU Get Reg List + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * + * Output Args: + * None + * + * Return: + * A pointer to an allocated struct kvm_reg_list + * + * Get the list of guest registers which are supported for + * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls + */ +struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list; + int ret; + + ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, ®_list_n); + TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0"); + reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64)); + reg_list->n = reg_list_n.n; + vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list); + return reg_list; +} + /* * VM VCPU Regs Get * -- cgit v1.3.1 From 31d212959179015bc07f3af4e890cadd26e01ee0 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 29 Oct 2020 21:17:03 +0100 Subject: KVM: selftests: Add blessed SVE registers to get-reg-list Add support for the SVE registers to get-reg-list and create a new test, get-reg-list-sve, which tests them when running on a machine with SVE support. Signed-off-by: Andrew Jones Message-Id: <20201029201703.102716-5-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/aarch64/get-reg-list-sve.c | 3 + tools/testing/selftests/kvm/aarch64/get-reg-list.c | 254 +++++++++++++++++---- 4 files changed, 217 insertions(+), 42 deletions(-) create mode 100644 tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 2034765862a2..ea1692d43411 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only /aarch64/get-reg-list +/aarch64/get-reg-list-sve /s390x/memop /s390x/resets /s390x/sync_regs_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 771455ae4a1b..2b0d7d325e2a 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -67,6 +67,7 @@ TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list +TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve TEST_GEN_PROGS_aarch64 += clear_dirty_log_test TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c new file mode 100644 index 000000000000..efba76682b4b --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c @@ -0,0 +1,3 @@ +// SPDX-License-Identifier: GPL-2.0 +#define REG_LIST_SVE +#include "get-reg-list.c" diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 3ff097f6886e..33218a395d9f 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -29,6 +29,13 @@ #include #include "kvm_util.h" #include "test_util.h" +#include "processor.h" + +#ifdef REG_LIST_SVE +#define reg_list_sve() (true) +#else +#define reg_list_sve() (false) +#endif #define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK) @@ -46,8 +53,9 @@ static struct kvm_reg_list *reg_list; -static __u64 blessed_reg[]; -static __u64 blessed_n; +static __u64 base_regs[], vregs[], sve_regs[], rejects_set[]; +static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n; +static __u64 *blessed_reg, blessed_n; static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg) { @@ -119,6 +127,40 @@ static const char *core_id_to_str(__u64 id) return NULL; } +static const char *sve_id_to_str(__u64 id) +{ + __u64 sve_off, n, i; + + if (id == KVM_REG_ARM64_SVE_VLS) + return "KVM_REG_ARM64_SVE_VLS"; + + sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1)); + i = id & (KVM_ARM64_SVE_MAX_SLICES - 1); + + TEST_ASSERT(i == 0, "Currently we don't expect slice > 0, reg id 0x%llx", id); + + switch (sve_off) { + case KVM_REG_ARM64_SVE_ZREG_BASE ... + KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1: + n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1); + TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0), + "Unexpected bits set in SVE ZREG id: 0x%llx", id); + return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n); + case KVM_REG_ARM64_SVE_PREG_BASE ... + KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1: + n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1); + TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0), + "Unexpected bits set in SVE PREG id: 0x%llx", id); + return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n); + case KVM_REG_ARM64_SVE_FFR_BASE: + TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0), + "Unexpected bits set in SVE FFR id: 0x%llx", id); + return "KVM_REG_ARM64_SVE_FFR(0)"; + } + + return NULL; +} + static void print_reg(__u64 id) { unsigned op0, op1, crn, crm, op2; @@ -186,7 +228,10 @@ static void print_reg(__u64 id) printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff); break; case KVM_REG_ARM64_SVE: - TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id); + if (reg_list_sve()) + printf("\t%s,\n", sve_id_to_str(id)); + else + TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id); break; default: TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx", @@ -251,12 +296,40 @@ static void core_reg_fixup(void) reg_list = tmp; } +static void prepare_vcpu_init(struct kvm_vcpu_init *init) +{ + if (reg_list_sve()) + init->features[0] |= 1 << KVM_ARM_VCPU_SVE; +} + +static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid) +{ + int feature; + + if (reg_list_sve()) { + feature = KVM_ARM_VCPU_SVE; + vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature); + } +} + +static void check_supported(void) +{ + if (reg_list_sve() && !kvm_check_cap(KVM_CAP_ARM_SVE)) { + fprintf(stderr, "SVE not available, skipping tests\n"); + exit(KSFT_SKIP); + } +} + int main(int ac, char **av) { + struct kvm_vcpu_init init = { .target = -1, }; int new_regs = 0, missing_regs = 0, i; - int failed_get = 0, failed_set = 0; + int failed_get = 0, failed_set = 0, failed_reject = 0; bool print_list = false, fixup_core_regs = false; struct kvm_vm *vm; + __u64 *vec_regs; + + check_supported(); for (i = 1; i < ac; ++i) { if (strcmp(av[i], "--core-reg-fixup") == 0) @@ -267,7 +340,11 @@ int main(int ac, char **av) fprintf(stderr, "Ignoring unknown option: %s\n", av[i]); } - vm = vm_create_default(0, 0, NULL); + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + prepare_vcpu_init(&init); + aarch64_vcpu_add_default(vm, 0, &init, NULL); + finalize_vcpu(vm, 0); + reg_list = vcpu_get_reg_list(vm, 0); if (fixup_core_regs) @@ -307,6 +384,18 @@ int main(int ac, char **av) ++failed_get; } + /* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */ + if (find_reg(rejects_set, rejects_set_n, reg.id)) { + ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); + if (ret != -1 || errno != EPERM) { + printf("Failed to reject (ret=%d, errno=%d) ", ret, errno); + print_reg(reg.id); + putchar('\n'); + ++failed_reject; + } + continue; + } + ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); if (ret) { puts("Failed to set "); @@ -316,6 +405,20 @@ int main(int ac, char **av) } } + if (reg_list_sve()) { + blessed_n = base_regs_n + sve_regs_n; + vec_regs = sve_regs; + } else { + blessed_n = base_regs_n + vregs_n; + vec_regs = vregs; + } + + blessed_reg = calloc(blessed_n, sizeof(__u64)); + for (i = 0; i < base_regs_n; ++i) + blessed_reg[i] = base_regs[i]; + for (i = 0; i < blessed_n - base_regs_n; ++i) + blessed_reg[base_regs_n + i] = vec_regs[i]; + for_each_new_reg(i) ++new_regs; @@ -344,9 +447,10 @@ int main(int ac, char **av) putchar('\n'); } - TEST_ASSERT(!missing_regs && !failed_get && !failed_set, - "There are %d missing registers; %d registers failed get; %d registers failed set", - missing_regs, failed_get, failed_set); + TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject, + "There are %d missing registers; " + "%d registers failed get; %d registers failed set; %d registers failed reject", + missing_regs, failed_get, failed_set, failed_reject); return 0; } @@ -355,7 +459,7 @@ int main(int ac, char **av) * The current blessed list was primed with the output of kernel version * v4.15 with --core-reg-fixup and then later updated with new registers. */ -static __u64 blessed_reg[] = { +static __u64 base_regs[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]), KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]), KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]), @@ -397,38 +501,6 @@ static __u64 blessed_reg[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]), KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]), KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]), KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr), KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr), KVM_REG_ARM_FW_REG(0), @@ -668,4 +740,102 @@ static __u64 blessed_reg[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1, KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2, }; -static __u64 blessed_n = ARRAY_SIZE(blessed_reg); +static __u64 base_regs_n = ARRAY_SIZE(base_regs); + +static __u64 vregs[] = { + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]), +}; +static __u64 vregs_n = ARRAY_SIZE(vregs); + +static __u64 sve_regs[] = { + KVM_REG_ARM64_SVE_VLS, + KVM_REG_ARM64_SVE_ZREG(0, 0), + KVM_REG_ARM64_SVE_ZREG(1, 0), + KVM_REG_ARM64_SVE_ZREG(2, 0), + KVM_REG_ARM64_SVE_ZREG(3, 0), + KVM_REG_ARM64_SVE_ZREG(4, 0), + KVM_REG_ARM64_SVE_ZREG(5, 0), + KVM_REG_ARM64_SVE_ZREG(6, 0), + KVM_REG_ARM64_SVE_ZREG(7, 0), + KVM_REG_ARM64_SVE_ZREG(8, 0), + KVM_REG_ARM64_SVE_ZREG(9, 0), + KVM_REG_ARM64_SVE_ZREG(10, 0), + KVM_REG_ARM64_SVE_ZREG(11, 0), + KVM_REG_ARM64_SVE_ZREG(12, 0), + KVM_REG_ARM64_SVE_ZREG(13, 0), + KVM_REG_ARM64_SVE_ZREG(14, 0), + KVM_REG_ARM64_SVE_ZREG(15, 0), + KVM_REG_ARM64_SVE_ZREG(16, 0), + KVM_REG_ARM64_SVE_ZREG(17, 0), + KVM_REG_ARM64_SVE_ZREG(18, 0), + KVM_REG_ARM64_SVE_ZREG(19, 0), + KVM_REG_ARM64_SVE_ZREG(20, 0), + KVM_REG_ARM64_SVE_ZREG(21, 0), + KVM_REG_ARM64_SVE_ZREG(22, 0), + KVM_REG_ARM64_SVE_ZREG(23, 0), + KVM_REG_ARM64_SVE_ZREG(24, 0), + KVM_REG_ARM64_SVE_ZREG(25, 0), + KVM_REG_ARM64_SVE_ZREG(26, 0), + KVM_REG_ARM64_SVE_ZREG(27, 0), + KVM_REG_ARM64_SVE_ZREG(28, 0), + KVM_REG_ARM64_SVE_ZREG(29, 0), + KVM_REG_ARM64_SVE_ZREG(30, 0), + KVM_REG_ARM64_SVE_ZREG(31, 0), + KVM_REG_ARM64_SVE_PREG(0, 0), + KVM_REG_ARM64_SVE_PREG(1, 0), + KVM_REG_ARM64_SVE_PREG(2, 0), + KVM_REG_ARM64_SVE_PREG(3, 0), + KVM_REG_ARM64_SVE_PREG(4, 0), + KVM_REG_ARM64_SVE_PREG(5, 0), + KVM_REG_ARM64_SVE_PREG(6, 0), + KVM_REG_ARM64_SVE_PREG(7, 0), + KVM_REG_ARM64_SVE_PREG(8, 0), + KVM_REG_ARM64_SVE_PREG(9, 0), + KVM_REG_ARM64_SVE_PREG(10, 0), + KVM_REG_ARM64_SVE_PREG(11, 0), + KVM_REG_ARM64_SVE_PREG(12, 0), + KVM_REG_ARM64_SVE_PREG(13, 0), + KVM_REG_ARM64_SVE_PREG(14, 0), + KVM_REG_ARM64_SVE_PREG(15, 0), + KVM_REG_ARM64_SVE_FFR(0), + ARM64_SYS_REG(3, 0, 1, 2, 0), /* ZCR_EL1 */ +}; +static __u64 sve_regs_n = ARRAY_SIZE(sve_regs); + +static __u64 rejects_set[] = { +#ifdef REG_LIST_SVE + KVM_REG_ARM64_SVE_VLS, +#endif +}; +static __u64 rejects_set_n = ARRAY_SIZE(rejects_set); -- cgit v1.3.1 From 3031e0288e60f09533339e61117b83099a6e126e Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:28 -0400 Subject: KVM: selftests: Always clear dirty bitmap after iteration We used not to clear the dirty bitmap before because KVM_GET_DIRTY_LOG would overwrite it the next time it copies the dirty log onto it. In the upcoming dirty ring tests we'll start to fetch dirty pages from a ring buffer, so no one is going to clear the dirty bitmap for us. Reviewed-by: Andrew Jones Signed-off-by: Peter Xu Message-Id: <20201001012228.5916-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 752ec158ac59..6a8275a22861 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -195,7 +195,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) page); } - if (test_bit_le(page, bmap)) { + if (test_and_clear_bit_le(page, bmap)) { host_dirty_count++; /* * If the bit is set, the value written onto -- cgit v1.3.1 From afdb1960071935cfd5c1908691a34cc6e36931f7 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:33 -0400 Subject: KVM: selftests: Use a single binary for dirty/clear log test Remove the clear_dirty_log test, instead merge it into the existing dirty_log_test. It should be cleaner to use this single binary to do both tests, also it's a preparation for the upcoming dirty ring test. The default behavior will run all the modes in sequence. Reviewed-by: Andrew Jones Signed-off-by: Peter Xu Message-Id: <20201001012233.6013-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 2 - tools/testing/selftests/kvm/clear_dirty_log_test.c | 6 - tools/testing/selftests/kvm/dirty_log_test.c | 187 +++++++++++++++++---- 3 files changed, 156 insertions(+), 39 deletions(-) delete mode 100644 tools/testing/selftests/kvm/clear_dirty_log_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 2b0d7d325e2a..c8af04dccf7f 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -59,7 +59,6 @@ TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test TEST_GEN_PROGS_x86_64 += x86_64/debug_regs TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test -TEST_GEN_PROGS_x86_64 += clear_dirty_log_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus @@ -68,7 +67,6 @@ TEST_GEN_PROGS_x86_64 += steal_time TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve -TEST_GEN_PROGS_aarch64 += clear_dirty_log_test TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus diff --git a/tools/testing/selftests/kvm/clear_dirty_log_test.c b/tools/testing/selftests/kvm/clear_dirty_log_test.c deleted file mode 100644 index 11672ec6f74e..000000000000 --- a/tools/testing/selftests/kvm/clear_dirty_log_test.c +++ /dev/null @@ -1,6 +0,0 @@ -#define USE_CLEAR_DIRTY_LOG -#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) -#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) -#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ - KVM_DIRTY_LOG_INITIALLY_SET) -#include "dirty_log_test.c" diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 6a8275a22861..139ccb550618 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -128,6 +128,78 @@ static uint64_t host_dirty_count; static uint64_t host_clear_count; static uint64_t host_track_next_count; +enum log_mode_t { + /* Only use KVM_GET_DIRTY_LOG for logging */ + LOG_MODE_DIRTY_LOG = 0, + + /* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */ + LOG_MODE_CLEAR_LOG = 1, + + LOG_MODE_NUM, + + /* Run all supported modes */ + LOG_MODE_ALL = LOG_MODE_NUM, +}; + +/* Mode of logging to test. Default is to run all supported modes */ +static enum log_mode_t host_log_mode_option = LOG_MODE_ALL; +/* Logging mode for current run */ +static enum log_mode_t host_log_mode; + +static bool clear_log_supported(void) +{ + return kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); +} + +static void clear_log_create_vm_done(struct kvm_vm *vm) +{ + struct kvm_enable_cap cap = {}; + u64 manual_caps; + + manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); + TEST_ASSERT(manual_caps, "MANUAL_CAPS is zero!"); + manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | + KVM_DIRTY_LOG_INITIALLY_SET); + cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; + cap.args[0] = manual_caps; + vm_enable_cap(vm, &cap); +} + +static void dirty_log_collect_dirty_pages(struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages) +{ + kvm_vm_get_dirty_log(vm, slot, bitmap); +} + +static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages) +{ + kvm_vm_get_dirty_log(vm, slot, bitmap); + kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages); +} + +struct log_mode { + const char *name; + /* Return true if this mode is supported, otherwise false */ + bool (*supported)(void); + /* Hook when the vm creation is done (before vcpu creation) */ + void (*create_vm_done)(struct kvm_vm *vm); + /* Hook to collect the dirty pages into the bitmap provided */ + void (*collect_dirty_pages) (struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages); +} log_modes[LOG_MODE_NUM] = { + { + .name = "dirty-log", + .collect_dirty_pages = dirty_log_collect_dirty_pages, + }, + { + .name = "clear-log", + .supported = clear_log_supported, + .create_vm_done = clear_log_create_vm_done, + .collect_dirty_pages = clear_log_collect_dirty_pages, + }, +}; + /* * We use this bitmap to track some pages that should have its dirty * bit set in the _next_ iteration. For example, if we detected the @@ -137,6 +209,44 @@ static uint64_t host_track_next_count; */ static unsigned long *host_bmap_track; +static void log_modes_dump(void) +{ + int i; + + printf("all"); + for (i = 0; i < LOG_MODE_NUM; i++) + printf(", %s", log_modes[i].name); + printf("\n"); +} + +static bool log_mode_supported(void) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + if (mode->supported) + return mode->supported(); + + return true; +} + +static void log_mode_create_vm_done(struct kvm_vm *vm) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + if (mode->create_vm_done) + mode->create_vm_done(vm); +} + +static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + TEST_ASSERT(mode->collect_dirty_pages != NULL, + "collect_dirty_pages() is required for any log mode!"); + mode->collect_dirty_pages(vm, slot, bitmap, num_pages); +} + static void generate_random_array(uint64_t *guest_array, uint64_t size) { uint64_t i; @@ -257,6 +367,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, #ifdef __x86_64__ vm_create_irqchip(vm); #endif + log_mode_create_vm_done(vm); vm_vcpu_add_default(vm, vcpuid, guest_code); return vm; } @@ -264,10 +375,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, #define DIRTY_MEM_BITS 30 /* 1G */ #define PAGE_SHIFT_4K 12 -#ifdef USE_CLEAR_DIRTY_LOG -static u64 dirty_log_manual_caps; -#endif - static void run_test(enum vm_guest_mode mode, unsigned long iterations, unsigned long interval, uint64_t phys_offset) { @@ -275,6 +382,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, struct kvm_vm *vm; unsigned long *bmap; + if (!log_mode_supported()) { + print_skip("Log mode '%s' not supported", + log_modes[host_log_mode].name); + return; + } + /* * We reserve page table for 2 times of extra dirty mem which * will definitely cover the original (1G+) test range. Here @@ -317,14 +430,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, bmap = bitmap_alloc(host_num_pages); host_bmap_track = bitmap_alloc(host_num_pages); -#ifdef USE_CLEAR_DIRTY_LOG - struct kvm_enable_cap cap = {}; - - cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; - cap.args[0] = dirty_log_manual_caps; - vm_enable_cap(vm, &cap); -#endif - /* Add an extra memory slot for testing dirty logging */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, guest_test_phys_mem, @@ -362,11 +467,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, while (iteration < iterations) { /* Give the vcpu thread some time to dirty some pages */ usleep(interval * 1000); - kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); -#ifdef USE_CLEAR_DIRTY_LOG - kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, - host_num_pages); -#endif + log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX, + bmap, host_num_pages); vm_dirty_log_verify(mode, bmap); iteration++; sync_global_to_guest(vm, iteration); @@ -410,6 +512,9 @@ static void help(char *name) TEST_HOST_LOOP_INTERVAL); printf(" -p: specify guest physical test memory offset\n" " Warning: a low offset can conflict with the loaded test code.\n"); + printf(" -M: specify the host logging mode " + "(default: run all log modes). Supported modes: \n\t"); + log_modes_dump(); printf(" -m: specify the guest mode ID to test " "(default: test all supported modes)\n" " This option may be used multiple times.\n" @@ -429,18 +534,7 @@ int main(int argc, char *argv[]) bool mode_selected = false; uint64_t phys_offset = 0; unsigned int mode; - int opt, i; - -#ifdef USE_CLEAR_DIRTY_LOG - dirty_log_manual_caps = - kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); - if (!dirty_log_manual_caps) { - print_skip("KVM_CLEAR_DIRTY_LOG not available"); - exit(KSFT_SKIP); - } - dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | - KVM_DIRTY_LOG_INITIALLY_SET); -#endif + int opt, i, j; #ifdef __x86_64__ guest_mode_init(VM_MODE_PXXV48_4K, true, true); @@ -464,7 +558,7 @@ int main(int argc, char *argv[]) guest_mode_init(VM_MODE_P40V48_4K, true, true); #endif - while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) { + while ((opt = getopt(argc, argv, "hi:I:p:m:M:")) != -1) { switch (opt) { case 'i': iterations = strtol(optarg, NULL, 10); @@ -486,6 +580,26 @@ int main(int argc, char *argv[]) "Guest mode ID %d too big", mode); guest_modes[mode].enabled = true; break; + case 'M': + if (!strcmp(optarg, "all")) { + host_log_mode_option = LOG_MODE_ALL; + break; + } + for (i = 0; i < LOG_MODE_NUM; i++) { + if (!strcmp(optarg, log_modes[i].name)) { + pr_info("Setting log mode to: '%s'\n", + optarg); + host_log_mode_option = i; + break; + } + } + if (i == LOG_MODE_NUM) { + printf("Log mode '%s' invalid. Please choose " + "from: ", optarg); + log_modes_dump(); + exit(1); + } + break; case 'h': default: help(argv[0]); @@ -507,7 +621,18 @@ int main(int argc, char *argv[]) TEST_ASSERT(guest_modes[i].supported, "Guest mode ID %d (%s) not supported.", i, vm_guest_mode_string(i)); - run_test(i, iterations, interval, phys_offset); + if (host_log_mode_option == LOG_MODE_ALL) { + /* Run each log mode */ + for (j = 0; j < LOG_MODE_NUM; j++) { + pr_info("Testing Log Mode '%s'\n", + log_modes[j].name); + host_log_mode = j; + run_test(i, iterations, interval, phys_offset); + } + } else { + host_log_mode = host_log_mode_option; + run_test(i, iterations, interval, phys_offset); + } } return 0; -- cgit v1.3.1 From 4b5d12b0e21cc9f9f00201819844fcafb020ffad Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 27 Oct 2020 16:37:29 -0700 Subject: KVM: selftests: Factor code out of demand_paging_test Much of the code in demand_paging_test can be reused by other, similar multi-vCPU-memory-touching-perfromance-tests. Factor that common code out for reuse. No functional change expected. This series was tested by running the following invocations on an Intel Skylake machine: dirty_log_perf_test -b 20m -i 100 -v 64 dirty_log_perf_test -b 20g -i 5 -v 4 dirty_log_perf_test -b 4g -i 5 -v 32 demand_paging_test -b 20m -v 64 demand_paging_test -b 20g -v 4 demand_paging_test -b 4g -v 32 All behaved as expected. Signed-off-by: Ben Gardon Message-Id: <20201027233733.1484855-2-bgardon@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 204 +++------------------ .../testing/selftests/kvm/include/perf_test_util.h | 187 +++++++++++++++++++ 2 files changed, 210 insertions(+), 181 deletions(-) create mode 100644 tools/testing/selftests/kvm/include/perf_test_util.h (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 360cd3ea4cd6..4251e98ceb69 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -21,18 +21,12 @@ #include #include -#include "test_util.h" -#include "kvm_util.h" +#include "perf_test_util.h" #include "processor.h" +#include "test_util.h" #ifdef __NR_userfaultfd -/* The memory slot index demand page */ -#define TEST_MEM_SLOT_INDEX 1 - -/* Default guest test virtual memory offset */ -#define DEFAULT_GUEST_TEST_MEM 0xc0000000 - #define DEFAULT_GUEST_TEST_MEM_SIZE (1 << 30) /* 1G */ #ifdef PRINT_PER_PAGE_UPDATES @@ -47,75 +41,14 @@ #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) #endif -#define MAX_VCPUS 512 - -/* - * Guest/Host shared variables. Ensure addr_gva2hva() and/or - * sync_global_to/from_guest() are used when accessing from - * the host. READ/WRITE_ONCE() should also be used with anything - * that may change. - */ -static uint64_t host_page_size; -static uint64_t guest_page_size; - static char *guest_data_prototype; -/* - * Guest physical memory offset of the testing memory slot. - * This will be set to the topmost valid physical address minus - * the test memory size. - */ -static uint64_t guest_test_phys_mem; - -/* - * Guest virtual memory offset of the testing memory slot. - * Must not conflict with identity mapped test code. - */ -static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; - -struct vcpu_args { - uint64_t gva; - uint64_t pages; - - /* Only used by the host userspace part of the vCPU thread */ - int vcpu_id; - struct kvm_vm *vm; -}; - -static struct vcpu_args vcpu_args[MAX_VCPUS]; - -/* - * Continuously write to the first 8 bytes of each page in the demand paging - * memory region. - */ -static void guest_code(uint32_t vcpu_id) -{ - uint64_t gva; - uint64_t pages; - int i; - - /* Make sure vCPU args data structure is not corrupt. */ - GUEST_ASSERT(vcpu_args[vcpu_id].vcpu_id == vcpu_id); - - gva = vcpu_args[vcpu_id].gva; - pages = vcpu_args[vcpu_id].pages; - - for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * guest_page_size); - - addr &= ~(host_page_size - 1); - *(uint64_t *)addr = 0x0123456789ABCDEF; - } - - GUEST_SYNC(1); -} - static void *vcpu_worker(void *data) { int ret; - struct vcpu_args *args = (struct vcpu_args *)data; - struct kvm_vm *vm = args->vm; - int vcpu_id = args->vcpu_id; + struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + int vcpu_id = vcpu_args->vcpu_id; + struct kvm_vm *vm = perf_test_args.vm; struct kvm_run *run; struct timespec start, end, ts_diff; @@ -141,39 +74,6 @@ static void *vcpu_worker(void *data) return NULL; } -#define PAGE_SHIFT_4K 12 -#define PTES_PER_4K_PT 512 - -static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, - uint64_t vcpu_memory_bytes) -{ - struct kvm_vm *vm; - uint64_t pages = DEFAULT_GUEST_PHY_PAGES; - - /* Account for a few pages per-vCPU for stacks */ - pages += DEFAULT_STACK_PGS * vcpus; - - /* - * Reserve twice the ammount of memory needed to map the test region and - * the page table / stacks region, at 4k, for page tables. Do the - * calculation with 4K page size: the smallest of all archs. (e.g., 64K - * page size guest will need even less memory for page tables). - */ - pages += (2 * pages) / PTES_PER_4K_PT; - pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) / - PTES_PER_4K_PT; - pages = vm_adjust_num_guest_pages(mode, pages); - - pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - - vm = _vm_create(mode, pages, O_RDWR); - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); -#ifdef __x86_64__ - vm_create_irqchip(vm); -#endif - return vm; -} - static int handle_uffd_page_request(int uffd, uint64_t addr) { pid_t tid; @@ -186,7 +86,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) copy.src = (uint64_t)guest_data_prototype; copy.dst = addr; - copy.len = host_page_size; + copy.len = perf_test_args.host_page_size; copy.mode = 0; clock_gettime(CLOCK_MONOTONIC, &start); @@ -203,7 +103,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid, timespec_to_ns(timespec_sub(end, start))); PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n", - host_page_size, addr, tid); + perf_test_args.host_page_size, addr, tid); return 0; } @@ -360,64 +260,21 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, struct timespec start, end, ts_diff; int *pipefds = NULL; struct kvm_vm *vm; - uint64_t guest_num_pages; int vcpu_id; int r; vm = create_vm(mode, vcpus, vcpu_memory_bytes); - guest_page_size = vm_get_page_size(vm); - - TEST_ASSERT(vcpu_memory_bytes % guest_page_size == 0, - "Guest memory size is not guest page size aligned."); - - guest_num_pages = (vcpus * vcpu_memory_bytes) / guest_page_size; - guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); - - /* - * If there should be more memory in the guest test region than there - * can be pages in the guest, it will definitely cause problems. - */ - TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), - "Requested more guest memory than address space allows.\n" - " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", - guest_num_pages, vm_get_max_gfn(vm), vcpus, - vcpu_memory_bytes); - - host_page_size = getpagesize(); - TEST_ASSERT(vcpu_memory_bytes % host_page_size == 0, - "Guest memory size is not host page size aligned."); - - guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * - guest_page_size; - guest_test_phys_mem &= ~(host_page_size - 1); - -#ifdef __s390x__ - /* Align to 1M (segment size) */ - guest_test_phys_mem &= ~((1 << 20) - 1); -#endif - - pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); - - /* Add an extra memory slot for testing demand paging */ - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - guest_test_phys_mem, - TEST_MEM_SLOT_INDEX, - guest_num_pages, 0); - - /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); - - ucall_init(vm, NULL); - - guest_data_prototype = malloc(host_page_size); + guest_data_prototype = malloc(perf_test_args.host_page_size); TEST_ASSERT(guest_data_prototype, "Failed to allocate buffer for guest data pattern"); - memset(guest_data_prototype, 0xAB, host_page_size); + memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size); vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); + add_vcpus(vm, vcpus, vcpu_memory_bytes); + if (use_uffd) { uffd_handler_threads = malloc(vcpus * sizeof(*uffd_handler_threads)); @@ -428,22 +285,18 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pipefds = malloc(sizeof(int) * vcpus * 2); TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); - } - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { - vm_paddr_t vcpu_gpa; - void *vcpu_hva; - - vm_vcpu_add_default(vm, vcpu_id, guest_code); + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + vm_paddr_t vcpu_gpa; + void *vcpu_hva; - vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); - PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); + vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); + PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); - /* Cache the HVA pointer of the region */ - vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); + /* Cache the HVA pointer of the region */ + vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); - if (use_uffd) { /* * Set up user fault fd to handle demand paging * requests. @@ -460,22 +313,10 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, if (r < 0) exit(-r); } - -#ifdef __x86_64__ - vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid()); -#endif - - vcpu_args[vcpu_id].vm = vm; - vcpu_args[vcpu_id].vcpu_id = vcpu_id; - vcpu_args[vcpu_id].gva = guest_test_virt_mem + - (vcpu_id * vcpu_memory_bytes); - vcpu_args[vcpu_id].pages = vcpu_memory_bytes / guest_page_size; } /* Export the shared variables to the guest */ - sync_global_to_guest(vm, host_page_size); - sync_global_to_guest(vm, guest_page_size); - sync_global_to_guest(vm, vcpu_args); + sync_global_to_guest(vm, perf_test_args); pr_info("Finished creating vCPUs and starting uffd threads\n"); @@ -483,7 +324,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, - &vcpu_args[vcpu_id]); + &perf_test_args.vcpu_args[vcpu_id]); } pr_info("Started all vCPUs\n"); @@ -514,7 +355,8 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pr_info("Total guest execution time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); pr_info("Overall demand paging rate: %f pgs/sec\n", - guest_num_pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); + perf_test_args.vcpu_args[0].pages * vcpus / + ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); ucall_uninit(vm); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h new file mode 100644 index 000000000000..f71f0858a1f2 --- /dev/null +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * tools/testing/selftests/kvm/include/perf_test_util.h + * + * Copyright (C) 2020, Google LLC. + */ + +#ifndef SELFTEST_KVM_PERF_TEST_UTIL_H +#define SELFTEST_KVM_PERF_TEST_UTIL_H + +#include "kvm_util.h" +#include "processor.h" + +#define MAX_VCPUS 512 + +#define PAGE_SHIFT_4K 12 +#define PTES_PER_4K_PT 512 + +#define TEST_MEM_SLOT_INDEX 1 + +/* Default guest test virtual memory offset */ +#define DEFAULT_GUEST_TEST_MEM 0xc0000000 + +/* + * Guest physical memory offset of the testing memory slot. + * This will be set to the topmost valid physical address minus + * the test memory size. + */ +static uint64_t guest_test_phys_mem; + +/* + * Guest virtual memory offset of the testing memory slot. + * Must not conflict with identity mapped test code. + */ +static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; + +struct vcpu_args { + uint64_t gva; + uint64_t pages; + + /* Only used by the host userspace part of the vCPU thread */ + int vcpu_id; +}; + +struct perf_test_args { + struct kvm_vm *vm; + uint64_t host_page_size; + uint64_t guest_page_size; + + struct vcpu_args vcpu_args[MAX_VCPUS]; +}; + +static struct perf_test_args perf_test_args; + +/* + * Continuously write to the first 8 bytes of each page in the + * specified region. + */ +static void guest_code(uint32_t vcpu_id) +{ + struct vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + uint64_t gva; + uint64_t pages; + int i; + + /* Make sure vCPU args data structure is not corrupt. */ + GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id); + + gva = vcpu_args->gva; + pages = vcpu_args->pages; + + for (i = 0; i < pages; i++) { + uint64_t addr = gva + (i * perf_test_args.guest_page_size); + + addr &= ~(perf_test_args.host_page_size - 1); + *(uint64_t *)addr = 0x0123456789ABCDEF; + } + + GUEST_SYNC(1); +} + +static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, + uint64_t vcpu_memory_bytes) +{ + struct kvm_vm *vm; + uint64_t pages = DEFAULT_GUEST_PHY_PAGES; + uint64_t guest_num_pages; + + /* Account for a few pages per-vCPU for stacks */ + pages += DEFAULT_STACK_PGS * vcpus; + + /* + * Reserve twice the ammount of memory needed to map the test region and + * the page table / stacks region, at 4k, for page tables. Do the + * calculation with 4K page size: the smallest of all archs. (e.g., 64K + * page size guest will need even less memory for page tables). + */ + pages += (2 * pages) / PTES_PER_4K_PT; + pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) / + PTES_PER_4K_PT; + pages = vm_adjust_num_guest_pages(mode, pages); + + pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + + vm = _vm_create(mode, pages, O_RDWR); + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); +#ifdef __x86_64__ + vm_create_irqchip(vm); +#endif + + perf_test_args.vm = vm; + perf_test_args.guest_page_size = vm_get_page_size(vm); + perf_test_args.host_page_size = getpagesize(); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, + "Guest memory size is not guest page size aligned."); + + guest_num_pages = (vcpus * vcpu_memory_bytes) / + perf_test_args.guest_page_size; + guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); + + /* + * If there should be more memory in the guest test region than there + * can be pages in the guest, it will definitely cause problems. + */ + TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), + "Requested more guest memory than address space allows.\n" + " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", + guest_num_pages, vm_get_max_gfn(vm), vcpus, + vcpu_memory_bytes); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, + "Guest memory size is not host page size aligned."); + + guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * + perf_test_args.guest_page_size; + guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); + +#ifdef __s390x__ + /* Align to 1M (segment size) */ + guest_test_phys_mem &= ~((1 << 20) - 1); +#endif + + pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); + + /* Add an extra memory slot for testing */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + guest_test_phys_mem, + TEST_MEM_SLOT_INDEX, + guest_num_pages, 0); + + /* Do mapping for the demand paging memory slot */ + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); + + ucall_init(vm, NULL); + + return vm; +} + +static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) +{ + vm_paddr_t vcpu_gpa; + struct vcpu_args *vcpu_args; + int vcpu_id; + + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + + vm_vcpu_add_default(vm, vcpu_id, guest_code); + +#ifdef __x86_64__ + vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid()); +#endif + + vcpu_args->vcpu_id = vcpu_id; + vcpu_args->gva = guest_test_virt_mem + + (vcpu_id * vcpu_memory_bytes); + vcpu_args->pages = vcpu_memory_bytes / + perf_test_args.guest_page_size; + + vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); + pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); + } +} + +#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */ -- cgit v1.3.1 From 2fe5149bdfbf3c2cdfafd2b5b496252d45ca1f78 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 27 Oct 2020 16:37:30 -0700 Subject: KVM: selftests: Remove address rounding in guest code Rounding the address the guest writes to a host page boundary will only have an effect if the host page size is larger than the guest page size, but in that case the guest write would still go to the same host page. There's no reason to round the address down, so remove the rounding to simplify the demand paging test. This series was tested by running the following invocations on an Intel Skylake machine: dirty_log_perf_test -b 20m -i 100 -v 64 dirty_log_perf_test -b 20g -i 5 -v 4 dirty_log_perf_test -b 4g -i 5 -v 32 demand_paging_test -b 20m -v 64 demand_paging_test -b 20g -v 4 demand_paging_test -b 4g -v 32 All behaved as expected. Signed-off-by: Ben Gardon Message-Id: <20201027233733.1484855-3-bgardon@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/perf_test_util.h | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index f71f0858a1f2..838f946700f0 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -72,7 +72,6 @@ static void guest_code(uint32_t vcpu_id) for (i = 0; i < pages; i++) { uint64_t addr = gva + (i * perf_test_args.guest_page_size); - addr &= ~(perf_test_args.host_page_size - 1); *(uint64_t *)addr = 0x0123456789ABCDEF; } -- cgit v1.3.1 From 1eafbd27edb5098ed6b6bc404c35d56c78beb0fd Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 27 Oct 2020 16:37:31 -0700 Subject: KVM: selftests: Simplify demand_paging_test with timespec_diff_now Add a helper function to get the current time and return the time since a given start time. Use that function to simplify the timekeeping in the demand paging test. This series was tested by running the following invocations on an Intel Skylake machine: dirty_log_perf_test -b 20m -i 100 -v 64 dirty_log_perf_test -b 20g -i 5 -v 4 dirty_log_perf_test -b 4g -i 5 -v 32 demand_paging_test -b 20m -v 64 demand_paging_test -b 20g -v 4 demand_paging_test -b 4g -v 32 All behaved as expected. Signed-off-by: Ben Gardon Message-Id: <20201027233733.1484855-4-bgardon@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 26 ++++++++++++------------ tools/testing/selftests/kvm/include/test_util.h | 1 + tools/testing/selftests/kvm/lib/test_util.c | 15 ++++++++++++-- 3 files changed, 27 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 4251e98ceb69..7de6feb00076 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -50,7 +50,8 @@ static void *vcpu_worker(void *data) int vcpu_id = vcpu_args->vcpu_id; struct kvm_vm *vm = perf_test_args.vm; struct kvm_run *run; - struct timespec start, end, ts_diff; + struct timespec start; + struct timespec ts_diff; vcpu_args_set(vm, vcpu_id, 1, vcpu_id); run = vcpu_state(vm, vcpu_id); @@ -66,8 +67,7 @@ static void *vcpu_worker(void *data) exit_reason_str(run->exit_reason)); } - clock_gettime(CLOCK_MONOTONIC, &end); - ts_diff = timespec_sub(end, start); + ts_diff = timespec_diff_now(start); PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id, ts_diff.tv_sec, ts_diff.tv_nsec); @@ -78,7 +78,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) { pid_t tid; struct timespec start; - struct timespec end; + struct timespec ts_diff; struct uffdio_copy copy; int r; @@ -98,10 +98,10 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) return r; } - clock_gettime(CLOCK_MONOTONIC, &end); + ts_diff = timespec_diff_now(start); PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid, - timespec_to_ns(timespec_sub(end, start))); + timespec_to_ns(ts_diff)); PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n", perf_test_args.host_page_size, addr, tid); @@ -123,7 +123,8 @@ static void *uffd_handler_thread_fn(void *arg) int pipefd = uffd_args->pipefd; useconds_t delay = uffd_args->delay; int64_t pages = 0; - struct timespec start, end, ts_diff; + struct timespec start; + struct timespec ts_diff; clock_gettime(CLOCK_MONOTONIC, &start); while (!quit_uffd_thread) { @@ -192,8 +193,7 @@ static void *uffd_handler_thread_fn(void *arg) pages++; } - clock_gettime(CLOCK_MONOTONIC, &end); - ts_diff = timespec_sub(end, start); + ts_diff = timespec_diff_now(start); PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", pages, ts_diff.tv_sec, ts_diff.tv_nsec, pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); @@ -257,7 +257,8 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; struct uffd_handler_args *uffd_args = NULL; - struct timespec start, end, ts_diff; + struct timespec start; + struct timespec ts_diff; int *pipefds = NULL; struct kvm_vm *vm; int vcpu_id; @@ -335,9 +336,9 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id); } - pr_info("All vCPU threads joined\n"); + ts_diff = timespec_diff_now(start); - clock_gettime(CLOCK_MONOTONIC, &end); + pr_info("All vCPU threads joined\n"); if (use_uffd) { char c; @@ -351,7 +352,6 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, } } - ts_diff = timespec_sub(end, start); pr_info("Total guest execution time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); pr_info("Overall demand paging rate: %f pgs/sec\n", diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 5eb01bf51b86..1cc036ddb0c5 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -64,5 +64,6 @@ int64_t timespec_to_ns(struct timespec ts); struct timespec timespec_add_ns(struct timespec ts, int64_t ns); struct timespec timespec_add(struct timespec ts1, struct timespec ts2); struct timespec timespec_sub(struct timespec ts1, struct timespec ts2); +struct timespec timespec_diff_now(struct timespec start); #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 689e97c27ee2..1a46c2c48c7c 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -4,10 +4,13 @@ * * Copyright (C) 2020, Google LLC. */ -#include + +#include #include #include -#include +#include +#include + #include "test_util.h" /* @@ -81,6 +84,14 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2) return timespec_add_ns((struct timespec){0}, ns1 - ns2); } +struct timespec timespec_diff_now(struct timespec start) +{ + struct timespec end; + + clock_gettime(CLOCK_MONOTONIC, &end); + return timespec_sub(end, start); +} + void print_skip(const char *fmt, ...) { va_list ap; -- cgit v1.3.1 From 92ab4b9a22cfea9b0d353e86024208040c10e807 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 27 Oct 2020 16:37:32 -0700 Subject: KVM: selftests: Add wrfract to common guest code Wrfract will be used by the dirty logging perf test introduced later in this series to dirty memory sparsely. This series was tested by running the following invocations on an Intel Skylake machine: dirty_log_perf_test -b 20m -i 100 -v 64 dirty_log_perf_test -b 20g -i 5 -v 4 dirty_log_perf_test -b 4g -i 5 -v 32 demand_paging_test -b 20m -v 64 demand_paging_test -b 20g -v 4 demand_paging_test -b 4g -v 32 All behaved as expected. Signed-off-by: Ben Gardon Message-Id: <20201027233733.1484855-5-bgardon@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 2 ++ tools/testing/selftests/kvm/include/perf_test_util.h | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 7de6feb00076..47defc65aeda 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -266,6 +266,8 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, vm = create_vm(mode, vcpus, vcpu_memory_bytes); + perf_test_args.wr_fract = 1; + guest_data_prototype = malloc(perf_test_args.host_page_size); TEST_ASSERT(guest_data_prototype, "Failed to allocate buffer for guest data pattern"); diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 838f946700f0..1716300469c0 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -46,6 +46,7 @@ struct perf_test_args { struct kvm_vm *vm; uint64_t host_page_size; uint64_t guest_page_size; + int wr_fract; struct vcpu_args vcpu_args[MAX_VCPUS]; }; @@ -72,7 +73,10 @@ static void guest_code(uint32_t vcpu_id) for (i = 0; i < pages; i++) { uint64_t addr = gva + (i * perf_test_args.guest_page_size); - *(uint64_t *)addr = 0x0123456789ABCDEF; + if (i % perf_test_args.wr_fract == 0) + *(uint64_t *)addr = 0x0123456789ABCDEF; + else + READ_ONCE(*(uint64_t *)addr); } GUEST_SYNC(1); -- cgit v1.3.1 From f663132d1e09166db419afb9832d463e0a79f3d5 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 4 Nov 2020 22:23:48 +0100 Subject: KVM: selftests: Drop pointless vm_create wrapper Signed-off-by: Andrew Jones Message-Id: <20201104212357.171559-3-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 2 +- tools/testing/selftests/kvm/include/kvm_util.h | 1 - tools/testing/selftests/kvm/include/perf_test_util.h | 2 +- tools/testing/selftests/kvm/lib/kvm_util.c | 7 +------ 4 files changed, 3 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 139ccb550618..54da9cc20db4 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -362,7 +362,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); #ifdef __x86_64__ vm_create_irqchip(vm); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index feb949a92055..7d29aa786959 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -68,7 +68,6 @@ int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); void kvm_vm_free(struct kvm_vm *vmp); void kvm_vm_restart(struct kvm_vm *vmp, int perm); void kvm_vm_release(struct kvm_vm *vmp); diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 1716300469c0..8e053e9dc7ea 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -105,7 +105,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - vm = _vm_create(mode, pages, O_RDWR); + vm = vm_create(mode, pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); #ifdef __x86_64__ vm_create_irqchip(vm); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 7669cb7c86e3..126c6727a6b0 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -180,7 +180,7 @@ _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) * descriptor to control the created VM is created with the permissions * given by perm (e.g. O_RDWR). */ -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) +struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) { struct kvm_vm *vm; @@ -271,11 +271,6 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) return vm; } -struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) -{ - return _vm_create(mode, phy_pages, perm); -} - /* * VM Restart * -- cgit v1.3.1 From 6769155fece2100506e22161945712afae61769f Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 4 Nov 2020 22:23:52 +0100 Subject: KVM: selftests: Make the per vcpu memory size global Rename vcpu_memory_bytes to something with "percpu" in it in order to be less ambiguous. Also make it global to simplify things. Signed-off-by: Andrew Jones Message-Id: <20201104212357.171559-7-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 20 ++++++++------------ tools/testing/selftests/kvm/include/perf_test_util.h | 3 +++ 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 47defc65aeda..33acd954a298 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -27,8 +27,6 @@ #ifdef __NR_userfaultfd -#define DEFAULT_GUEST_TEST_MEM_SIZE (1 << 30) /* 1G */ - #ifdef PRINT_PER_PAGE_UPDATES #define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) #else @@ -251,8 +249,7 @@ static int setup_demand_paging(struct kvm_vm *vm, } static void run_test(enum vm_guest_mode mode, bool use_uffd, - useconds_t uffd_delay, int vcpus, - uint64_t vcpu_memory_bytes) + useconds_t uffd_delay, int vcpus) { pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; @@ -264,7 +261,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, int vcpu_id; int r; - vm = create_vm(mode, vcpus, vcpu_memory_bytes); + vm = create_vm(mode, vcpus, guest_percpu_mem_size); perf_test_args.wr_fract = 1; @@ -276,7 +273,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - add_vcpus(vm, vcpus, vcpu_memory_bytes); + add_vcpus(vm, vcpus, guest_percpu_mem_size); if (use_uffd) { uffd_handler_threads = @@ -293,9 +290,9 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, vm_paddr_t vcpu_gpa; void *vcpu_hva; - vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); + vcpu_gpa = guest_test_phys_mem + (vcpu_id * guest_percpu_mem_size); PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); + vcpu_id, vcpu_gpa, vcpu_gpa + guest_percpu_mem_size); /* Cache the HVA pointer of the region */ vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); @@ -312,7 +309,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, &uffd_handler_threads[vcpu_id], pipefds[vcpu_id * 2], uffd_delay, &uffd_args[vcpu_id], - vcpu_hva, vcpu_memory_bytes); + vcpu_hva, guest_percpu_mem_size); if (r < 0) exit(-r); } @@ -413,7 +410,6 @@ static void help(char *name) int main(int argc, char *argv[]) { bool mode_selected = false; - uint64_t vcpu_memory_bytes = DEFAULT_GUEST_TEST_MEM_SIZE; int vcpus = 1; unsigned int mode; int opt, i; @@ -463,7 +459,7 @@ int main(int argc, char *argv[]) "A negative UFFD delay is not supported."); break; case 'b': - vcpu_memory_bytes = parse_size(optarg); + guest_percpu_mem_size = parse_size(optarg); break; case 'v': vcpus = atoi(optarg); @@ -486,7 +482,7 @@ int main(int argc, char *argv[]) TEST_ASSERT(guest_modes[i].supported, "Guest mode ID %d (%s) not supported.", i, vm_guest_mode_string(i)); - run_test(i, use_uffd, uffd_delay, vcpus, vcpu_memory_bytes); + run_test(i, use_uffd, uffd_delay, vcpus); } return 0; diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 8e053e9dc7ea..840dc2a19ce1 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -21,6 +21,8 @@ /* Default guest test virtual memory offset */ #define DEFAULT_GUEST_TEST_MEM 0xc0000000 +#define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */ + /* * Guest physical memory offset of the testing memory slot. * This will be set to the topmost valid physical address minus @@ -33,6 +35,7 @@ static uint64_t guest_test_phys_mem; * Must not conflict with identity mapped test code. */ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; +static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; struct vcpu_args { uint64_t gva; -- cgit v1.3.1 From 3be18630954672b889186e7be9b631f00134e954 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 4 Nov 2020 22:23:53 +0100 Subject: KVM: selftests: Make the number of vcpus global We also check the input number of vcpus against the maximum supported. Signed-off-by: Andrew Jones Message-Id: <20201104212357.171559-8-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 37 ++++++++++------------ .../testing/selftests/kvm/include/perf_test_util.h | 3 ++ 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 33acd954a298..3d96a7bfaff3 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -249,7 +249,7 @@ static int setup_demand_paging(struct kvm_vm *vm, } static void run_test(enum vm_guest_mode mode, bool use_uffd, - useconds_t uffd_delay, int vcpus) + useconds_t uffd_delay) { pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; @@ -261,7 +261,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, int vcpu_id; int r; - vm = create_vm(mode, vcpus, guest_percpu_mem_size); + vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); perf_test_args.wr_fract = 1; @@ -270,23 +270,23 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, "Failed to allocate buffer for guest data pattern"); memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size); - vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads)); + vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - add_vcpus(vm, vcpus, guest_percpu_mem_size); + add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); if (use_uffd) { uffd_handler_threads = - malloc(vcpus * sizeof(*uffd_handler_threads)); + malloc(nr_vcpus * sizeof(*uffd_handler_threads)); TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); - uffd_args = malloc(vcpus * sizeof(*uffd_args)); + uffd_args = malloc(nr_vcpus * sizeof(*uffd_args)); TEST_ASSERT(uffd_args, "Memory allocation failed"); - pipefds = malloc(sizeof(int) * vcpus * 2); + pipefds = malloc(sizeof(int) * nr_vcpus * 2); TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { vm_paddr_t vcpu_gpa; void *vcpu_hva; @@ -322,7 +322,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, clock_gettime(CLOCK_MONOTONIC, &start); - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, &perf_test_args.vcpu_args[vcpu_id]); } @@ -330,7 +330,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pr_info("Started all vCPUs\n"); /* Wait for the vcpu threads to quit */ - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { pthread_join(vcpu_threads[vcpu_id], NULL); PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id); } @@ -343,7 +343,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, char c; /* Tell the user fault fd handler threads to quit */ - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { r = write(pipefds[vcpu_id * 2 + 1], &c, 1); TEST_ASSERT(r == 1, "Unable to write to pipefd"); @@ -354,7 +354,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pr_info("Total guest execution time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); pr_info("Overall demand paging rate: %f pgs/sec\n", - perf_test_args.vcpu_args[0].pages * vcpus / + perf_test_args.vcpu_args[0].pages * nr_vcpus / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); ucall_uninit(vm); @@ -409,8 +409,8 @@ static void help(char *name) int main(int argc, char *argv[]) { + int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); bool mode_selected = false; - int vcpus = 1; unsigned int mode; int opt, i; bool use_uffd = false; @@ -462,12 +462,9 @@ int main(int argc, char *argv[]) guest_percpu_mem_size = parse_size(optarg); break; case 'v': - vcpus = atoi(optarg); - TEST_ASSERT(vcpus > 0, - "Must have a positive number of vCPUs"); - TEST_ASSERT(vcpus <= MAX_VCPUS, - "This test does not currently support\n" - "more than %d vCPUs.", MAX_VCPUS); + nr_vcpus = atoi(optarg); + TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; case 'h': default: @@ -482,7 +479,7 @@ int main(int argc, char *argv[]) TEST_ASSERT(guest_modes[i].supported, "Guest mode ID %d (%s) not supported.", i, vm_guest_mode_string(i)); - run_test(i, use_uffd, uffd_delay, vcpus); + run_test(i, use_uffd, uffd_delay); } return 0; diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 840dc2a19ce1..05eeb5d2bfc4 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -37,6 +37,9 @@ static uint64_t guest_test_phys_mem; static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; +/* Number of VCPUs for the test */ +static int nr_vcpus = 1; + struct vcpu_args { uint64_t gva; uint64_t pages; -- cgit v1.3.1 From 4fd94ec7d566ee2f0b52111cc6d26dd311f8a7c3 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 27 Oct 2020 16:37:33 -0700 Subject: KVM: selftests: Introduce the dirty log perf test The dirty log perf test will time verious dirty logging operations (enabling dirty logging, dirtying memory, getting the dirty log, clearing the dirty log, and disabling dirty logging) in order to quantify dirty logging performance. This test can be used to inform future performance improvements to KVM's dirty logging infrastructure. This series was tested by running the following invocations on an Intel Skylake machine: dirty_log_perf_test -b 20m -i 100 -v 64 dirty_log_perf_test -b 20g -i 5 -v 4 dirty_log_perf_test -b 4g -i 5 -v 32 demand_paging_test -b 20m -v 64 demand_paging_test -b 20g -v 4 demand_paging_test -b 4g -v 32 All behaved as expected. Signed-off-by: Ben Gardon Message-Id: <20201027233733.1484855-6-bgardon@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/dirty_log_perf_test.c | 376 +++++++++++++++++++++ .../testing/selftests/kvm/include/perf_test_util.h | 18 +- tools/testing/selftests/kvm/include/test_util.h | 1 + tools/testing/selftests/kvm/lib/test_util.c | 7 + 6 files changed, 396 insertions(+), 8 deletions(-) create mode 100644 tools/testing/selftests/kvm/dirty_log_perf_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index ea1692d43411..7a2c242b7152 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -27,6 +27,7 @@ /clear_dirty_log_test /demand_paging_test /dirty_log_test +/dirty_log_perf_test /kvm_create_max_vcpus /set_memory_region_test /steal_time diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index c8af04dccf7f..3d14ef77755e 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -61,6 +61,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test +TEST_GEN_PROGS_x86_64 += dirty_log_perf_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c new file mode 100644 index 000000000000..cda7b000b1bc --- /dev/null +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -0,0 +1,376 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM dirty page logging performance test + * + * Based on dirty_log_test.c + * + * Copyright (C) 2018, Red Hat, Inc. + * Copyright (C) 2020, Google, Inc. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include +#include +#include +#include +#include +#include +#include + +#include "kvm_util.h" +#include "perf_test_util.h" +#include "processor.h" +#include "test_util.h" + +/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ +#define TEST_HOST_LOOP_N 2UL + +/* Host variables */ +static bool host_quit; +static uint64_t iteration; +static uint64_t vcpu_last_completed_iteration[MAX_VCPUS]; + +static void *vcpu_worker(void *data) +{ + int ret; + struct kvm_vm *vm = perf_test_args.vm; + uint64_t pages_count = 0; + struct kvm_run *run; + struct timespec start; + struct timespec ts_diff; + struct timespec total = (struct timespec){0}; + struct timespec avg; + struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + int vcpu_id = vcpu_args->vcpu_id; + + vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + run = vcpu_state(vm, vcpu_id); + + while (!READ_ONCE(host_quit)) { + uint64_t current_iteration = READ_ONCE(iteration); + + clock_gettime(CLOCK_MONOTONIC, &start); + ret = _vcpu_run(vm, vcpu_id); + ts_diff = timespec_diff_now(start); + + TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC, + "Invalid guest sync status: exit_reason=%s\n", + exit_reason_str(run->exit_reason)); + + pr_debug("Got sync event from vCPU %d\n", vcpu_id); + vcpu_last_completed_iteration[vcpu_id] = current_iteration; + pr_debug("vCPU %d updated last completed iteration to %lu\n", + vcpu_id, vcpu_last_completed_iteration[vcpu_id]); + + if (current_iteration) { + pages_count += vcpu_args->pages; + total = timespec_add(total, ts_diff); + pr_debug("vCPU %d iteration %lu dirty memory time: %ld.%.9lds\n", + vcpu_id, current_iteration, ts_diff.tv_sec, + ts_diff.tv_nsec); + } else { + pr_debug("vCPU %d iteration %lu populate memory time: %ld.%.9lds\n", + vcpu_id, current_iteration, ts_diff.tv_sec, + ts_diff.tv_nsec); + } + + while (current_iteration == READ_ONCE(iteration) && + !READ_ONCE(host_quit)) {} + } + + avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_id]); + pr_debug("\nvCPU %d dirtied 0x%lx pages over %lu iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id], + total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec); + + return NULL; +} + +#ifdef USE_CLEAR_DIRTY_LOG +static u64 dirty_log_manual_caps; +#endif + +static void run_test(enum vm_guest_mode mode, unsigned long iterations, + uint64_t phys_offset, int wr_fract) +{ + pthread_t *vcpu_threads; + struct kvm_vm *vm; + unsigned long *bmap; + uint64_t guest_num_pages; + uint64_t host_num_pages; + int vcpu_id; + struct timespec start; + struct timespec ts_diff; + struct timespec get_dirty_log_total = (struct timespec){0}; + struct timespec vcpu_dirty_total = (struct timespec){0}; + struct timespec avg; +#ifdef USE_CLEAR_DIRTY_LOG + struct kvm_enable_cap cap = {}; + struct timespec clear_dirty_log_total = (struct timespec){0}; +#endif + + vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); + + perf_test_args.wr_fract = wr_fract; + + guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm); + guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); + host_num_pages = vm_num_host_pages(mode, guest_num_pages); + bmap = bitmap_alloc(host_num_pages); + +#ifdef USE_CLEAR_DIRTY_LOG + cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; + cap.args[0] = dirty_log_manual_caps; + vm_enable_cap(vm, &cap); +#endif + + vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); + TEST_ASSERT(vcpu_threads, "Memory allocation failed"); + + add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); + + sync_global_to_guest(vm, perf_test_args); + + /* Start the iterations */ + iteration = 0; + host_quit = false; + + clock_gettime(CLOCK_MONOTONIC, &start); + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, + &perf_test_args.vcpu_args[vcpu_id]); + } + + /* Allow the vCPU to populate memory */ + pr_debug("Starting iteration %lu - Populating\n", iteration); + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration) + pr_debug("Waiting for vcpu_last_completed_iteration == %lu\n", + iteration); + + ts_diff = timespec_diff_now(start); + pr_info("Populate memory time: %ld.%.9lds\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + /* Enable dirty logging */ + clock_gettime(CLOCK_MONOTONIC, &start); + vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, + KVM_MEM_LOG_DIRTY_PAGES); + ts_diff = timespec_diff_now(start); + pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + while (iteration < iterations) { + /* + * Incrementing the iteration number will start the vCPUs + * dirtying memory again. + */ + clock_gettime(CLOCK_MONOTONIC, &start); + iteration++; + + pr_debug("Starting iteration %lu\n", iteration); + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration) + pr_debug("Waiting for vCPU %d vcpu_last_completed_iteration == %lu\n", + vcpu_id, iteration); + } + + ts_diff = timespec_diff_now(start); + vcpu_dirty_total = timespec_add(vcpu_dirty_total, ts_diff); + pr_info("Iteration %lu dirty memory time: %ld.%.9lds\n", + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); + + clock_gettime(CLOCK_MONOTONIC, &start); + kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + + ts_diff = timespec_diff_now(start); + get_dirty_log_total = timespec_add(get_dirty_log_total, + ts_diff); + pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n", + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); + +#ifdef USE_CLEAR_DIRTY_LOG + clock_gettime(CLOCK_MONOTONIC, &start); + kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, + host_num_pages); + + ts_diff = timespec_diff_now(start); + clear_dirty_log_total = timespec_add(clear_dirty_log_total, + ts_diff); + pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n", + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); +#endif + } + + /* Tell the vcpu thread to quit */ + host_quit = true; + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) + pthread_join(vcpu_threads[vcpu_id], NULL); + + /* Disable dirty logging */ + clock_gettime(CLOCK_MONOTONIC, &start); + vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0); + ts_diff = timespec_diff_now(start); + pr_info("Disabling dirty logging time: %ld.%.9lds\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + avg = timespec_div(get_dirty_log_total, iterations); + pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + iterations, get_dirty_log_total.tv_sec, + get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); + +#ifdef USE_CLEAR_DIRTY_LOG + avg = timespec_div(clear_dirty_log_total, iterations); + pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + iterations, clear_dirty_log_total.tv_sec, + clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); +#endif + + free(bmap); + free(vcpu_threads); + ucall_uninit(vm); + kvm_vm_free(vm); +} + +struct guest_mode { + bool supported; + bool enabled; +}; +static struct guest_mode guest_modes[NUM_VM_MODES]; + +#define guest_mode_init(mode, supported, enabled) ({ \ + guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ +}) + +static void help(char *name) +{ + int i; + + puts(""); + printf("usage: %s [-h] [-i iterations] [-p offset] " + "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name); + puts(""); + printf(" -i: specify iteration counts (default: %"PRIu64")\n", + TEST_HOST_LOOP_N); + printf(" -p: specify guest physical test memory offset\n" + " Warning: a low offset can conflict with the loaded test code.\n"); + printf(" -m: specify the guest mode ID to test " + "(default: test all supported modes)\n" + " This option may be used multiple times.\n" + " Guest mode IDs:\n"); + for (i = 0; i < NUM_VM_MODES; ++i) { + printf(" %d: %s%s\n", i, vm_guest_mode_string(i), + guest_modes[i].supported ? " (supported)" : ""); + } + printf(" -b: specify the size of the memory region which should be\n" + " dirtied by each vCPU. e.g. 10M or 3G.\n" + " (default: 1G)\n"); + printf(" -f: specify the fraction of pages which should be written to\n" + " as opposed to simply read, in the form\n" + " 1/.\n" + " (default: 1 i.e. all pages are written to.)\n"); + printf(" -v: specify the number of vCPUs to run.\n"); + puts(""); + exit(0); +} + +int main(int argc, char *argv[]) +{ + unsigned long iterations = TEST_HOST_LOOP_N; + bool mode_selected = false; + uint64_t phys_offset = 0; + unsigned int mode; + int opt, i; + int wr_fract = 1; + +#ifdef USE_CLEAR_DIRTY_LOG + dirty_log_manual_caps = + kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); + if (!dirty_log_manual_caps) { + print_skip("KVM_CLEAR_DIRTY_LOG not available"); + exit(KSFT_SKIP); + } + dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | + KVM_DIRTY_LOG_INITIALLY_SET); +#endif + +#ifdef __x86_64__ + guest_mode_init(VM_MODE_PXXV48_4K, true, true); +#endif +#ifdef __aarch64__ + guest_mode_init(VM_MODE_P40V48_4K, true, true); + guest_mode_init(VM_MODE_P40V48_64K, true, true); + + { + unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); + + if (limit >= 52) + guest_mode_init(VM_MODE_P52V48_64K, true, true); + if (limit >= 48) { + guest_mode_init(VM_MODE_P48V48_4K, true, true); + guest_mode_init(VM_MODE_P48V48_64K, true, true); + } + } +#endif +#ifdef __s390x__ + guest_mode_init(VM_MODE_P40V48_4K, true, true); +#endif + + while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) { + switch (opt) { + case 'i': + iterations = strtol(optarg, NULL, 10); + break; + case 'p': + phys_offset = strtoull(optarg, NULL, 0); + break; + case 'm': + if (!mode_selected) { + for (i = 0; i < NUM_VM_MODES; ++i) + guest_modes[i].enabled = false; + mode_selected = true; + } + mode = strtoul(optarg, NULL, 10); + TEST_ASSERT(mode < NUM_VM_MODES, + "Guest mode ID %d too big", mode); + guest_modes[mode].enabled = true; + break; + case 'b': + guest_percpu_mem_size = parse_size(optarg); + break; + case 'f': + wr_fract = atoi(optarg); + TEST_ASSERT(wr_fract >= 1, + "Write fraction cannot be less than one"); + break; + case 'v': + nr_vcpus = atoi(optarg); + TEST_ASSERT(nr_vcpus > 0, + "Must have a positive number of vCPUs"); + TEST_ASSERT(nr_vcpus <= MAX_VCPUS, + "This test does not currently support\n" + "more than %d vCPUs.", MAX_VCPUS); + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + TEST_ASSERT(iterations > 2, "Iterations must be greater than two"); + + pr_info("Test iterations: %"PRIu64"\n", iterations); + + for (i = 0; i < NUM_VM_MODES; ++i) { + if (!guest_modes[i].enabled) + continue; + TEST_ASSERT(guest_modes[i].supported, + "Guest mode ID %d (%s) not supported.", + i, vm_guest_mode_string(i)); + run_test(i, iterations, phys_offset, wr_fract); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 05eeb5d2bfc4..2618052057b1 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -76,16 +76,18 @@ static void guest_code(uint32_t vcpu_id) gva = vcpu_args->gva; pages = vcpu_args->pages; - for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * perf_test_args.guest_page_size); + while (true) { + for (i = 0; i < pages; i++) { + uint64_t addr = gva + (i * perf_test_args.guest_page_size); - if (i % perf_test_args.wr_fract == 0) - *(uint64_t *)addr = 0x0123456789ABCDEF; - else - READ_ONCE(*(uint64_t *)addr); - } + if (i % perf_test_args.wr_fract == 0) + *(uint64_t *)addr = 0x0123456789ABCDEF; + else + READ_ONCE(*(uint64_t *)addr); + } - GUEST_SYNC(1); + GUEST_SYNC(1); + } } static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 1cc036ddb0c5..ffffa560436b 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -65,5 +65,6 @@ struct timespec timespec_add_ns(struct timespec ts, int64_t ns); struct timespec timespec_add(struct timespec ts1, struct timespec ts2); struct timespec timespec_sub(struct timespec ts1, struct timespec ts2); struct timespec timespec_diff_now(struct timespec start); +struct timespec timespec_div(struct timespec ts, int divisor); #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 1a46c2c48c7c..8e04c0b1608e 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -92,6 +92,13 @@ struct timespec timespec_diff_now(struct timespec start) return timespec_sub(end, start); } +struct timespec timespec_div(struct timespec ts, int divisor) +{ + int64_t ns = timespec_to_ns(ts) / divisor; + + return timespec_add_ns((struct timespec){0}, ns); +} + void print_skip(const char *fmt, ...) { va_list ap; -- cgit v1.3.1 From 6d6a18fdde8b86b919b740ad629153de432d12a8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 9 Nov 2020 09:45:17 -0500 Subject: KVM: selftests: allow two iterations of dirty_log_perf_test Even though one iteration is not enough for the dirty log performance test (due to the cost of building page tables, zeroing memory etc.) two is okay and it is the default. Without this patch, "./dirty_log_perf_test" without any further arguments fails. Cc: Ben Gardon Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_perf_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index cda7b000b1bc..85c9b8f73142 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -359,7 +359,7 @@ int main(int argc, char *argv[]) } } - TEST_ASSERT(iterations > 2, "Iterations must be greater than two"); + TEST_ASSERT(iterations >= 2, "The test should have at least two iterations"); pr_info("Test iterations: %"PRIu64"\n", iterations); -- cgit v1.3.1 From 197afc631413d96dc60acfc7970bdd4125d38cd3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 6 Nov 2020 16:02:51 -0800 Subject: libbpf: Don't attempt to load unused subprog as an entry-point BPF program If BPF code contains unused BPF subprogram and there are no other subprogram calls (which can realistically happen in real-world applications given sufficiently smart Clang code optimizations), libbpf will erroneously assume that subprograms are entry-point programs and will attempt to load them with UNSPEC program type. Fix by not relying on subcall instructions and rather detect it based on the structure of BPF object's sections. Fixes: 9a94f277c4fb ("tools: libbpf: restore the ability to load programs from .text section") Reported-by: Dmitrii Banshchikov Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201107000251.256821-1-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 23 ++++++++++++---------- tools/testing/selftests/bpf/prog_tests/subprogs.c | 6 ++++++ .../selftests/bpf/progs/test_subprogs_unused.c | 21 ++++++++++++++++++++ 3 files changed, 40 insertions(+), 10 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/test_subprogs_unused.c (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 313034117070..28baee7ba1ca 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -560,8 +560,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, const char *name, size_t sec_idx, const char *sec_name, size_t sec_off, void *insn_data, size_t insn_data_sz) { - int i; - if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) { pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n", sec_name, name, sec_off, insn_data_sz); @@ -600,13 +598,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, goto errout; memcpy(prog->insns, insn_data, insn_data_sz); - for (i = 0; i < prog->insns_cnt; i++) { - if (insn_is_subprog_call(&prog->insns[i])) { - obj->has_subcalls = true; - break; - } - } - return 0; errout: pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name); @@ -3280,7 +3271,19 @@ bpf_object__find_program_by_title(const struct bpf_object *obj, static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog) { - return prog->sec_idx == obj->efile.text_shndx && obj->has_subcalls; + /* For legacy reasons, libbpf supports an entry-point BPF programs + * without SEC() attribute, i.e., those in the .text section. But if + * there are 2 or more such programs in the .text section, they all + * must be subprograms called from entry-point BPF programs in + * designated SEC()'tions, otherwise there is no way to distinguish + * which of those programs should be loaded vs which are a subprogram. + * Similarly, if there is a function/program in .text and at least one + * other BPF program with custom SEC() attribute, then we just assume + * .text programs are subprograms (even if they are not called from + * other programs), because libbpf never explicitly supported mixing + * SEC()-designated BPF programs and .text entry-point BPF programs. + */ + return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1; } struct bpf_program * diff --git a/tools/testing/selftests/bpf/prog_tests/subprogs.c b/tools/testing/selftests/bpf/prog_tests/subprogs.c index a00abf58c037..3f3d2ac4dd57 100644 --- a/tools/testing/selftests/bpf/prog_tests/subprogs.c +++ b/tools/testing/selftests/bpf/prog_tests/subprogs.c @@ -3,12 +3,14 @@ #include #include #include "test_subprogs.skel.h" +#include "test_subprogs_unused.skel.h" static int duration; void test_subprogs(void) { struct test_subprogs *skel; + struct test_subprogs_unused *skel2; int err; skel = test_subprogs__open_and_load(); @@ -26,6 +28,10 @@ void test_subprogs(void) CHECK(skel->bss->res3 != 19, "res3", "got %d, exp %d\n", skel->bss->res3, 19); CHECK(skel->bss->res4 != 36, "res4", "got %d, exp %d\n", skel->bss->res4, 36); + skel2 = test_subprogs_unused__open_and_load(); + ASSERT_OK_PTR(skel2, "unused_progs_skel"); + test_subprogs_unused__destroy(skel2); + cleanup: test_subprogs__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_unused.c b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c new file mode 100644 index 000000000000..75d975f8cf90 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c @@ -0,0 +1,21 @@ +#include "vmlinux.h" +#include +#include + +const char LICENSE[] SEC("license") = "GPL"; + +__attribute__((maybe_unused)) __noinline int unused1(int x) +{ + return x + 1; +} + +static __attribute__((maybe_unused)) __noinline int unused2(int x) +{ + return x + 2; +} + +SEC("raw_tp/sys_enter") +int main_prog(void *ctx) +{ + return 0; +} -- cgit v1.3.1 From f52b8fd332573106e60958617a3d2e30611ce1fb Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 6 Nov 2020 14:54:02 -0800 Subject: bpf: selftest: Use static globals in tcp_hdr_options and btf_skc_cls_ingress Some globals in the tcp_hdr_options test and btf_skc_cls_ingress test are not using static scope. This patch fixes it. Targeting bpf-next branch as an improvement since it currently does not break the build. Fixes: ad2f8eb0095e ("bpf: selftests: Tcp header options") Fixes: 9a856cae2217 ("bpf: selftest: Add test_btf_skc_cls_ingress") Signed-off-by: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20201106225402.4135741-1-kafai@fb.com --- tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c | 2 +- tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c index 86ccf37e26b3..762f6a9da8b5 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c @@ -17,7 +17,7 @@ #include "test_btf_skc_cls_ingress.skel.h" static struct test_btf_skc_cls_ingress *skel; -struct sockaddr_in6 srv_sa6; +static struct sockaddr_in6 srv_sa6; static __u32 duration; #define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress" diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index c85174cdcb77..08d19cafd5e8 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -18,12 +18,12 @@ #define LO_ADDR6 "::1" #define CG_NAME "/tcpbpf-hdr-opt-test" -struct bpf_test_option exp_passive_estab_in; -struct bpf_test_option exp_active_estab_in; -struct bpf_test_option exp_passive_fin_in; -struct bpf_test_option exp_active_fin_in; -struct hdr_stg exp_passive_hdr_stg; -struct hdr_stg exp_active_hdr_stg = { .active = true, }; +static struct bpf_test_option exp_passive_estab_in; +static struct bpf_test_option exp_active_estab_in; +static struct bpf_test_option exp_passive_fin_in; +static struct bpf_test_option exp_active_fin_in; +static struct hdr_stg exp_passive_hdr_stg; +static struct hdr_stg exp_active_hdr_stg = { .active = true, }; static struct test_misc_tcp_hdr_options *misc_skel; static struct test_tcp_hdr_options *skel; -- cgit v1.3.1 From 1db32acfde741359b0b1b9962ae8cd501c2ff769 Mon Sep 17 00:00:00 2001 From: Tanner Love Date: Fri, 6 Nov 2020 13:07:41 -0500 Subject: selftests/net: test max_num_members, fanout_args in psock_fanout Add an additional control test that verifies: -specifying two different max_num_members values fails -specifying max_num_members > PACKET_FANOUT_MAX fails In datapath tests, set max_num_members to PACKET_FANOUT_MAX. Signed-off-by: Tanner Love Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/psock_fanout.c | 72 ++++++++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 2c522f7a0aec..db4521335722 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -56,12 +56,15 @@ #define RING_NUM_FRAMES 20 +static uint32_t cfg_max_num_members; + /* Open a socket in a given fanout mode. * @return -1 if mode is bad, a valid socket otherwise */ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) { struct sockaddr_ll addr = {0}; - int fd, val; + struct fanout_args args; + int fd, val, err; fd = socket(PF_PACKET, SOCK_RAW, 0); if (fd < 0) { @@ -83,8 +86,18 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) exit(1); } - val = (((int) typeflags) << 16) | group_id; - if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) { + if (cfg_max_num_members) { + args.id = group_id; + args.type_flags = typeflags; + args.max_num_members = cfg_max_num_members; + err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, + sizeof(args)); + } else { + val = (((int) typeflags) << 16) | group_id; + err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, + sizeof(val)); + } + if (err) { if (close(fd)) { perror("close packet"); exit(1); @@ -286,6 +299,56 @@ static void test_control_group(void) } } +/* Test illegal max_num_members values */ +static void test_control_group_max_num_members(void) +{ + int fds[3]; + + fprintf(stderr, "test: control multiple sockets, max_num_members\n"); + + /* expected failure on greater than PACKET_FANOUT_MAX */ + cfg_max_num_members = (1 << 16) + 1; + if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) { + fprintf(stderr, "ERROR: max_num_members > PACKET_FANOUT_MAX\n"); + exit(1); + } + + cfg_max_num_members = 256; + fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0); + if (fds[0] == -1) { + fprintf(stderr, "ERROR: failed open\n"); + exit(1); + } + + /* expected failure on joining group with different max_num_members */ + cfg_max_num_members = 257; + if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) { + fprintf(stderr, "ERROR: set different max_num_members\n"); + exit(1); + } + + /* success on joining group with same max_num_members */ + cfg_max_num_members = 256; + fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 0); + if (fds[1] == -1) { + fprintf(stderr, "ERROR: failed to join group\n"); + exit(1); + } + + /* success on joining group with max_num_members unspecified */ + cfg_max_num_members = 0; + fds[2] = sock_fanout_open(PACKET_FANOUT_HASH, 0); + if (fds[2] == -1) { + fprintf(stderr, "ERROR: failed to join group\n"); + exit(1); + } + + if (close(fds[2]) || close(fds[1]) || close(fds[0])) { + fprintf(stderr, "ERROR: closing sockets\n"); + exit(1); + } +} + /* Test creating a unique fanout group ids */ static void test_unique_fanout_group_ids(void) { @@ -426,8 +489,11 @@ int main(int argc, char **argv) test_control_single(); test_control_group(); + test_control_group_max_num_members(); test_unique_fanout_group_ids(); + /* PACKET_FANOUT_MAX */ + cfg_max_num_members = 1 << 16; /* find a set of ports that do not collide onto the same socket */ ret = test_datapath(PACKET_FANOUT_HASH, port_off, expect_hash[0], expect_hash[1]); -- cgit v1.3.1 From 3e9fa9983b9297407c2448114d6d27782d5e2ef2 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 30 Sep 2020 21:04:05 -0400 Subject: tools/power turbostat: update version number goodbye summer... Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 74d2fc6fc78a..f3a1746f7f45 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5712,7 +5712,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 20.03.20" + fprintf(outf, "turbostat version 20.09.30" " - Len Brown \n"); } -- cgit v1.3.1 From c335b4f1f65012713832d988ec06512c7bda5c04 Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Thu, 5 Nov 2020 15:24:40 -0800 Subject: kunit: tool: unmark test_data as binary blobs The tools/testing/kunit/test_data/ directory was marked as binary because some of the test_data files cause checkpatch warnings. Fix this by dropping the .gitattributes file. Fixes: afc63da64f1e ("kunit: kunit_parser: make parser more robust") Signed-off-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/.gitattributes | 1 - 1 file changed, 1 deletion(-) delete mode 100644 tools/testing/kunit/.gitattributes (limited to 'tools') diff --git a/tools/testing/kunit/.gitattributes b/tools/testing/kunit/.gitattributes deleted file mode 100644 index 5b7da1fc3b8f..000000000000 --- a/tools/testing/kunit/.gitattributes +++ /dev/null @@ -1 +0,0 @@ -test_data/* binary -- cgit v1.3.1 From 3959d0a63b3202ea2aa12b3f6effd5400d773d31 Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 21 Oct 2020 00:16:03 -0700 Subject: kunit: Fix kunit.py parse subcommand (use null build_dir) When JSON support was added in [1], the KunitParseRequest tuple was updated to contain a 'build_dir' field, but kunit.py parse doesn't accept --build_dir as an option. The code nevertheless tried to access it, resulting in this error: AttributeError: 'Namespace' object has no attribute 'build_dir' Given that the parser only uses the build_dir variable to set the 'build_environment' json field, we set it to None (which gives the JSON 'null') for now. Ultimately, we probably do want to be able to set this, but since it's new functionality which (for the parse subcommand) never worked, this is the quickest way of getting it back up and running. [1]: https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git/commit/?h=kunit-fixes&id=21a6d1780d5bbfca0ce9b8104ca6233502fcbf86 Fixes: 21a6d1780d5b ("kunit: tool: allow generating test results in JSON") Signed-off-by: David Gow Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index ebf5f5763dee..a6d5f219f714 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -337,7 +337,7 @@ def main(argv, linux=None): kunit_output = f.read().splitlines() request = KunitParseRequest(cli_args.raw_output, kunit_output, - cli_args.build_dir, + None, cli_args.json) result = parse_tests(request) if result.status != KunitStatus.SUCCESS: -- cgit v1.3.1 From b7e0b983ff13714d261883e89910b0755eb12169 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Wed, 21 Oct 2020 15:07:52 -0700 Subject: kunit: tool: fix pre-existing python type annotation errors The code uses annotations, but they aren't accurate. Note that type checking in python is a separate process, running `kunit.py run` will not check and complain about invalid types at runtime. Fix pre-existing issues found by running a type checker $ mypy *.py All but one of these were returning `None` without denoting this properly (via `Optional[Type]`). Signed-off-by: Daniel Latypov Reviewed-by: David Gow Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_parser.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 84a1af2581f5..db0347baa428 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -12,7 +12,7 @@ from collections import namedtuple from datetime import datetime from enum import Enum, auto from functools import reduce -from typing import List +from typing import List, Optional, Tuple TestResult = namedtuple('TestResult', ['status','suites','log']) @@ -151,7 +151,7 @@ def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool: else: return False -def parse_test_case(lines: List[str]) -> TestCase: +def parse_test_case(lines: List[str]) -> Optional[TestCase]: test_case = TestCase() save_non_diagnositic(lines, test_case) while parse_diagnostic(lines, test_case): @@ -163,7 +163,7 @@ def parse_test_case(lines: List[str]) -> TestCase: SUBTEST_HEADER = re.compile(r'^[\s]+# Subtest: (.*)$') -def parse_subtest_header(lines: List[str]) -> str: +def parse_subtest_header(lines: List[str]) -> Optional[str]: consume_non_diagnositic(lines) if not lines: return None @@ -176,7 +176,7 @@ def parse_subtest_header(lines: List[str]) -> str: SUBTEST_PLAN = re.compile(r'[\s]+[0-9]+\.\.([0-9]+)') -def parse_subtest_plan(lines: List[str]) -> int: +def parse_subtest_plan(lines: List[str]) -> Optional[int]: consume_non_diagnositic(lines) match = SUBTEST_PLAN.match(lines[0]) if match: @@ -230,7 +230,7 @@ def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus: max_test_case_status = bubble_up_errors(lambda x: x.status, test_suite.cases) return max_status(max_test_case_status, test_suite.status) -def parse_test_suite(lines: List[str], expected_suite_index: int) -> TestSuite: +def parse_test_suite(lines: List[str], expected_suite_index: int) -> Optional[TestSuite]: if not lines: return None consume_non_diagnositic(lines) @@ -271,7 +271,7 @@ def parse_tap_header(lines: List[str]) -> bool: TEST_PLAN = re.compile(r'[0-9]+\.\.([0-9]+)') -def parse_test_plan(lines: List[str]) -> int: +def parse_test_plan(lines: List[str]) -> Optional[int]: consume_non_diagnositic(lines) match = TEST_PLAN.match(lines[0]) if match: @@ -310,7 +310,7 @@ def parse_test_result(lines: List[str]) -> TestResult: else: return TestResult(TestStatus.NO_TESTS, [], lines) -def print_and_count_results(test_result: TestResult) -> None: +def print_and_count_results(test_result: TestResult) -> Tuple[int, int, int]: total_tests = 0 failed_tests = 0 crashed_tests = 0 -- cgit v1.3.1 From fcdb0bc08ced274078f371e1e0fe6421a97fa9f2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 26 Oct 2020 18:59:25 +0200 Subject: kunit: Do not pollute source directory with generated files (.kunitconfig) When --build_dir is provided use it and do not pollute source directory which even can be mounted over network or read-only. Signed-off-by: Andy Shevchenko Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit.py | 25 ++++++++++++------------- tools/testing/kunit/kunit_kernel.py | 24 +++++++++++++++++++----- 2 files changed, 31 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index a6d5f219f714..d4f7846d0745 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -11,7 +11,6 @@ import argparse import sys import os import time -import shutil from collections import namedtuple from enum import Enum, auto @@ -44,11 +43,6 @@ class KunitStatus(Enum): BUILD_FAILURE = auto() TEST_FAILURE = auto() -def create_default_kunitconfig(): - if not os.path.exists(kunit_kernel.kunitconfig_path): - shutil.copyfile('arch/um/configs/kunit_defconfig', - kunit_kernel.kunitconfig_path) - def get_kernel_root_path(): parts = sys.argv[0] if not __file__ else __file__ parts = os.path.realpath(parts).split('tools/testing/kunit') @@ -61,7 +55,6 @@ def config_tests(linux: kunit_kernel.LinuxSourceTree, kunit_parser.print_with_timestamp('Configuring KUnit Kernel ...') config_start = time.time() - create_default_kunitconfig() success = linux.build_reconfig(request.build_dir, request.make_options) config_end = time.time() if not success: @@ -262,12 +255,12 @@ def main(argv, linux=None): if not os.path.exists(cli_args.build_dir): os.mkdir(cli_args.build_dir) - if not os.path.exists(kunit_kernel.kunitconfig_path): - create_default_kunitconfig() - if not linux: linux = kunit_kernel.LinuxSourceTree() + linux.create_kunitconfig(cli_args.build_dir) + linux.read_kunitconfig(cli_args.build_dir) + request = KunitRequest(cli_args.raw_output, cli_args.timeout, cli_args.jobs, @@ -283,12 +276,12 @@ def main(argv, linux=None): not os.path.exists(cli_args.build_dir)): os.mkdir(cli_args.build_dir) - if not os.path.exists(kunit_kernel.kunitconfig_path): - create_default_kunitconfig() - if not linux: linux = kunit_kernel.LinuxSourceTree() + linux.create_kunitconfig(cli_args.build_dir) + linux.read_kunitconfig(cli_args.build_dir) + request = KunitConfigRequest(cli_args.build_dir, cli_args.make_options) result = config_tests(linux, request) @@ -301,6 +294,9 @@ def main(argv, linux=None): if not linux: linux = kunit_kernel.LinuxSourceTree() + linux.create_kunitconfig(cli_args.build_dir) + linux.read_kunitconfig(cli_args.build_dir) + request = KunitBuildRequest(cli_args.jobs, cli_args.build_dir, cli_args.alltests, @@ -315,6 +311,9 @@ def main(argv, linux=None): if not linux: linux = kunit_kernel.LinuxSourceTree() + linux.create_kunitconfig(cli_args.build_dir) + linux.read_kunitconfig(cli_args.build_dir) + exec_request = KunitExecRequest(cli_args.timeout, cli_args.build_dir, cli_args.alltests) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index b557b1e93f98..633a2efcfdbd 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -6,10 +6,10 @@ # Author: Felix Guo # Author: Brendan Higgins - import logging import subprocess import os +import shutil import signal from contextlib import ExitStack @@ -18,7 +18,8 @@ import kunit_config import kunit_parser KCONFIG_PATH = '.config' -kunitconfig_path = '.kunitconfig' +KUNITCONFIG_PATH = '.kunitconfig' +DEFAULT_KUNITCONFIG_PATH = 'arch/um/configs/kunit_defconfig' BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config' class ConfigError(Exception): @@ -99,19 +100,22 @@ class LinuxSourceTreeOperations(object): stderr=subprocess.STDOUT) process.wait(timeout) - def get_kconfig_path(build_dir): kconfig_path = KCONFIG_PATH if build_dir: kconfig_path = os.path.join(build_dir, KCONFIG_PATH) return kconfig_path +def get_kunitconfig_path(build_dir): + kunitconfig_path = KUNITCONFIG_PATH + if build_dir: + kunitconfig_path = os.path.join(build_dir, KUNITCONFIG_PATH) + return kunitconfig_path + class LinuxSourceTree(object): """Represents a Linux kernel source tree with KUnit tests.""" def __init__(self): - self._kconfig = kunit_config.Kconfig() - self._kconfig.read_from_file(kunitconfig_path) self._ops = LinuxSourceTreeOperations() signal.signal(signal.SIGINT, self.signal_handler) @@ -123,6 +127,16 @@ class LinuxSourceTree(object): return False return True + def create_kunitconfig(self, build_dir, defconfig=DEFAULT_KUNITCONFIG_PATH): + kunitconfig_path = get_kunitconfig_path(build_dir) + if not os.path.exists(kunitconfig_path): + shutil.copyfile(defconfig, kunitconfig_path) + + def read_kunitconfig(self, build_dir): + kunitconfig_path = get_kunitconfig_path(build_dir) + self._kconfig = kunit_config.Kconfig() + self._kconfig.read_from_file(kunitconfig_path) + def validate_config(self, build_dir): kconfig_path = get_kconfig_path(build_dir) validated_kconfig = kunit_config.Kconfig() -- cgit v1.3.1 From 128dc4bcc8c0c7c3bab4a3818a1ec608cccb017a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 26 Oct 2020 18:59:26 +0200 Subject: kunit: Do not pollute source directory with generated files (test.log) When --build_dir is provided use it and do not pollute source directory which even can be mounted over network or read-only. Signed-off-by: Andy Shevchenko Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_kernel.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 633a2efcfdbd..b4768fa03ce0 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -21,6 +21,7 @@ KCONFIG_PATH = '.config' KUNITCONFIG_PATH = '.kunitconfig' DEFAULT_KUNITCONFIG_PATH = 'arch/um/configs/kunit_defconfig' BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config' +OUTFILE_PATH = 'test.log' class ConfigError(Exception): """Represents an error trying to configure the Linux kernel.""" @@ -89,11 +90,12 @@ class LinuxSourceTreeOperations(object): except subprocess.CalledProcessError as e: raise BuildError(e.output.decode()) - def linux_bin(self, params, timeout, build_dir, outfile): + def linux_bin(self, params, timeout, build_dir): """Runs the Linux UML binary. Must be named 'linux'.""" linux_bin = './linux' if build_dir: linux_bin = os.path.join(build_dir, 'linux') + outfile = get_outfile_path(build_dir) with open(outfile, 'w') as output: process = subprocess.Popen([linux_bin] + params, stdout=output, @@ -112,6 +114,12 @@ def get_kunitconfig_path(build_dir): kunitconfig_path = os.path.join(build_dir, KUNITCONFIG_PATH) return kunitconfig_path +def get_outfile_path(build_dir): + outfile_path = OUTFILE_PATH + if build_dir: + outfile_path = os.path.join(build_dir, OUTFILE_PATH) + return outfile_path + class LinuxSourceTree(object): """Represents a Linux kernel source tree with KUnit tests.""" @@ -192,8 +200,8 @@ class LinuxSourceTree(object): def run_kernel(self, args=[], build_dir='', timeout=None): args.extend(['mem=1G']) - outfile = 'test.log' - self._ops.linux_bin(args, timeout, build_dir, outfile) + self._ops.linux_bin(args, timeout, build_dir) + outfile = get_outfile_path(build_dir) subprocess.call(['stty', 'sane']) with open(outfile, 'r') as file: for line in file: -- cgit v1.3.1 From 390881448b1ff1e9d82896abbbda7cdb8e0be27c Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Thu, 29 Oct 2020 15:09:29 -0700 Subject: kunit: tool: print out stderr from make (like build warnings) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the tool redirects make stdout + stderr, and only shows them if the make command fails. This means build warnings aren't shown to the user. This change prints the contents of stderr even if make succeeds, under the assumption these are only build warnings or other messages the user likely wants to see. We drop stdout from the raised exception since we can no longer easily collate stdout and stderr and just showing the stderr seems fine. Example with a warning: [14:56:35] Building KUnit Kernel ... ../lib/kunit/kunit-test.c: In function ‘kunit_test_successful_try’: ../lib/kunit/kunit-test.c:19:6: warning: unused variable ‘unused’ [-Wunused-variable] 19 | int unused; | ^~~~~~ [14:56:40] Starting KUnit Kernel ... Note the stderr has a trailing \n, and since we use print, we add another, but it helps separate make and kunit.py output. Example with a build error: [15:02:45] Building KUnit Kernel ... ERROR:root:../lib/kunit/kunit-test.c: In function ‘kunit_test_successful_try’: ../lib/kunit/kunit-test.c:19:2: error: unknown type name ‘invalid_type’ 19 | invalid_type *test = data; | ^~~~~~~~~~~~ ... Signed-off-by: Daniel Latypov Reviewed-by: David Gow Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_kernel.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index b4768fa03ce0..2e3cc0fac726 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -84,11 +84,16 @@ class LinuxSourceTreeOperations(object): if build_dir: command += ['O=' + build_dir] try: - subprocess.check_output(command, stderr=subprocess.STDOUT) + proc = subprocess.Popen(command, + stderr=subprocess.PIPE, + stdout=subprocess.DEVNULL) except OSError as e: - raise BuildError('Could not call execute make: ' + str(e)) - except subprocess.CalledProcessError as e: - raise BuildError(e.output.decode()) + raise BuildError('Could not call make command: ' + str(e)) + _, stderr = proc.communicate() + if proc.returncode != 0: + raise BuildError(stderr.decode()) + if stderr: # likely only due to build warnings + print(stderr.decode()) def linux_bin(self, params, timeout, build_dir): """Runs the Linux UML binary. Must be named 'linux'.""" -- cgit v1.3.1 From 060352e141e4c71ce147a2737f6d30a97f2ec317 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Fri, 30 Oct 2020 15:38:53 -0700 Subject: kunit: tool: fix extra trailing \n in raw + parsed test output For simplcity, strip all trailing whitespace from parsed output. I imagine no one is printing out meaningful trailing whitespace via KUNIT_FAIL() or similar, and that if they are, they really shouldn't. `isolate_kunit_output()` yielded liens with trailing \n, which results in artifacty output like this: $ ./tools/testing/kunit/kunit.py run [16:16:46] [FAILED] example_simple_test [16:16:46] # example_simple_test: EXPECTATION FAILED at lib/kunit/kunit-example-test.c:29 [16:16:46] Expected 1 + 1 == 3, but [16:16:46] 1 + 1 == 2 [16:16:46] 3 == 3 [16:16:46] not ok 1 - example_simple_test [16:16:46] After this change: [16:16:46] # example_simple_test: EXPECTATION FAILED at lib/kunit/kunit-example-test.c:29 [16:16:46] Expected 1 + 1 == 3, but [16:16:46] 1 + 1 == 2 [16:16:46] 3 == 3 [16:16:46] not ok 1 - example_simple_test [16:16:46] We should *not* be expecting lines to end with \n in kunit_tool_test.py for this reason. Do the same for `raw_output()` as well which suffers from the same issue. This is a followup to [1], but rebased onto kunit-fixes to pick up the other raw_output() fix and fixes for kunit_tool_test.py. [1] https://lore.kernel.org/linux-kselftest/20201020233219.4146059-1-dlatypov@google.com/ Signed-off-by: Daniel Latypov Reviewed-by: David Gow Tested-by: David Gow Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_parser.py | 3 ++- tools/testing/kunit/kunit_tool_test.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index db0347baa428..bbfe1b4e4c1c 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -54,6 +54,7 @@ kunit_end_re = re.compile('(List of all partitions:|' def isolate_kunit_output(kernel_output): started = False for line in kernel_output: + line = line.rstrip() # line always has a trailing \n if kunit_start_re.search(line): prefix_len = len(line.split('TAP version')[0]) started = True @@ -65,7 +66,7 @@ def isolate_kunit_output(kernel_output): def raw_output(kernel_output): for line in kernel_output: - print(line) + print(line.rstrip()) DIVIDER = '=' * 60 diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 0b60855fb819..497ab51bc170 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -102,7 +102,7 @@ class KUnitParserTest(unittest.TestCase): 'test_data/test_output_isolated_correctly.log') file = open(log_path) result = kunit_parser.isolate_kunit_output(file.readlines()) - self.assertContains('TAP version 14\n', result) + self.assertContains('TAP version 14', result) self.assertContains(' # Subtest: example', result) self.assertContains(' 1..2', result) self.assertContains(' ok 1 - example_simple_test', result) @@ -115,7 +115,7 @@ class KUnitParserTest(unittest.TestCase): 'test_data/test_pound_sign.log') with open(log_path) as file: result = kunit_parser.isolate_kunit_output(file.readlines()) - self.assertContains('TAP version 14\n', result) + self.assertContains('TAP version 14', result) self.assertContains(' # Subtest: kunit-resource-test', result) self.assertContains(' 1..5', result) self.assertContains(' ok 1 - kunit_resource_test_init_resources', result) -- cgit v1.3.1 From ff2c395b9257f0e617f9cd212893f3c72c80ee6c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 4 Nov 2020 21:08:40 +1100 Subject: selftests/gpio: Use TEST_GEN_PROGS_EXTENDED Use TEST_GEN_PROGS_EXTENDED rather than TEST_PROGS_EXTENDED. That tells the lib.mk logic that the files it references are to be generated by the Makefile. Having done that we don't need to override the all rule. Signed-off-by: Michael Ellerman Signed-off-by: Shuah Khan --- tools/testing/selftests/gpio/Makefile | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile index 32bdc978a711..c85fb5acf5f4 100644 --- a/tools/testing/selftests/gpio/Makefile +++ b/tools/testing/selftests/gpio/Makefile @@ -11,22 +11,20 @@ LDLIBS += $(VAR_LDLIBS) TEST_PROGS := gpio-mockup.sh TEST_FILES := gpio-mockup-sysfs.sh -TEST_PROGS_EXTENDED := gpio-mockup-chardev +TEST_GEN_PROGS_EXTENDED := gpio-mockup-chardev GPIODIR := $(realpath ../../../gpio) GPIOOBJ := gpio-utils.o -all: $(TEST_PROGS_EXTENDED) - override define CLEAN - $(RM) $(TEST_PROGS_EXTENDED) + $(RM) $(TEST_GEN_PROGS_EXTENDED) $(MAKE) -C $(GPIODIR) OUTPUT=$(GPIODIR)/ clean endef KSFT_KHDR_INSTALL := 1 include ../lib.mk -$(TEST_PROGS_EXTENDED): $(GPIODIR)/$(GPIOOBJ) +$(TEST_GEN_PROGS_EXTENDED): $(GPIODIR)/$(GPIOOBJ) $(GPIODIR)/$(GPIOOBJ): $(MAKE) OUTPUT=$(GPIODIR)/ -C $(GPIODIR) -- cgit v1.3.1 From 449539da2e237336bc750b41f1736a77f9aca25c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 4 Nov 2020 21:08:41 +1100 Subject: selftests/gpio: Move include of lib.mk up Move the include of lib.mk up so that in a subsequent patch we can use OUTPUT, which is initialised by lib.mk, in the definition of the GPIO variables. Signed-off-by: Michael Ellerman Signed-off-by: Shuah Khan --- tools/testing/selftests/gpio/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile index c85fb5acf5f4..615c8a953ade 100644 --- a/tools/testing/selftests/gpio/Makefile +++ b/tools/testing/selftests/gpio/Makefile @@ -13,6 +13,9 @@ TEST_PROGS := gpio-mockup.sh TEST_FILES := gpio-mockup-sysfs.sh TEST_GEN_PROGS_EXTENDED := gpio-mockup-chardev +KSFT_KHDR_INSTALL := 1 +include ../lib.mk + GPIODIR := $(realpath ../../../gpio) GPIOOBJ := gpio-utils.o @@ -21,9 +24,6 @@ override define CLEAN $(MAKE) -C $(GPIODIR) OUTPUT=$(GPIODIR)/ clean endef -KSFT_KHDR_INSTALL := 1 -include ../lib.mk - $(TEST_GEN_PROGS_EXTENDED): $(GPIODIR)/$(GPIOOBJ) $(GPIODIR)/$(GPIOOBJ): -- cgit v1.3.1 From b68c1c65dec5fb5186ebd33ce52059b4c6db8500 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 4 Nov 2020 21:08:42 +1100 Subject: selftests/gpio: Fix build when source tree is read only Currently the gpio selftests fail to build if the source tree is read only: make -j 160 -C tools/testing/selftests TARGETS=gpio make[1]: Entering directory '/linux/tools/testing/selftests/gpio' make OUTPUT=/linux/tools/gpio/ -C /linux/tools/gpio make[2]: Entering directory '/linux/tools/gpio' mkdir -p /linux/tools/gpio/include/linux 2>&1 || true ln -sf /linux/tools/gpio/../../include/uapi/linux/gpio.h /linux/tools/gpio/include/linux/gpio.h ln: failed to create symbolic link '/linux/tools/gpio/include/linux/gpio.h': Read-only file system This happens because we ask make to build ../../../gpio (tools/gpio) without pointing OUTPUT away from the source directory. To fix it we create a subdirectory of the existing OUTPUT directory, called tools-gpio, and tell tools/gpio to build in there. Signed-off-by: Michael Ellerman Signed-off-by: Shuah Khan --- tools/testing/selftests/gpio/Makefile | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile index 615c8a953ade..acf4088a9891 100644 --- a/tools/testing/selftests/gpio/Makefile +++ b/tools/testing/selftests/gpio/Makefile @@ -17,14 +17,18 @@ KSFT_KHDR_INSTALL := 1 include ../lib.mk GPIODIR := $(realpath ../../../gpio) -GPIOOBJ := gpio-utils.o +GPIOOUT := $(OUTPUT)/tools-gpio/ +GPIOOBJ := $(GPIOOUT)/gpio-utils.o override define CLEAN $(RM) $(TEST_GEN_PROGS_EXTENDED) - $(MAKE) -C $(GPIODIR) OUTPUT=$(GPIODIR)/ clean + $(RM) -rf $(GPIOOUT) endef -$(TEST_GEN_PROGS_EXTENDED): $(GPIODIR)/$(GPIOOBJ) +$(TEST_GEN_PROGS_EXTENDED): $(GPIOOBJ) -$(GPIODIR)/$(GPIOOBJ): - $(MAKE) OUTPUT=$(GPIODIR)/ -C $(GPIODIR) +$(GPIOOUT): + mkdir -p $@ + +$(GPIOOBJ): $(GPIOOUT) + $(MAKE) OUTPUT=$(GPIOOUT) -C $(GPIODIR) -- cgit v1.3.1 From 85128c5bcdf9bd9b574d7cbafa49170a39fed2e1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 4 Nov 2020 21:08:43 +1100 Subject: selftests/gpio: Add to CLEAN rule rather than overriding Rather than overriding the CLEAN rule we can just append to it. Signed-off-by: Michael Ellerman Signed-off-by: Shuah Khan --- tools/testing/selftests/gpio/Makefile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile index acf4088a9891..41582fe485ee 100644 --- a/tools/testing/selftests/gpio/Makefile +++ b/tools/testing/selftests/gpio/Makefile @@ -20,10 +20,7 @@ GPIODIR := $(realpath ../../../gpio) GPIOOUT := $(OUTPUT)/tools-gpio/ GPIOOBJ := $(GPIOOUT)/gpio-utils.o -override define CLEAN - $(RM) $(TEST_GEN_PROGS_EXTENDED) - $(RM) -rf $(GPIOOUT) -endef +CLEAN += ; $(RM) -rf $(GPIOOUT) $(TEST_GEN_PROGS_EXTENDED): $(GPIOOBJ) -- cgit v1.3.1 From fc4a3a1bf9ad799181e4d4ec9c2598c0405bc27d Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Mon, 2 Nov 2020 09:39:18 +0200 Subject: selftests: intel_pstate: ftime() is deprecated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use clock_gettime() instead of deprecated ftime(). aperf.c: In function ‘main’: aperf.c:58:2: warning: ‘ftime’ is deprecated [-Wdeprecated-declarations] 58 | ftime(&before); | ^~~~~ In file included from aperf.c:9: /usr/include/sys/timeb.h:39:12: note: declared here 39 | extern int ftime (struct timeb *__timebuf) | ^~~~~ Signed-off-by: Tommi Rantala Signed-off-by: Shuah Khan --- tools/testing/selftests/intel_pstate/aperf.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/intel_pstate/aperf.c b/tools/testing/selftests/intel_pstate/aperf.c index f6cd03a87493..a8acf3996973 100644 --- a/tools/testing/selftests/intel_pstate/aperf.c +++ b/tools/testing/selftests/intel_pstate/aperf.c @@ -10,8 +10,12 @@ #include #include #include +#include #include "../kselftest.h" +#define MSEC_PER_SEC 1000L +#define NSEC_PER_MSEC 1000000L + void usage(char *name) { printf ("Usage: %s cpunum\n", name); } @@ -22,7 +26,7 @@ int main(int argc, char **argv) { long long tsc, old_tsc, new_tsc; long long aperf, old_aperf, new_aperf; long long mperf, old_mperf, new_mperf; - struct timeb before, after; + struct timespec before, after; long long int start, finish, total; cpu_set_t cpuset; @@ -55,7 +59,10 @@ int main(int argc, char **argv) { return 1; } - ftime(&before); + if (clock_gettime(CLOCK_MONOTONIC, &before) < 0) { + perror("clock_gettime"); + return 1; + } pread(fd, &old_tsc, sizeof(old_tsc), 0x10); pread(fd, &old_aperf, sizeof(old_mperf), 0xe7); pread(fd, &old_mperf, sizeof(old_aperf), 0xe8); @@ -64,7 +71,10 @@ int main(int argc, char **argv) { sqrt(i); } - ftime(&after); + if (clock_gettime(CLOCK_MONOTONIC, &after) < 0) { + perror("clock_gettime"); + return 1; + } pread(fd, &new_tsc, sizeof(new_tsc), 0x10); pread(fd, &new_aperf, sizeof(new_mperf), 0xe7); pread(fd, &new_mperf, sizeof(new_aperf), 0xe8); @@ -73,11 +83,11 @@ int main(int argc, char **argv) { aperf = new_aperf-old_aperf; mperf = new_mperf-old_mperf; - start = before.time*1000 + before.millitm; - finish = after.time*1000 + after.millitm; + start = before.tv_sec*MSEC_PER_SEC + before.tv_nsec/NSEC_PER_MSEC; + finish = after.tv_sec*MSEC_PER_SEC + after.tv_nsec/NSEC_PER_MSEC; total = finish - start; - printf("runTime: %4.2f\n", 1.0*total/1000); + printf("runTime: %4.2f\n", 1.0*total/MSEC_PER_SEC); printf("freq: %7.0f\n", tsc / (1.0*aperf / (1.0 * mperf)) / total); return 0; } -- cgit v1.3.1 From 1c49e3783f8899555190a49024ac86d3d76633cd Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 4 Nov 2020 21:03:05 +1100 Subject: selftests/memfd: Fix implicit declaration warnings The memfd tests emit several warnings: fuse_test.c:261:7: warning: implicit declaration of function 'open' fuse_test.c:67:6: warning: implicit declaration of function 'fcntl' memfd_test.c:397:6: warning: implicit declaration of function 'fallocate' memfd_test.c:64:7: warning: implicit declaration of function 'open' memfd_test.c:90:6: warning: implicit declaration of function 'fcntl' These are all caused by the test not including fcntl.h. Instead of including linux/fcntl.h, include fcntl.h, which should eventually cause the former to be included as well. Signed-off-by: Michael Ellerman Signed-off-by: Shuah Khan --- tools/testing/selftests/memfd/fuse_test.c | 2 +- tools/testing/selftests/memfd/memfd_test.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c index b018e835737d..be675002f918 100644 --- a/tools/testing/selftests/memfd/fuse_test.c +++ b/tools/testing/selftests/memfd/fuse_test.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 334a7eea2004..74baab83fec3 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.3.1 From 82f147944c650a07831c796c398f5c973dbdde79 Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Sat, 7 Nov 2020 17:19:35 +0800 Subject: tool: selftests: fix spelling typo of 'writting' writting -> writing Signed-off-by: Wang Qing Reviewed-by: Mike Rapoport Signed-off-by: Shuah Khan --- tools/testing/selftests/vm/userfaultfd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 9b0912a01777..9132fae7ad65 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -894,7 +894,7 @@ static int faulting_process(int signal_test) count_verify[nr]); } /* - * Trigger write protection if there is by writting + * Trigger write protection if there is by writing * the same value back. */ *area_count(area_dst, nr) = count; @@ -922,7 +922,7 @@ static int faulting_process(int signal_test) count_verify[nr]); exit(1); } /* - * Trigger write protection if there is by writting + * Trigger write protection if there is by writing * the same value back. */ *area_count(area_dst, nr) = count; -- cgit v1.3.1 From 93f20eff0cca972d74cb554a2e8b47730228be16 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 28 Oct 2020 16:31:14 +0800 Subject: selftests/run_kselftest.sh: fix dry-run typo Should be -d instead of -n for dry-run. Fixes: 5da1918446a1 ("selftests/run_kselftest.sh: Make each test individually selectable") Signed-off-by: Hangbin Liu Signed-off-by: Shuah Khan --- tools/testing/selftests/run_kselftest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh index 609a4ef9300e..97165a83df63 100755 --- a/tools/testing/selftests/run_kselftest.sh +++ b/tools/testing/selftests/run_kselftest.sh @@ -48,7 +48,7 @@ while true; do -l | --list) echo "$available" exit 0 ;; - -n | --dry-run) + -d | --dry-run) dryrun="echo" shift ;; -h | --help) -- cgit v1.3.1 From c2e46f6b3e3551558d44c4dc518b9667cb0d5f8b Mon Sep 17 00:00:00 2001 From: Sachin Sant Date: Fri, 6 Nov 2020 13:10:06 +0530 Subject: selftests/cgroup: Fix build on older distros On older distros struct clone_args does not have a cgroup member, leading to build errors: cgroup_util.c: In function 'clone_into_cgroup': cgroup_util.c:343:4: error: 'struct clone_args' has no member named 'cgroup' cgroup_util.c:346:33: error: invalid application of 'sizeof' to incomplete type 'struct clone_args' But the selftests already have a locally defined version of the structure which is up to date, called __clone_args. So use __clone_args which fixes the error. Signed-off-by: Michael Ellerman Signed-off-by: Sachin Sant > Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/cgroup/cgroup_util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 05853b0b8831..027014662fb2 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -337,13 +337,13 @@ pid_t clone_into_cgroup(int cgroup_fd) #ifdef CLONE_ARGS_SIZE_VER2 pid_t pid; - struct clone_args args = { + struct __clone_args args = { .flags = CLONE_INTO_CGROUP, .exit_signal = SIGCHLD, .cgroup = cgroup_fd, }; - pid = sys_clone3(&args, sizeof(struct clone_args)); + pid = sys_clone3(&args, sizeof(struct __clone_args)); /* * Verify that this is a genuine test failure: * ENOSYS -> clone3() not available -- cgit v1.3.1 From 97adb13dc9ba08ecd4758bc59efc0205f5cbf377 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Sat, 7 Nov 2020 13:19:28 +0200 Subject: selftest: fix flower terse dump tests Iproute2 tc classifier terse dump has been accepted with modified syntax. Update the tests accordingly. Signed-off-by: Vlad Buslov Fixes: e7534fd42a99 ("selftests: implement flower classifier terse dump tests") Link: https://lore.kernel.org/r/20201107111928.453534-1-vlad@buslov.dev Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tc-tests/filters/tests.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json index bb543bf69d69..361235ad574b 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json @@ -100,7 +100,7 @@ ], "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", "expExitCode": "0", - "verifyCmd": "$TC filter show terse dev $DEV2 ingress", + "verifyCmd": "$TC -br filter show dev $DEV2 ingress", "matchPattern": "filter protocol ip pref 1 flower.*handle", "matchCount": "1", "teardown": [ @@ -119,7 +119,7 @@ ], "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", "expExitCode": "0", - "verifyCmd": "$TC filter show terse dev $DEV2 ingress", + "verifyCmd": "$TC -br filter show dev $DEV2 ingress", "matchPattern": " dst_mac e4:11:22:11:4a:51", "matchCount": "0", "teardown": [ -- cgit v1.3.1 From 1ccd58331f6f2af73758e572f8aa0215b0cacc0e Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 7 Nov 2020 17:47:17 +0100 Subject: selftests: disable rp_filter when testing bareudp Some systems have rp_filter=1 as default configuration. This breaks bareudp.sh as the intermediate namespaces handle part of the routing with regular IPv4 routes but the reverse path is done with tc (flower/tunnel_key/mirred). Signed-off-by: Guillaume Nault Link: https://lore.kernel.org/r/28140b7d20161e4f766b558018fe2718f9bc1117.1604767577.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/bareudp.sh | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh index c6fe22de7d0e..c2b9e990e544 100755 --- a/tools/testing/selftests/net/bareudp.sh +++ b/tools/testing/selftests/net/bareudp.sh @@ -234,6 +234,12 @@ setup_overlay_ipv4() ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1 ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10 ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33 + + # The intermediate namespaces don't have routes for the reverse path, + # as it will be handled by tc. So we need to ensure that rp_filter is + # not going to block the traffic. + ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0 + ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0 } setup_overlay_ipv6() -- cgit v1.3.1 From 5329722057d41aebc31e391907a501feaa42f7d9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 9 Nov 2020 17:19:29 -0800 Subject: bpf: Assign ID to vmlinux BTF and return extra info for BTF in GET_OBJ_INFO Allocate ID for vmlinux BTF. This makes it visible when iterating over all BTF objects in the system. To allow distinguishing vmlinux BTF (and later kernel module BTF) from user-provided BTFs, expose extra kernel_btf flag, as well as BTF name ("vmlinux" for vmlinux BTF, will equal to module's name for module BTF). We might want to later allow specifying BTF name for user-provided BTFs as well, if that makes sense. But currently this is reserved only for in-kernel BTFs. Having in-kernel BTFs exposed IDs will allow to extend BPF APIs that require in-kernel BTF type with ability to specify BTF types from kernel modules, not just vmlinux BTF. This will be implemented in a follow up patch set for fentry/fexit/fmod_ret/lsm/etc. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201110011932.3201430-3-andrii@kernel.org --- include/uapi/linux/bpf.h | 3 +++ kernel/bpf/btf.c | 43 +++++++++++++++++++++++++++++++++++++++--- tools/include/uapi/linux/bpf.h | 3 +++ 3 files changed, 46 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9879d6793e90..162999b12790 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4466,6 +4466,9 @@ struct bpf_btf_info { __aligned_u64 btf; __u32 btf_size; __u32 id; + __aligned_u64 name; + __u32 name_len; + __u32 kernel_btf; } __attribute__((aligned(8))); struct bpf_link_info { diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 727c1c27053f..856585db7aa7 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -214,6 +214,8 @@ struct btf { struct btf *base_btf; u32 start_id; /* first type ID in this BTF (0 for base BTF) */ u32 start_str_off; /* first string offset (0 for base BTF) */ + char name[MODULE_NAME_LEN]; + bool kernel_btf; }; enum verifier_phase { @@ -4429,6 +4431,8 @@ struct btf *btf_parse_vmlinux(void) btf->data = __start_BTF; btf->data_size = __stop_BTF - __start_BTF; + btf->kernel_btf = true; + snprintf(btf->name, sizeof(btf->name), "vmlinux"); err = btf_parse_hdr(env); if (err) @@ -4454,8 +4458,13 @@ struct btf *btf_parse_vmlinux(void) bpf_struct_ops_init(btf, log); - btf_verifier_env_free(env); refcount_set(&btf->refcnt, 1); + + err = btf_alloc_id(btf); + if (err) + goto errout; + + btf_verifier_env_free(env); return btf; errout: @@ -5553,7 +5562,9 @@ int btf_get_info_by_fd(const struct btf *btf, struct bpf_btf_info info; u32 info_copy, btf_copy; void __user *ubtf; - u32 uinfo_len; + char __user *uname; + u32 uinfo_len, uname_len, name_len; + int ret = 0; uinfo = u64_to_user_ptr(attr->info.info); uinfo_len = attr->info.info_len; @@ -5570,11 +5581,37 @@ int btf_get_info_by_fd(const struct btf *btf, return -EFAULT; info.btf_size = btf->data_size; + info.kernel_btf = btf->kernel_btf; + + uname = u64_to_user_ptr(info.name); + uname_len = info.name_len; + if (!uname ^ !uname_len) + return -EINVAL; + + name_len = strlen(btf->name); + info.name_len = name_len; + + if (uname) { + if (uname_len >= name_len + 1) { + if (copy_to_user(uname, btf->name, name_len + 1)) + return -EFAULT; + } else { + char zero = '\0'; + + if (copy_to_user(uname, btf->name, uname_len - 1)) + return -EFAULT; + if (put_user(zero, uname + uname_len - 1)) + return -EFAULT; + /* let user-space know about too short buffer */ + ret = -ENOSPC; + } + } + if (copy_to_user(uinfo, &info, info_copy) || put_user(info_copy, &uattr->info.info_len)) return -EFAULT; - return 0; + return ret; } int btf_get_fd_by_id(u32 id) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9879d6793e90..162999b12790 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4466,6 +4466,9 @@ struct bpf_btf_info { __aligned_u64 btf; __u32 btf_size; __u32 id; + __aligned_u64 name; + __u32 name_len; + __u32 kernel_btf; } __attribute__((aligned(8))); struct bpf_link_info { -- cgit v1.3.1 From cecaf4a0f2dcbd7e76156f6d63748b84c176b95b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 9 Nov 2020 17:19:32 -0800 Subject: tools/bpftool: Add support for in-kernel and named BTF in `btf show` Display vmlinux BTF name and kernel module names when listing available BTFs on the system. In human-readable output mode, module BTFs are reported with "name [module-name]", while vmlinux BTF will be reported as "name [vmlinux]". Square brackets are added by bpftool and follow kernel convention when displaying modules in human-readable text outputs. [vmuser@archvm bpf]$ sudo ../../../bpf/bpftool/bpftool btf s 1: name [vmlinux] size 4082281B 6: size 2365B prog_ids 8,6 map_ids 3 7: name [button] size 46895B 8: name [pcspkr] size 42328B 9: name [serio_raw] size 39375B 10: name [floppy] size 57185B 11: name [i2c_core] size 76186B 12: name [crc32c_intel] size 16036B 13: name [i2c_piix4] size 50497B 14: name [irqbypass] size 14124B 15: name [kvm] size 197985B 16: name [kvm_intel] size 123564B 17: name [cryptd] size 42466B 18: name [crypto_simd] size 17187B 19: name [glue_helper] size 39205B 20: name [aesni_intel] size 41034B 25: size 36150B pids bpftool(2519) In JSON mode, two fields (boolean "kernel" and string "name") are reported for each BTF object. vmlinux BTF is reported with name "vmlinux" (kernel itself returns and empty name for vmlinux BTF). [vmuser@archvm bpf]$ sudo ../../../bpf/bpftool/bpftool btf s -jp [{ "id": 1, "size": 4082281, "prog_ids": [], "map_ids": [], "kernel": true, "name": "vmlinux" },{ "id": 6, "size": 2365, "prog_ids": [8,6 ], "map_ids": [3 ], "kernel": false },{ "id": 7, "size": 46895, "prog_ids": [], "map_ids": [], "kernel": true, "name": "button" },{ ... Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Tested-by: Alan Maguire Link: https://lore.kernel.org/bpf/20201110011932.3201430-6-andrii@kernel.org --- tools/bpf/bpftool/btf.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index c96b56e8e3a4..ed5e97157241 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -742,9 +742,14 @@ show_btf_plain(struct bpf_btf_info *info, int fd, struct btf_attach_table *btf_map_table) { struct btf_attach_point *obj; + const char *name = u64_to_ptr(info->name); int n; printf("%u: ", info->id); + if (info->kernel_btf) + printf("name [%s] ", name); + else if (name && name[0]) + printf("name %s ", name); printf("size %uB", info->btf_size); n = 0; @@ -771,6 +776,7 @@ show_btf_json(struct bpf_btf_info *info, int fd, struct btf_attach_table *btf_map_table) { struct btf_attach_point *obj; + const char *name = u64_to_ptr(info->name); jsonw_start_object(json_wtr); /* btf object */ jsonw_uint_field(json_wtr, "id", info->id); @@ -796,6 +802,11 @@ show_btf_json(struct bpf_btf_info *info, int fd, emit_obj_refs_json(&refs_table, info->id, json_wtr); /* pids */ + jsonw_bool_field(json_wtr, "kernel", info->kernel_btf); + + if (name && name[0]) + jsonw_string_field(json_wtr, "name", name); + jsonw_end_object(json_wtr); /* btf object */ } @@ -803,15 +814,30 @@ static int show_btf(int fd, struct btf_attach_table *btf_prog_table, struct btf_attach_table *btf_map_table) { - struct bpf_btf_info info = {}; + struct bpf_btf_info info; __u32 len = sizeof(info); + char name[64]; int err; + memset(&info, 0, sizeof(info)); err = bpf_obj_get_info_by_fd(fd, &info, &len); if (err) { p_err("can't get BTF object info: %s", strerror(errno)); return -1; } + /* if kernel support emitting BTF object name, pass name pointer */ + if (info.name_len) { + memset(&info, 0, sizeof(info)); + info.name_len = sizeof(name); + info.name = ptr_to_u64(name); + len = sizeof(info); + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + p_err("can't get BTF object info: %s", strerror(errno)); + return -1; + } + } if (json_output) show_btf_json(&info, fd, btf_prog_table, btf_map_table); -- cgit v1.3.1 From 58cfa49c2ba7f815adccc27a775e7cf8a8f7f539 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 10 Nov 2020 09:50:12 +0800 Subject: selftest/bpf: Add missed ip6ip6 test back In comment 173ca26e9b51 ("samples/bpf: add comprehensive ipip, ipip6, ip6ip6 test") we added ip6ip6 test for bpf tunnel testing. But in commit 933a741e3b82 ("selftests/bpf: bpf tunnel test.") when we moved it to the current folder, we didn't add it. This patch add the ip6ip6 test back to bpf tunnel test. Update the ipip6's topology for both IPv4 and IPv6 testing. Since iperf test is removed as currect framework simplified it in purpose, I also removed unused tcp checkings in test_tunnel_kern.c. Fixes: 933a741e3b82 ("selftests/bpf: bpf tunnel test.") Signed-off-by: Hangbin Liu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201110015013.1570716-2-liuhangbin@gmail.com --- .../testing/selftests/bpf/progs/test_tunnel_kern.c | 42 ++++----------------- tools/testing/selftests/bpf/test_tunnel.sh | 43 ++++++++++++++++++++-- 2 files changed, 46 insertions(+), 39 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index f48dbfe24ddc..a621b58ab079 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -528,12 +527,11 @@ int _ipip_set_tunnel(struct __sk_buff *skb) struct bpf_tunnel_key key = {}; void *data = (void *)(long)skb->data; struct iphdr *iph = data; - struct tcphdr *tcp = data + sizeof(*iph); void *data_end = (void *)(long)skb->data_end; int ret; /* single length check */ - if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + if (data + sizeof(*iph) > data_end) { ERROR(1); return TC_ACT_SHOT; } @@ -541,16 +539,6 @@ int _ipip_set_tunnel(struct __sk_buff *skb) key.tunnel_ttl = 64; if (iph->protocol == IPPROTO_ICMP) { key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ - } else { - if (iph->protocol != IPPROTO_TCP || iph->ihl != 5) - return TC_ACT_SHOT; - - if (tcp->dest == bpf_htons(5200)) - key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ - else if (tcp->dest == bpf_htons(5201)) - key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */ - else - return TC_ACT_SHOT; } ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); @@ -585,19 +573,20 @@ int _ipip6_set_tunnel(struct __sk_buff *skb) struct bpf_tunnel_key key = {}; void *data = (void *)(long)skb->data; struct iphdr *iph = data; - struct tcphdr *tcp = data + sizeof(*iph); void *data_end = (void *)(long)skb->data_end; int ret; /* single length check */ - if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + if (data + sizeof(*iph) > data_end) { ERROR(1); return TC_ACT_SHOT; } __builtin_memset(&key, 0x0, sizeof(key)); - key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */ key.tunnel_ttl = 64; + if (iph->protocol == IPPROTO_ICMP) { + key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */ + } ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); @@ -634,35 +623,18 @@ int _ip6ip6_set_tunnel(struct __sk_buff *skb) struct bpf_tunnel_key key = {}; void *data = (void *)(long)skb->data; struct ipv6hdr *iph = data; - struct tcphdr *tcp = data + sizeof(*iph); void *data_end = (void *)(long)skb->data_end; int ret; /* single length check */ - if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + if (data + sizeof(*iph) > data_end) { ERROR(1); return TC_ACT_SHOT; } - key.remote_ipv6[0] = bpf_htonl(0x2401db00); key.tunnel_ttl = 64; - if (iph->nexthdr == 58 /* NEXTHDR_ICMP */) { - key.remote_ipv6[3] = bpf_htonl(1); - } else { - if (iph->nexthdr != 6 /* NEXTHDR_TCP */) { - ERROR(iph->nexthdr); - return TC_ACT_SHOT; - } - - if (tcp->dest == bpf_htons(5200)) { - key.remote_ipv6[3] = bpf_htonl(1); - } else if (tcp->dest == bpf_htons(5201)) { - key.remote_ipv6[3] = bpf_htonl(2); - } else { - ERROR(tcp->dest); - return TC_ACT_SHOT; - } + key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */ } ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index bd12ec97a44d..1ccbe804e8e1 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -24,12 +24,12 @@ # Root namespace with metadata-mode tunnel + BPF # Device names and addresses: # veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay) -# tunnel dev 11, ex: gre11, IPv4: 10.1.1.200 (overlay) +# tunnel dev 11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay) # # Namespace at_ns0 with native tunnel # Device names and addresses: # veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay) -# tunnel dev 00, ex: gre00, IPv4: 10.1.1.100 (overlay) +# tunnel dev 00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 (overlay) # # # End-to-end ping packet flow @@ -250,7 +250,7 @@ add_ipip_tunnel() ip addr add dev $DEV 10.1.1.200/24 } -add_ipip6tnl_tunnel() +add_ip6tnl_tunnel() { ip netns exec at_ns0 ip addr add ::11/96 dev veth0 ip netns exec at_ns0 ip link set dev veth0 up @@ -262,11 +262,13 @@ add_ipip6tnl_tunnel() ip link add dev $DEV_NS type $TYPE \ local ::11 remote ::22 ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns0 ip addr add dev $DEV_NS 1::11/96 ip netns exec at_ns0 ip link set dev $DEV_NS up # root namespace ip link add dev $DEV type $TYPE external ip addr add dev $DEV 10.1.1.200/24 + ip addr add dev $DEV 1::22/96 ip link set dev $DEV up } @@ -534,7 +536,7 @@ test_ipip6() check $TYPE config_device - add_ipip6tnl_tunnel + add_ip6tnl_tunnel ip link set dev veth1 mtu 1500 attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel # underlay @@ -553,6 +555,34 @@ test_ipip6() echo -e ${GREEN}"PASS: $TYPE"${NC} } +test_ip6ip6() +{ + TYPE=ip6tnl + DEV_NS=ip6ip6tnl00 + DEV=ip6ip6tnl11 + ret=0 + + check $TYPE + config_device + add_ip6tnl_tunnel + ip link set dev veth1 mtu 1500 + attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel + # underlay + ping6 $PING_ARG ::11 + # ip6 over ip6 + ping6 $PING_ARG 1::11 + check_err $? + ip netns exec at_ns0 ping6 $PING_ARG 1::22 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: ip6$TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: ip6$TYPE"${NC} +} + setup_xfrm_tunnel() { auth=0x$(printf '1%.0s' {1..40}) @@ -646,6 +676,7 @@ cleanup() ip link del veth1 2> /dev/null ip link del ipip11 2> /dev/null ip link del ipip6tnl11 2> /dev/null + ip link del ip6ip6tnl11 2> /dev/null ip link del gretap11 2> /dev/null ip link del ip6gre11 2> /dev/null ip link del ip6gretap11 2> /dev/null @@ -742,6 +773,10 @@ bpf_tunnel_test() test_ipip6 errors=$(( $errors + $? )) + echo "Testing IP6IP6 tunnel..." + test_ip6ip6 + errors=$(( $errors + $? )) + echo "Testing IPSec tunnel..." test_xfrm_tunnel errors=$(( $errors + $? )) -- cgit v1.3.1 From f9f16dfbe76e63ba9aec68055c08242b09be297e Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 6 Nov 2020 17:48:45 +0800 Subject: perf mem: Search event name with more flexible path The perf tool searches a memory event name under the folder '/sys/devices/cpu/events/', this leads to the limitation for the selection of a memory profiling event which must be under this folder. Thus it's impossible to use any other event as memory event which is not under this specific folder, e.g. Arm SPE hardware event is not located in '/sys/devices/cpu/events/' so it cannot be enabled for memory profiling. This patch changes to search folder from '/sys/devices/cpu/events/' to '/sys/devices', so it give flexibility to find events which can be used for memory profiling. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem-events.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index ea0af0bc4314..35c8d175a9d2 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -18,8 +18,8 @@ unsigned int perf_mem_events__loads_ldlat = 30; #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { - E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "mem-loads"), - E("ldlat-stores", "cpu/mem-stores/P", "mem-stores"), + E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "cpu/events/mem-loads"), + E("ldlat-stores", "cpu/mem-stores/P", "cpu/events/mem-stores"), }; #undef E @@ -93,7 +93,7 @@ int perf_mem_events__init(void) struct perf_mem_event *e = &perf_mem_events[j]; struct stat st; - scnprintf(path, PATH_MAX, "%s/devices/cpu/events/%s", + scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, e->sysfs_name); if (!stat(path, &st)) -- cgit v1.3.1 From eaf6aaeec5fa301c0eb8ae92962909b15d075e5f Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 6 Nov 2020 17:48:46 +0800 Subject: perf mem: Introduce weak function perf_mem_events__ptr() Different architectures might use different event or different event parameters for memory profiling, this patch introduces a weak perf_mem_events__ptr() function which allows to return back a architecture specific memory event. Since the variable 'perf_mem_events' can be only accessed by the perf_mem_events__ptr() function, mark the variable as 'static', this allows the architectures to define its own memory event array. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 18 ++++++++++++------ tools/perf/builtin-mem.c | 21 ++++++++++++++------- tools/perf/util/mem-events.c | 26 +++++++++++++++++++------- tools/perf/util/mem-events.h | 2 +- 4 files changed, 46 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index d5bea5d3cd51..4d1a08e38233 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2867,6 +2867,7 @@ static int perf_c2c__record(int argc, const char **argv) int ret; bool all_user = false, all_kernel = false; bool event_set = false; + struct perf_mem_event *e; struct option options[] = { OPT_CALLBACK('e', "event", &event_set, "event", "event selector. Use 'perf c2c record -e list' to list available events", @@ -2894,11 +2895,15 @@ static int perf_c2c__record(int argc, const char **argv) rec_argv[i++] = "record"; if (!event_set) { - perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true; - perf_mem_events[PERF_MEM_EVENTS__STORE].record = true; + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); + e->record = true; + + e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE); + e->record = true; } - if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record) + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); + if (e->record) rec_argv[i++] = "-W"; rec_argv[i++] = "-d"; @@ -2906,12 +2911,13 @@ static int perf_c2c__record(int argc, const char **argv) rec_argv[i++] = "--sample-cpu"; for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - if (!perf_mem_events[j].record) + e = perf_mem_events__ptr(j); + if (!e->record) continue; - if (!perf_mem_events[j].supported) { + if (!e->supported) { pr_err("failed: event '%s' not supported\n", - perf_mem_events[j].name); + perf_mem_events__name(j)); free(rec_argv); return -1; } diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 3523279af6af..9a7df8d01296 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -64,6 +64,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) const char **rec_argv; int ret; bool all_user = false, all_kernel = false; + struct perf_mem_event *e; struct option options[] = { OPT_CALLBACK('e', "event", &mem, "event", "event selector. use 'perf mem record -e list' to list available events", @@ -86,13 +87,18 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) rec_argv[i++] = "record"; - if (mem->operation & MEM_OPERATION_LOAD) - perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true; + if (mem->operation & MEM_OPERATION_LOAD) { + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); + e->record = true; + } - if (mem->operation & MEM_OPERATION_STORE) - perf_mem_events[PERF_MEM_EVENTS__STORE].record = true; + if (mem->operation & MEM_OPERATION_STORE) { + e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE); + e->record = true; + } - if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record) + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); + if (e->record) rec_argv[i++] = "-W"; rec_argv[i++] = "-d"; @@ -101,10 +107,11 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) rec_argv[i++] = "--phys-data"; for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - if (!perf_mem_events[j].record) + e = perf_mem_events__ptr(j); + if (!e->record) continue; - if (!perf_mem_events[j].supported) { + if (!e->supported) { pr_err("failed: event '%s' not supported\n", perf_mem_events__name(j)); free(rec_argv); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 35c8d175a9d2..7a5a0d699e27 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -17,7 +17,7 @@ unsigned int perf_mem_events__loads_ldlat = 30; #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } -struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { +static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "cpu/events/mem-loads"), E("ldlat-stores", "cpu/mem-stores/P", "cpu/events/mem-stores"), }; @@ -28,19 +28,31 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { static char mem_loads_name[100]; static bool mem_loads_name__init; +struct perf_mem_event * __weak perf_mem_events__ptr(int i) +{ + if (i >= PERF_MEM_EVENTS__MAX) + return NULL; + + return &perf_mem_events[i]; +} + char * __weak perf_mem_events__name(int i) { + struct perf_mem_event *e = perf_mem_events__ptr(i); + + if (!e) + return NULL; + if (i == PERF_MEM_EVENTS__LOAD) { if (!mem_loads_name__init) { mem_loads_name__init = true; scnprintf(mem_loads_name, sizeof(mem_loads_name), - perf_mem_events[i].name, - perf_mem_events__loads_ldlat); + e->name, perf_mem_events__loads_ldlat); } return mem_loads_name; } - return (char *)perf_mem_events[i].name; + return (char *)e->name; } int perf_mem_events__parse(const char *str) @@ -61,7 +73,7 @@ int perf_mem_events__parse(const char *str) while (tok) { for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - struct perf_mem_event *e = &perf_mem_events[j]; + struct perf_mem_event *e = perf_mem_events__ptr(j); if (strstr(e->tag, tok)) e->record = found = true; @@ -90,7 +102,7 @@ int perf_mem_events__init(void) for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { char path[PATH_MAX]; - struct perf_mem_event *e = &perf_mem_events[j]; + struct perf_mem_event *e = perf_mem_events__ptr(j); struct stat st; scnprintf(path, PATH_MAX, "%s/devices/%s", @@ -108,7 +120,7 @@ void perf_mem_events__list(void) int j; for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - struct perf_mem_event *e = &perf_mem_events[j]; + struct perf_mem_event *e = perf_mem_events__ptr(j); fprintf(stderr, "%-13s%-*s%s\n", e->tag, diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 904dad34f7f7..726a9c8103e4 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -31,13 +31,13 @@ enum { PERF_MEM_EVENTS__MAX, }; -extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX]; extern unsigned int perf_mem_events__loads_ldlat; int perf_mem_events__parse(const char *str); int perf_mem_events__init(void); char *perf_mem_events__name(int i); +struct perf_mem_event *perf_mem_events__ptr(int i); void perf_mem_events__list(void); -- cgit v1.3.1 From 4ba2452cd88f39da68a6dc05fcc95e8977fd6403 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 6 Nov 2020 17:48:47 +0800 Subject: perf mem: Support new memory event PERF_MEM_EVENTS__LOAD_STORE On the architectures with perf memory profiling, two types of hardware events have been supported: load and store; if want to profile memory for both load and store operations, the tool will use these two events at the same time, the usage is: # perf mem record -t load,store -- uname But this cannot be applied for AUX tracing event, the same PMU event can be used to only trace memory load, or only memory store, or trace for both memory load and store. This patch introduces a new event PERF_MEM_EVENTS__LOAD_STORE, which is used to support the event which can record both memory load and store operations. When user specifies memory operation type as 'load,store', or doesn't set type so use 'load,store' as default, if the arch supports the event PERF_MEM_EVENTS__LOAD_STORE, the tool will convert the required operations to this single event; otherwise, if the arch doesn't support PERF_MEM_EVENTS__LOAD_STORE, the tool rolls back to enable both events PERF_MEM_EVENTS__LOAD and PERF_MEM_EVENTS__STORE, which keeps the same behaviour with before. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-mem.c | 24 ++++++++++++++++++------ tools/perf/util/mem-events.c | 13 ++++++++++++- tools/perf/util/mem-events.h | 1 + 3 files changed, 31 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 9a7df8d01296..21ebe0f47e64 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -87,14 +87,26 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) rec_argv[i++] = "record"; - if (mem->operation & MEM_OPERATION_LOAD) { - e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); - e->record = true; - } + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD_STORE); - if (mem->operation & MEM_OPERATION_STORE) { - e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE); + /* + * The load and store operations are required, use the event + * PERF_MEM_EVENTS__LOAD_STORE if it is supported. + */ + if (e->tag && + (mem->operation & MEM_OPERATION_LOAD) && + (mem->operation & MEM_OPERATION_STORE)) { e->record = true; + } else { + if (mem->operation & MEM_OPERATION_LOAD) { + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); + e->record = true; + } + + if (mem->operation & MEM_OPERATION_STORE) { + e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE); + e->record = true; + } } e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 7a5a0d699e27..19007e463b8a 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -20,6 +20,7 @@ unsigned int perf_mem_events__loads_ldlat = 30; static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "cpu/events/mem-loads"), E("ldlat-stores", "cpu/mem-stores/P", "cpu/events/mem-stores"), + E(NULL, NULL, NULL), }; #undef E @@ -75,6 +76,9 @@ int perf_mem_events__parse(const char *str) for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { struct perf_mem_event *e = perf_mem_events__ptr(j); + if (!e->tag) + continue; + if (strstr(e->tag, tok)) e->record = found = true; } @@ -105,6 +109,13 @@ int perf_mem_events__init(void) struct perf_mem_event *e = perf_mem_events__ptr(j); struct stat st; + /* + * If the event entry isn't valid, skip initialization + * and "e->supported" will keep false. + */ + if (!e->tag) + continue; + scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, e->sysfs_name); @@ -123,7 +134,7 @@ void perf_mem_events__list(void) struct perf_mem_event *e = perf_mem_events__ptr(j); fprintf(stderr, "%-13s%-*s%s\n", - e->tag, + e->tag ?: "", verbose > 0 ? 25 : 0, verbose > 0 ? perf_mem_events__name(j) : "", e->supported ? ": available" : ""); diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 726a9c8103e4..5ef178278909 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -28,6 +28,7 @@ struct mem_info { enum { PERF_MEM_EVENTS__LOAD, PERF_MEM_EVENTS__STORE, + PERF_MEM_EVENTS__LOAD_STORE, PERF_MEM_EVENTS__MAX, }; -- cgit v1.3.1 From 8b8173b45a7a9709cc2597548469708a8efbd0d9 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 6 Nov 2020 17:48:48 +0800 Subject: perf c2c: Support memory event PERF_MEM_EVENTS__LOAD_STORE When user doesn't specify event name, perf c2c tool enables both the load and store events, and this leads to failure for opening the duplicate PMU device for AUX trace. After the memory event PERF_MEM_EVENTS__LOAD_STORE is introduced, when the user doesn't specify event name, this patch converts the required operation to PERF_MEM_EVENTS__LOAD_STORE if the arch supports it. Otherwise, the tool still rolls back to enable events PERF_MEM_EVENTS__LOAD and PERF_MEM_EVENTS__STORE. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 4d1a08e38233..98ae33eac6cc 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2895,11 +2895,20 @@ static int perf_c2c__record(int argc, const char **argv) rec_argv[i++] = "record"; if (!event_set) { - e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); - e->record = true; + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD_STORE); + /* + * The load and store operations are required, use the event + * PERF_MEM_EVENTS__LOAD_STORE if it is supported. + */ + if (e->tag) { + e->record = true; + } else { + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); + e->record = true; - e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE); - e->record = true; + e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE); + e->record = true; + } } e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); -- cgit v1.3.1 From 436cce00710a3f234ab6b735b5980256e773d388 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 6 Nov 2020 17:48:49 +0800 Subject: perf mem: Only initialize memory event for recording It's needless to initialize memory events for reporting, this patch moves memory event initialization for only recording. Furthermore, the change allows to parse perf data on cross platforms, e.g. perf tool can report result properly even the machine doesn't support the memory events. Signed-off-by: Leo Yan Acked-by: Ian Rogers Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-6-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-mem.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 21ebe0f47e64..72ce4b8fbb0f 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -77,6 +77,11 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) OPT_END() }; + if (perf_mem_events__init()) { + pr_err("failed: memory events not supported\n"); + return -1; + } + argc = parse_options(argc, argv, options, record_mem_usage, PARSE_OPT_KEEP_UNKNOWN); @@ -441,11 +446,6 @@ int cmd_mem(int argc, const char **argv) NULL }; - if (perf_mem_events__init()) { - pr_err("failed: memory events not supported\n"); - return -1; - } - argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands, mem_usage, PARSE_OPT_KEEP_UNKNOWN); -- cgit v1.3.1 From 014a771c7867fda5b40a95e1c7bc1aa5ac704c91 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 6 Nov 2020 17:48:50 +0800 Subject: perf auxtrace: Add itrace option '-M' for memory events This patch is to add itrace option '-M' to synthesize memory event. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-7-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/itrace.txt | 1 + tools/perf/util/auxtrace.c | 4 ++++ tools/perf/util/auxtrace.h | 2 ++ 3 files changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index d3740c8f399b..079cdfabb352 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -11,6 +11,7 @@ d create a debug log f synthesize first level cache events m synthesize last level cache events + M synthesize memory events t synthesize TLB events a synthesize remote access events g synthesize a call chain (use with i or x) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 42a85c86421d..62e7f6c5f8b5 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1333,6 +1333,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts, synth_opts->flc = true; synth_opts->llc = true; synth_opts->tlb = true; + synth_opts->mem = true; synth_opts->remote_access = true; if (no_sample) { @@ -1554,6 +1555,9 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, case 'a': synth_opts->remote_access = true; break; + case 'M': + synth_opts->mem = true; + break; case 'q': synth_opts->quick += 1; break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 951d2d14cf24..7e5c9e1552bd 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -88,6 +88,7 @@ enum itrace_period_type { * @llc: whether to synthesize last level cache events * @tlb: whether to synthesize TLB events * @remote_access: whether to synthesize remote access events + * @mem: whether to synthesize memory events * @callchain_sz: maximum callchain size * @last_branch_sz: branch context size * @period: 'instructions' events period @@ -126,6 +127,7 @@ struct itrace_synth_opts { bool llc; bool tlb; bool remote_access; + bool mem; unsigned int callchain_sz; unsigned int last_branch_sz; unsigned long long period; -- cgit v1.3.1 From 13e5df1e3f1ba1a90944362bc57690ea1369b3b7 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 6 Nov 2020 17:48:51 +0800 Subject: perf mem: Support AUX trace The 'perf mem' tool doesn't support AUX trace data so it cannot receive the hardware tracing data. On arm64, although it doesn't support PMU events for memory load and store, ARM SPE is a good candidate for memory profiling, the hardware tracer can record memory accessing operations with affiliated information (e.g. physical address and virtual address for accessing, cache levels, TLB walking, latency, etc). To allow "perf mem" tool to support AUX trace, this patch adds the AUX callbacks for session structure; make itrace memory event as default for "perf mem", this tells the AUX trace decoder to synthesize memory samples. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-8-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-mem.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 72ce4b8fbb0f..fdfbff7592f4 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -7,6 +7,7 @@ #include "perf.h" #include +#include "util/auxtrace.h" #include "util/trace-event.h" #include "util/tool.h" #include "util/session.h" @@ -255,6 +256,12 @@ static int process_sample_event(struct perf_tool *tool, static int report_raw_events(struct perf_mem *mem) { + struct itrace_synth_opts itrace_synth_opts = { + .set = true, + .mem = true, /* Only enable memory event */ + .default_no_sample = true, + }; + struct perf_data data = { .path = input_name, .mode = PERF_DATA_MODE_READ, @@ -267,6 +274,8 @@ static int report_raw_events(struct perf_mem *mem) if (IS_ERR(session)) return PTR_ERR(session); + session->itrace_synth_opts = &itrace_synth_opts; + if (mem->cpu_list) { ret = perf_session__cpu_bitmap(session, mem->cpu_list, mem->cpu_bitmap); @@ -410,8 +419,12 @@ int cmd_mem(int argc, const char **argv) .comm = perf_event__process_comm, .lost = perf_event__process_lost, .fork = perf_event__process_fork, + .attr = perf_event__process_attr, .build_id = perf_event__process_build_id, .namespaces = perf_event__process_namespaces, + .auxtrace_info = perf_event__process_auxtrace_info, + .auxtrace = perf_event__process_auxtrace, + .auxtrace_error = perf_event__process_auxtrace_error, .ordered_events = true, }, .input_name = "perf.data", -- cgit v1.3.1 From c825f7885178f994a2a00ca02016940d94aaed6e Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 6 Nov 2020 17:48:52 +0800 Subject: perf c2c: Support AUX trace This patch adds the AUX callbacks in session structure, so support AUX trace for "perf c2c" tool; make itrace memory event as default for "perf c2c", this tells the AUX trace decoder to synthesize samples and can be used for statistics. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-9-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 98ae33eac6cc..c5babeaa3b38 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -369,6 +369,10 @@ static struct perf_c2c c2c = { .exit = perf_event__process_exit, .fork = perf_event__process_fork, .lost = perf_event__process_lost, + .attr = perf_event__process_attr, + .auxtrace_info = perf_event__process_auxtrace_info, + .auxtrace = perf_event__process_auxtrace, + .auxtrace_error = perf_event__process_auxtrace_error, .ordered_events = true, .ordering_requires_timestamps = true, }, @@ -2678,6 +2682,12 @@ static int setup_coalesce(const char *coalesce, bool no_source) static int perf_c2c__report(int argc, const char **argv) { + struct itrace_synth_opts itrace_synth_opts = { + .set = true, + .mem = true, /* Only enable memory event */ + .default_no_sample = true, + }; + struct perf_session *session; struct ui_progress prog; struct perf_data data = { @@ -2757,6 +2767,8 @@ static int perf_c2c__report(int argc, const char **argv) goto out; } + session->itrace_synth_opts = &itrace_synth_opts; + err = setup_nodes(session); if (err) { pr_err("Failed setup nodes\n"); -- cgit v1.3.1 From 40714c58630aaaf1eb3acc431fe206a6b36a03d6 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 6 Nov 2020 17:48:53 +0800 Subject: perf mem: Support ARM SPE events This patch adds ARM SPE events for perf memory profiling: 'spe-load': event for only recording memory load ops; 'spe-store': event for only recording memory store ops; 'spe-ldst': event for recording memory load and store ops. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-10-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/Build | 2 +- tools/perf/arch/arm64/util/mem-events.c | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 tools/perf/arch/arm64/util/mem-events.c (limited to 'tools') diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 8d2b9bcfffca..ead2f2275eee 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -10,4 +10,4 @@ perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ ../../arm/util/auxtrace.o \ ../../arm/util/cs-etm.o \ - arm-spe.o + arm-spe.o mem-events.o diff --git a/tools/perf/arch/arm64/util/mem-events.c b/tools/perf/arch/arm64/util/mem-events.c new file mode 100644 index 000000000000..2a2497372671 --- /dev/null +++ b/tools/perf/arch/arm64/util/mem-events.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "map_symbol.h" +#include "mem-events.h" + +#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } + +static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { + E("spe-load", "arm_spe_0/ts_enable=1,load_filter=1,store_filter=0,min_latency=%u/", "arm_spe_0"), + E("spe-store", "arm_spe_0/ts_enable=1,load_filter=0,store_filter=1/", "arm_spe_0"), + E("spe-ldst", "arm_spe_0/ts_enable=1,load_filter=1,store_filter=1,min_latency=%u/", "arm_spe_0"), +}; + +static char mem_ev_name[100]; + +struct perf_mem_event *perf_mem_events__ptr(int i) +{ + if (i >= PERF_MEM_EVENTS__MAX) + return NULL; + + return &perf_mem_events[i]; +} + +char *perf_mem_events__name(int i) +{ + struct perf_mem_event *e = perf_mem_events__ptr(i); + + if (i >= PERF_MEM_EVENTS__MAX) + return NULL; + + if (i == PERF_MEM_EVENTS__LOAD || i == PERF_MEM_EVENTS__LOAD_STORE) + scnprintf(mem_ev_name, sizeof(mem_ev_name), + e->name, perf_mem_events__loads_ldlat); + else /* PERF_MEM_EVENTS__STORE */ + scnprintf(mem_ev_name, sizeof(mem_ev_name), e->name); + + return mem_ev_name; +} -- cgit v1.3.1 From c185f1cde46653cd0a7a1eaf461d16c462870781 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 11 Nov 2020 15:11:28 +0800 Subject: perf arm-spe: Include bitops.h for BIT() macro Include header linux/bitops.h, directly use its BIT() macro and remove the self defined macros. Committer notes: Use BIT_ULL() instead of BIT to build on 32-bit arches as mentioned in review by Andre Przywara . I noticed the build failure when crossbuilding to arm32 from x86_64. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Link: https://lore.kernel.org/r/20201111071149.815-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 5 +---- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 7 +++---- 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 93e063f22be5..cc18a1e8c212 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -21,10 +22,6 @@ #include "arm-spe-decoder.h" -#ifndef BIT -#define BIT(n) (1UL << (n)) -#endif - static u64 arm_spe_calc_ip(int index, u64 payload) { u8 *addr = (u8 *)&payload; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index b94001b756c7..5f65a3a70c57 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -8,13 +8,12 @@ #include #include #include +#include #include "arm-spe-pkt-decoder.h" -#define BIT(n) (1ULL << (n)) - -#define NS_FLAG BIT(63) -#define EL_FLAG (BIT(62) | BIT(61)) +#define NS_FLAG BIT_ULL(63) +#define EL_FLAG (BIT_ULL(62) | BIT_ULL(61)) #define SPE_HEADER0_PAD 0x0 #define SPE_HEADER0_END 0x1 -- cgit v1.3.1 From 903b659436b706928934ff5ef59d591267e5ce1a Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 11 Nov 2020 15:11:29 +0800 Subject: perf arm-spe: Fix a typo in comment Fix a typo: s/iff/if. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Link: https://lore.kernel.org/r/20201111071149.815-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 5f65a3a70c57..12a96585da94 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -142,7 +142,7 @@ static int arm_spe_get_events(const unsigned char *buf, size_t len, /* we use index to identify Events with a less number of * comparisons in arm_spe_pkt_desc(): E.g., the LLC-ACCESS, - * LLC-REFILL, and REMOTE-ACCESS events are identified iff + * LLC-REFILL, and REMOTE-ACCESS events are identified if * index > 1. */ packet->index = ret - 1; -- cgit v1.3.1 From b2ded2e2e2764e502fc025f615210434f1eaa2a9 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 11 Nov 2020 15:11:30 +0800 Subject: perf arm-spe: Refactor payload size calculation This patch defines macro to extract "sz" field from header, and renames the function payloadlen() to arm_spe_payload_len(). Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Link: https://lore.kernel.org/r/20201111071149.815-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 12a96585da94..a8eb7be189ec 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -69,22 +69,22 @@ const char *arm_spe_pkt_name(enum arm_spe_pkt_type type) return arm_spe_packet_name[type]; } -/* return ARM SPE payload size from its encoding, - * which is in bits 5:4 of the byte. - * 00 : byte - * 01 : halfword (2) - * 10 : word (4) - * 11 : doubleword (8) +/* + * Extracts the field "sz" from header bits and converts to bytes: + * 00 : byte (1) + * 01 : halfword (2) + * 10 : word (4) + * 11 : doubleword (8) */ -static int payloadlen(unsigned char byte) +static unsigned int arm_spe_payload_len(unsigned char hdr) { - return 1 << ((byte & 0x30) >> 4); + return 1U << ((hdr & GENMASK_ULL(5, 4)) >> 4); } static int arm_spe_get_payload(const unsigned char *buf, size_t len, struct arm_spe_pkt *packet) { - size_t payload_len = payloadlen(buf[0]); + size_t payload_len = arm_spe_payload_len(buf[0]); if (len < 1 + payload_len) return ARM_SPE_NEED_MORE_BYTES; -- cgit v1.3.1 From b65577baf482909225c79d8a6bad44d2a62751f4 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 11 Nov 2020 15:11:31 +0800 Subject: perf arm-spe: Refactor arm_spe_get_events() In function arm_spe_get_events(), the event packet's 'index' is assigned as payload length, but the flow is not directive: it firstly gets the packet length from the return value of arm_spe_get_payload(), the value includes header length (1) and payload length: int ret = arm_spe_get_payload(buf, len, packet); and then reduces header length from packet length, so finally get the payload length: packet->index = ret - 1; To simplify the code, this patch directly assigns payload length to event packet's index; and at the end it calls arm_spe_get_payload() to return the payload value. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Link: https://lore.kernel.org/r/20201111071149.815-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index a8eb7be189ec..57904da89db1 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -136,8 +136,6 @@ static int arm_spe_get_timestamp(const unsigned char *buf, size_t len, static int arm_spe_get_events(const unsigned char *buf, size_t len, struct arm_spe_pkt *packet) { - int ret = arm_spe_get_payload(buf, len, packet); - packet->type = ARM_SPE_EVENTS; /* we use index to identify Events with a less number of @@ -145,9 +143,9 @@ static int arm_spe_get_events(const unsigned char *buf, size_t len, * LLC-REFILL, and REMOTE-ACCESS events are identified if * index > 1. */ - packet->index = ret - 1; + packet->index = arm_spe_payload_len(buf[0]); - return ret; + return arm_spe_get_payload(buf, len, packet); } static int arm_spe_get_data_source(const unsigned char *buf, size_t len, -- cgit v1.3.1 From 0a04244cabc5560ce1e08555e8712a4cd20ab6ce Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 11 Nov 2020 15:11:32 +0800 Subject: perf arm-spe: Fix packet length handling When processing address packet and counter packet, if the packet contains extended header, it misses to account the extra one byte for header length calculation, thus returns the wrong packet length. To correct the packet length calculation, one possible fixing is simply to plus extra 1 for extended header, but will spread some duplicate code in the flows for processing address packet and counter packet. Alternatively, we can refine the function arm_spe_get_payload() to not only support short header and allow it to support extended header, and rely on it for the packet length calculation. So this patch refactors function arm_spe_get_payload() with a new argument 'ext_hdr' for support extended header; the packet processing flows can invoke this function to unify the packet length calculation. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Link: https://lore.kernel.org/r/20201111071149.815-6-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../util/arm-spe-decoder/arm-spe-pkt-decoder.c | 34 ++++++++-------------- 1 file changed, 12 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 57904da89db1..671a4763fb47 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -82,14 +82,15 @@ static unsigned int arm_spe_payload_len(unsigned char hdr) } static int arm_spe_get_payload(const unsigned char *buf, size_t len, + unsigned char ext_hdr, struct arm_spe_pkt *packet) { - size_t payload_len = arm_spe_payload_len(buf[0]); + size_t payload_len = arm_spe_payload_len(buf[ext_hdr]); - if (len < 1 + payload_len) + if (len < 1 + ext_hdr + payload_len) return ARM_SPE_NEED_MORE_BYTES; - buf++; + buf += 1 + ext_hdr; switch (payload_len) { case 1: packet->payload = *(uint8_t *)buf; break; @@ -99,7 +100,7 @@ static int arm_spe_get_payload(const unsigned char *buf, size_t len, default: return ARM_SPE_BAD_PACKET; } - return 1 + payload_len; + return 1 + ext_hdr + payload_len; } static int arm_spe_get_pad(struct arm_spe_pkt *packet) @@ -130,7 +131,7 @@ static int arm_spe_get_timestamp(const unsigned char *buf, size_t len, struct arm_spe_pkt *packet) { packet->type = ARM_SPE_TIMESTAMP; - return arm_spe_get_payload(buf, len, packet); + return arm_spe_get_payload(buf, len, 0, packet); } static int arm_spe_get_events(const unsigned char *buf, size_t len, @@ -145,14 +146,14 @@ static int arm_spe_get_events(const unsigned char *buf, size_t len, */ packet->index = arm_spe_payload_len(buf[0]); - return arm_spe_get_payload(buf, len, packet); + return arm_spe_get_payload(buf, len, 0, packet); } static int arm_spe_get_data_source(const unsigned char *buf, size_t len, struct arm_spe_pkt *packet) { packet->type = ARM_SPE_DATA_SOURCE; - return arm_spe_get_payload(buf, len, packet); + return arm_spe_get_payload(buf, len, 0, packet); } static int arm_spe_get_context(const unsigned char *buf, size_t len, @@ -160,8 +161,7 @@ static int arm_spe_get_context(const unsigned char *buf, size_t len, { packet->type = ARM_SPE_CONTEXT; packet->index = buf[0] & 0x3; - - return arm_spe_get_payload(buf, len, packet); + return arm_spe_get_payload(buf, len, 0, packet); } static int arm_spe_get_op_type(const unsigned char *buf, size_t len, @@ -169,41 +169,31 @@ static int arm_spe_get_op_type(const unsigned char *buf, size_t len, { packet->type = ARM_SPE_OP_TYPE; packet->index = buf[0] & 0x3; - return arm_spe_get_payload(buf, len, packet); + return arm_spe_get_payload(buf, len, 0, packet); } static int arm_spe_get_counter(const unsigned char *buf, size_t len, const unsigned char ext_hdr, struct arm_spe_pkt *packet) { - if (len < 2) - return ARM_SPE_NEED_MORE_BYTES; - packet->type = ARM_SPE_COUNTER; if (ext_hdr) packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7); else packet->index = buf[0] & 0x7; - packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1)); - - return 1 + ext_hdr + 2; + return arm_spe_get_payload(buf, len, ext_hdr, packet); } static int arm_spe_get_addr(const unsigned char *buf, size_t len, const unsigned char ext_hdr, struct arm_spe_pkt *packet) { - if (len < 8) - return ARM_SPE_NEED_MORE_BYTES; - packet->type = ARM_SPE_ADDRESS; if (ext_hdr) packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7); else packet->index = buf[0] & 0x7; - memcpy_le64(&packet->payload, buf + 1, 8); - - return 1 + ext_hdr + 8; + return arm_spe_get_payload(buf, len, ext_hdr, packet); } static int arm_spe_do_get_packet(const unsigned char *buf, size_t len, -- cgit v1.3.1 From c8a950d0d3b926a02c7b2e713850d38217cec3d1 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 10 Nov 2020 17:43:05 +0100 Subject: tools: Factor HOSTCC, HOSTLD, HOSTAR definitions Several Makefiles in tools/ need to define the host toolchain variables. Move their definition to tools/scripts/Makefile.include Signed-off-by: Jean-Philippe Brucker Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/bpf/20201110164310.2600671-2-jean-philippe@linaro.org --- tools/bpf/resolve_btfids/Makefile | 9 --------- tools/build/Makefile | 4 ---- tools/objtool/Makefile | 9 --------- tools/perf/Makefile.perf | 4 ---- tools/power/acpi/Makefile.config | 1 - tools/scripts/Makefile.include | 10 ++++++++++ 6 files changed, 10 insertions(+), 27 deletions(-) (limited to 'tools') diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 66cb92136de4..bf656432ad73 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -18,15 +18,6 @@ else endif # always use the host compiler -ifneq ($(LLVM),) -HOSTAR ?= llvm-ar -HOSTCC ?= clang -HOSTLD ?= ld.lld -else -HOSTAR ?= ar -HOSTCC ?= gcc -HOSTLD ?= ld -endif AR = $(HOSTAR) CC = $(HOSTCC) LD = $(HOSTLD) diff --git a/tools/build/Makefile b/tools/build/Makefile index 722f1700d96a..bae48e6fa995 100644 --- a/tools/build/Makefile +++ b/tools/build/Makefile @@ -15,10 +15,6 @@ endef $(call allow-override,CC,$(CROSS_COMPILE)gcc) $(call allow-override,LD,$(CROSS_COMPILE)ld) -HOSTCC ?= gcc -HOSTLD ?= ld -HOSTAR ?= ar - export HOSTCC HOSTLD HOSTAR ifeq ($(V),1) diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 4ea9a833dde7..5cdb19036d7f 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -3,15 +3,6 @@ include ../scripts/Makefile.include include ../scripts/Makefile.arch # always use the host compiler -ifneq ($(LLVM),) -HOSTAR ?= llvm-ar -HOSTCC ?= clang -HOSTLD ?= ld.lld -else -HOSTAR ?= ar -HOSTCC ?= gcc -HOSTLD ?= ld -endif AR = $(HOSTAR) CC = $(HOSTCC) LD = $(HOSTLD) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 7ce3f2e8b9c7..62f3deb1d3a8 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -175,10 +175,6 @@ endef LD += $(EXTRA_LDFLAGS) -HOSTCC ?= gcc -HOSTLD ?= ld -HOSTAR ?= ar - PKG_CONFIG = $(CROSS_COMPILE)pkg-config LLVM_CONFIG ?= llvm-config diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config index 54a2857c2510..331f6d30f472 100644 --- a/tools/power/acpi/Makefile.config +++ b/tools/power/acpi/Makefile.config @@ -54,7 +54,6 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM} CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc- CROSS_COMPILE ?= $(CROSS) LD = $(CC) -HOSTCC = gcc # check if compiler option is supported cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;} diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index a7974638561c..1358e89cdf7d 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -59,6 +59,16 @@ $(call allow-override,LD,$(CROSS_COMPILE)ld) $(call allow-override,CXX,$(CROSS_COMPILE)g++) $(call allow-override,STRIP,$(CROSS_COMPILE)strip) +ifneq ($(LLVM),) +HOSTAR ?= llvm-ar +HOSTCC ?= clang +HOSTLD ?= ld.lld +else +HOSTAR ?= ar +HOSTCC ?= gcc +HOSTLD ?= ld +endif + ifeq ($(CC_NO_CLANG), 1) EXTRA_WARNINGS += -Wstrict-aliasing=3 endif -- cgit v1.3.1 From 9e8929fdbb9c9026bd3a732e9ac7dc9617c86309 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 10 Nov 2020 17:43:06 +0100 Subject: tools/bpftool: Force clean of out-of-tree build Cleaning a partial build can fail if the output directory for libbpf wasn't created: $ make -C tools/bpf/bpftool O=/tmp/bpf clean /bin/sh: line 0: cd: /tmp/bpf/libbpf/: No such file or directory tools/scripts/Makefile.include:17: *** output directory "/tmp/bpf/libbpf/" does not exist. Stop. make: *** [Makefile:36: /tmp/bpf/libbpf/libbpf.a-clean] Error 2 As a result make never gets around to clearing the leftover objects. Add the libbpf output directory as clean dependency to ensure clean always succeeds (similarly to the "descend" macro). The directory is later removed by the clean recipe. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201110164310.2600671-3-jean-philippe@linaro.org --- tools/bpf/bpftool/Makefile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index f60e6ad3a1df..1358c093b812 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -27,11 +27,13 @@ LIBBPF = $(LIBBPF_PATH)libbpf.a BPFTOOL_VERSION ?= $(shell make -rR --no-print-directory -sC ../../.. kernelversion) -$(LIBBPF): FORCE - $(if $(LIBBPF_OUTPUT),@mkdir -p $(LIBBPF_OUTPUT)) +$(LIBBPF_OUTPUT): + $(QUIET_MKDIR)mkdir -p $@ + +$(LIBBPF): FORCE | $(LIBBPF_OUTPUT) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) $(LIBBPF_OUTPUT)libbpf.a -$(LIBBPF)-clean: +$(LIBBPF)-clean: $(LIBBPF_OUTPUT) $(call QUIET_CLEAN, libbpf) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) clean >/dev/null -- cgit v1.3.1 From 8859b0da5aac28e4e9651c8971e7af344f8ffec1 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 10 Nov 2020 17:43:07 +0100 Subject: tools/bpftool: Fix cross-build The bpftool build first creates an intermediate binary, executed on the host, to generate skeletons required by the final build. When cross-building bpftool for an architecture different from the host, the intermediate binary should be built using the host compiler (gcc) and the final bpftool using the cross compiler (e.g. aarch64-linux-gnu-gcc). Generate the intermediate objects into the bootstrap/ directory using the host toolchain. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201110164310.2600671-4-jean-philippe@linaro.org --- tools/bpf/bpftool/Makefile | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 1358c093b812..d566bced135e 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -19,24 +19,37 @@ BPF_DIR = $(srctree)/tools/lib/bpf/ ifneq ($(OUTPUT),) LIBBPF_OUTPUT = $(OUTPUT)/libbpf/ LIBBPF_PATH = $(LIBBPF_OUTPUT) + BOOTSTRAP_OUTPUT = $(OUTPUT)/bootstrap/ else + LIBBPF_OUTPUT = LIBBPF_PATH = $(BPF_DIR) + BOOTSTRAP_OUTPUT = $(CURDIR)/bootstrap/ endif LIBBPF = $(LIBBPF_PATH)libbpf.a +LIBBPF_BOOTSTRAP_OUTPUT = $(BOOTSTRAP_OUTPUT)libbpf/ +LIBBPF_BOOTSTRAP = $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a BPFTOOL_VERSION ?= $(shell make -rR --no-print-directory -sC ../../.. kernelversion) -$(LIBBPF_OUTPUT): +$(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT): $(QUIET_MKDIR)mkdir -p $@ $(LIBBPF): FORCE | $(LIBBPF_OUTPUT) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) $(LIBBPF_OUTPUT)libbpf.a +$(LIBBPF_BOOTSTRAP): FORCE | $(LIBBPF_BOOTSTRAP_OUTPUT) + $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \ + ARCH= CC=$(HOSTCC) LD=$(HOSTLD) $@ + $(LIBBPF)-clean: $(LIBBPF_OUTPUT) $(call QUIET_CLEAN, libbpf) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) clean >/dev/null +$(LIBBPF_BOOTSTRAP)-clean: $(LIBBPF_BOOTSTRAP_OUTPUT) + $(call QUIET_CLEAN, libbpf-bootstrap) + $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) clean >/dev/null + prefix ?= /usr/local bash_compdir ?= /usr/share/bash-completion/completions @@ -94,6 +107,7 @@ CFLAGS += -DCOMPAT_NEED_REALLOCARRAY endif LIBS = $(LIBBPF) -lelf -lz +LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz ifeq ($(feature-libcap), 1) CFLAGS += -DUSE_LIBCAP LIBS += -lcap @@ -120,9 +134,9 @@ CFLAGS += -DHAVE_LIBBFD_SUPPORT SRCS += $(BFD_SRCS) endif -BPFTOOL_BOOTSTRAP := $(if $(OUTPUT),$(OUTPUT)bpftool-bootstrap,./bpftool-bootstrap) +BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool -BOOTSTRAP_OBJS = $(addprefix $(OUTPUT),main.o common.o json_writer.o gen.o btf.o) +BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o) OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ @@ -169,12 +183,16 @@ $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(OUTPUT)feature.o: | zdep -$(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) $(LIBS) +$(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP) + $(QUIET_LINK)$(HOSTCC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) \ + $(LIBS_BOOTSTRAP) $(OUTPUT)bpftool: $(OBJS) $(LIBBPF) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) +$(BOOTSTRAP_OUTPUT)%.o: %.c | $(BOOTSTRAP_OUTPUT) + $(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $< + $(OUTPUT)%.o: %.c $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< @@ -182,11 +200,11 @@ feature-detect-clean: $(call QUIET_CLEAN, feature-detect) $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null -clean: $(LIBBPF)-clean feature-detect-clean +clean: $(LIBBPF)-clean $(LIBBPF_BOOTSTRAP)-clean feature-detect-clean $(call QUIET_CLEAN, bpftool) $(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d - $(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h - $(Q)$(RM) -r -- $(OUTPUT)libbpf/ + $(Q)$(RM) -- $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h + $(Q)$(RM) -r -- $(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(call QUIET_CLEAN, core-gen) $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool $(Q)$(RM) -r -- $(OUTPUT)feature/ -- cgit v1.3.1 From 3290996e713315dfc9589e2c340a4c4997d90be8 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 10 Nov 2020 17:43:08 +0100 Subject: tools/runqslower: Use Makefile.include Makefile.include defines variables such as OUTPUT and CC for out-of-tree build and cross-build. Include it into the runqslower Makefile and use its $(QUIET*) helpers. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201110164310.2600671-5-jean-philippe@linaro.org --- tools/bpf/runqslower/Makefile | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index fb1337d69868..bcc4a7396713 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -1,4 +1,6 @@ # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +include ../../scripts/Makefile.include + OUTPUT := .output CLANG ?= clang LLC ?= llc @@ -21,10 +23,8 @@ VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \ abs_out := $(abspath $(OUTPUT)) ifeq ($(V),1) Q = -msg = else Q = @ -msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; MAKEFLAGS += --no-print-directory submake_extras := feature_display=0 endif @@ -37,12 +37,11 @@ all: runqslower runqslower: $(OUTPUT)/runqslower clean: - $(call msg,CLEAN) + $(call QUIET_CLEAN, runqslower) $(Q)rm -rf $(OUTPUT) runqslower $(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ) - $(call msg,BINARY,$@) - $(Q)$(CC) $(CFLAGS) $^ -lelf -lz -o $@ + $(QUIET_LINK)$(CC) $(CFLAGS) $^ -lelf -lz -o $@ $(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h \ $(OUTPUT)/runqslower.bpf.o @@ -50,31 +49,26 @@ $(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h \ $(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h $(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL) - $(call msg,GEN-SKEL,$@) - $(Q)$(BPFTOOL) gen skeleton $< > $@ + $(QUIET_GEN)$(BPFTOOL) gen skeleton $< > $@ $(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT) - $(call msg,BPF,$@) - $(Q)$(CLANG) -g -O2 -target bpf $(INCLUDES) \ + $(QUIET_GEN)$(CLANG) -g -O2 -target bpf $(INCLUDES) \ -c $(filter %.c,$^) -o $@ && \ $(LLVM_STRIP) -g $@ $(OUTPUT)/%.o: %.c | $(OUTPUT) - $(call msg,CC,$@) - $(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@ + $(QUIET_CC)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@ $(OUTPUT): - $(call msg,MKDIR,$@) - $(Q)mkdir -p $(OUTPUT) + $(QUIET_MKDIR)mkdir -p $(OUTPUT) $(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL) - $(call msg,GEN,$@) $(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \ echo "Couldn't find kernel BTF; set VMLINUX_BTF to" \ "specify its location." >&2; \ exit 1;\ fi - $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@ + $(QUIET_GEN)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT) $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) \ -- cgit v1.3.1 From 85e59344d0790379e063bf3cd3dd0fe91ce3b505 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 10 Nov 2020 17:43:09 +0100 Subject: tools/runqslower: Enable out-of-tree build Enable out-of-tree build for runqslower. Only set OUTPUT=.output if it wasn't already set by the user. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201110164310.2600671-6-jean-philippe@linaro.org --- tools/bpf/runqslower/Makefile | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index bcc4a7396713..0fc4d4046193 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -1,15 +1,18 @@ # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) include ../../scripts/Makefile.include -OUTPUT := .output +OUTPUT ?= $(abspath .output)/ + CLANG ?= clang LLC ?= llc LLVM_STRIP ?= llvm-strip -DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool +BPFTOOL_OUTPUT := $(OUTPUT)bpftool/ +DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) LIBBPF_SRC := $(abspath ../../lib/bpf) -BPFOBJ := $(OUTPUT)/libbpf.a -BPF_INCLUDE := $(OUTPUT) +BPFOBJ_OUTPUT := $(OUTPUT)libbpf/ +BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a +BPF_INCLUDE := $(BPFOBJ_OUTPUT) INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../lib) \ -I$(abspath ../../include/uapi) CFLAGS := -g -Wall @@ -20,7 +23,6 @@ VMLINUX_BTF_PATHS := /sys/kernel/btf/vmlinux /boot/vmlinux-$(KERNEL_REL) VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \ $(wildcard $(VMLINUX_BTF_PATHS)))) -abs_out := $(abspath $(OUTPUT)) ifeq ($(V),1) Q = else @@ -38,7 +40,11 @@ runqslower: $(OUTPUT)/runqslower clean: $(call QUIET_CLEAN, runqslower) - $(Q)rm -rf $(OUTPUT) runqslower + $(Q)$(RM) -r $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT) + $(Q)$(RM) $(OUTPUT)*.o $(OUTPUT)*.d + $(Q)$(RM) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h + $(Q)$(RM) $(OUTPUT)runqslower + $(Q)$(RM) -r .output $(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ) $(QUIET_LINK)$(CC) $(CFLAGS) $^ -lelf -lz -o $@ @@ -59,8 +65,8 @@ $(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT) $(OUTPUT)/%.o: %.c | $(OUTPUT) $(QUIET_CC)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@ -$(OUTPUT): - $(QUIET_MKDIR)mkdir -p $(OUTPUT) +$(OUTPUT) $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT): + $(QUIET_MKDIR)mkdir -p $@ $(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL) $(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \ @@ -70,10 +76,8 @@ $(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL) fi $(QUIET_GEN)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@ -$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT) - $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) \ - OUTPUT=$(abspath $(dir $@))/ $(abspath $@) +$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT) + $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) $@ -$(DEFAULT_BPFTOOL): - $(Q)$(MAKE) $(submake_extras) -C ../bpftool \ - prefix= OUTPUT=$(abs_out)/ DESTDIR=$(abs_out) install +$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT) + $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) -- cgit v1.3.1 From 2d9393fefb506fb8c8a483e5dc9bbd7774657b89 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 10 Nov 2020 17:43:10 +0100 Subject: tools/runqslower: Build bpftool using HOSTCC When cross building runqslower for an other architecture, the intermediate bpftool used to generate a skeleton must be built using the host toolchain. Pass HOSTCC and HOSTLD, defined in Makefile.include, to the bpftool Makefile. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201110164310.2600671-7-jean-philippe@linaro.org --- tools/bpf/runqslower/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index 0fc4d4046193..4d5ca54fcd4c 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -80,4 +80,5 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OU $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) $@ $(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT) - $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) + $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) \ + CC=$(HOSTCC) LD=$(HOSTLD) -- cgit v1.3.1 From 0639e5e97ad9c58dd15dcf6f6ccf677cfba39f98 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 10 Nov 2020 17:43:11 +0100 Subject: tools/bpftool: Fix build slowdown Commit ba2fd563b740 ("tools/bpftool: Support passing BPFTOOL_VERSION to make") changed BPFTOOL_VERSION to a recursively expanded variable, forcing it to be recomputed on every expansion of CFLAGS and dramatically slowing down the bpftool build. Restore BPFTOOL_VERSION as a simply expanded variable, guarded by an ifeq(). Fixes: ba2fd563b740 ("tools/bpftool: Support passing BPFTOOL_VERSION to make") Signed-off-by: Jean-Philippe Brucker Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201110164310.2600671-8-jean-philippe@linaro.org --- tools/bpf/bpftool/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index d566bced135e..804ade95929f 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -30,7 +30,9 @@ LIBBPF = $(LIBBPF_PATH)libbpf.a LIBBPF_BOOTSTRAP_OUTPUT = $(BOOTSTRAP_OUTPUT)libbpf/ LIBBPF_BOOTSTRAP = $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a -BPFTOOL_VERSION ?= $(shell make -rR --no-print-directory -sC ../../.. kernelversion) +ifeq ($(BPFTOOL_VERSION),) +BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion) +endif $(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT): $(QUIET_MKDIR)mkdir -p $@ -- cgit v1.3.1 From ef1220a7d4bbdb5fc435d691776778568dfb69a8 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Tue, 10 Nov 2020 10:00:48 +0800 Subject: selftests: pmtu.sh: use $ksft_skip for skipped return code This test uses return code 2 as a hard-coded skipped state, let's use the kselftest framework skip code variable $ksft_skip instead to make it more readable and easier to maintain. Signed-off-by: Po-Hsu Lin Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/pmtu.sh | 64 ++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 32 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 6bbf69a28e12..fb53987ab64b 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -355,7 +355,7 @@ setup_fou_or_gue() { encap="${3}" if [ "${outer}" = "4" ]; then - modprobe fou || return 2 + modprobe fou || return $ksft_skip a_addr="${prefix4}.${a_r1}.1" b_addr="${prefix4}.${b_r1}.1" if [ "${inner}" = "4" ]; then @@ -366,7 +366,7 @@ setup_fou_or_gue() { ipproto="41" fi else - modprobe fou6 || return 2 + modprobe fou6 || return $ksft_skip a_addr="${prefix6}:${a_r1}::1" b_addr="${prefix6}:${b_r1}::1" if [ "${inner}" = "4" ]; then @@ -380,8 +380,8 @@ setup_fou_or_gue() { fi fi - run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2 - run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2 + run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return $ksft_skip + run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return $ksft_skip run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto} run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555 @@ -455,7 +455,7 @@ setup_ipvX_over_ipvY() { fi fi - run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return 2 + run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return $ksft_skip run_cmd ${ns_b} ip link add ip_b type ${type} local ${b_addr} remote ${a_addr} mode ${mode} run_cmd ${ns_a} ip link set ip_a up @@ -713,7 +713,7 @@ setup_routing() { } setup_bridge() { - run_cmd ${ns_a} ip link add br0 type bridge || return 2 + run_cmd ${ns_a} ip link add br0 type bridge || return $ksft_skip run_cmd ${ns_a} ip link set br0 up run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C @@ -765,7 +765,7 @@ setup_ovs_vxlan6() { } setup_ovs_bridge() { - run_cmd ovs-vsctl add-br ovs_br0 || return 2 + run_cmd ovs-vsctl add-br ovs_br0 || return $ksft_skip run_cmd ip link set ovs_br0 up run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C @@ -887,7 +887,7 @@ check_pmtu_value() { test_pmtu_ipvX() { family=${1} - setup namespaces routing || return 2 + setup namespaces routing || return $ksft_skip trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ @@ -985,11 +985,11 @@ test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() { ll_mtu=4000 if [ ${outer_family} -eq 4 ]; then - setup namespaces routing ${type}4 || return 2 + setup namespaces routing ${type}4 || return $ksft_skip # IPv4 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) else - setup namespaces routing ${type}6 || return 2 + setup namespaces routing ${type}6 || return $ksft_skip # IPv6 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) fi @@ -1060,11 +1060,11 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { ll_mtu=4000 if [ ${outer_family} -eq 4 ]; then - setup namespaces routing bridge bridged_${type}4 || return 2 + setup namespaces routing bridge bridged_${type}4 || return $ksft_skip # IPv4 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) else - setup namespaces routing bridge bridged_${type}6 || return 2 + setup namespaces routing bridge bridged_${type}6 || return $ksft_skip # IPv6 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) fi @@ -1144,11 +1144,11 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() { ll_mtu=4000 if [ ${outer_family} -eq 4 ]; then - setup namespaces routing ovs_bridge ovs_${type}4 || return 2 + setup namespaces routing ovs_bridge ovs_${type}4 || return $ksft_skip # IPv4 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) else - setup namespaces routing ovs_bridge ovs_${type}6 || return 2 + setup namespaces routing ovs_bridge ovs_${type}6 || return $ksft_skip # IPv6 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) fi @@ -1230,7 +1230,7 @@ test_pmtu_ipvX_over_fouY_or_gueY() { encap=${3} ll_mtu=4000 - setup namespaces routing ${encap}${outer_family}${inner_family} || return 2 + setup namespaces routing ${encap}${outer_family}${inner_family} || return $ksft_skip trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B @@ -1309,7 +1309,7 @@ test_pmtu_ipvX_over_ipvY_exception() { outer=${2} ll_mtu=4000 - setup namespaces routing ip${inner}ip${outer} || return 2 + setup namespaces routing ip${inner}ip${outer} || return $ksft_skip trace "${ns_a}" ip_a "${ns_b}" ip_b \ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ @@ -1363,7 +1363,7 @@ test_pmtu_ipv6_ipv6_exception() { } test_pmtu_vti4_exception() { - setup namespaces veth vti4 xfrm4 || return 2 + setup namespaces veth vti4 xfrm4 || return $ksft_skip trace "${ns_a}" veth_a "${ns_b}" veth_b \ "${ns_a}" vti4_a "${ns_b}" vti4_b @@ -1393,7 +1393,7 @@ test_pmtu_vti4_exception() { } test_pmtu_vti6_exception() { - setup namespaces veth vti6 xfrm6 || return 2 + setup namespaces veth vti6 xfrm6 || return $ksft_skip trace "${ns_a}" veth_a "${ns_b}" veth_b \ "${ns_a}" vti6_a "${ns_b}" vti6_b fail=0 @@ -1423,7 +1423,7 @@ test_pmtu_vti6_exception() { } test_pmtu_vti4_default_mtu() { - setup namespaces veth vti4 || return 2 + setup namespaces veth vti4 || return $ksft_skip # Check that MTU of vti device is MTU of veth minus IPv4 header length veth_mtu="$(link_get_mtu "${ns_a}" veth_a)" @@ -1435,7 +1435,7 @@ test_pmtu_vti4_default_mtu() { } test_pmtu_vti6_default_mtu() { - setup namespaces veth vti6 || return 2 + setup namespaces veth vti6 || return $ksft_skip # Check that MTU of vti device is MTU of veth minus IPv6 header length veth_mtu="$(link_get_mtu "${ns_a}" veth_a)" @@ -1447,10 +1447,10 @@ test_pmtu_vti6_default_mtu() { } test_pmtu_vti4_link_add_mtu() { - setup namespaces || return 2 + setup namespaces || return $ksft_skip run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 - [ $? -ne 0 ] && err " vti not supported" && return 2 + [ $? -ne 0 ] && err " vti not supported" && return $ksft_skip run_cmd ${ns_a} ip link del vti4_a fail=0 @@ -1485,10 +1485,10 @@ test_pmtu_vti4_link_add_mtu() { } test_pmtu_vti6_link_add_mtu() { - setup namespaces || return 2 + setup namespaces || return $ksft_skip run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 - [ $? -ne 0 ] && err " vti6 not supported" && return 2 + [ $? -ne 0 ] && err " vti6 not supported" && return $ksft_skip run_cmd ${ns_a} ip link del vti6_a fail=0 @@ -1523,10 +1523,10 @@ test_pmtu_vti6_link_add_mtu() { } test_pmtu_vti6_link_change_mtu() { - setup namespaces || return 2 + setup namespaces || return $ksft_skip run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy - [ $? -ne 0 ] && err " dummy not supported" && return 2 + [ $? -ne 0 ] && err " dummy not supported" && return $ksft_skip run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy run_cmd ${ns_a} ip link set dummy0 up run_cmd ${ns_a} ip link set dummy1 up @@ -1579,10 +1579,10 @@ test_cleanup_vxlanX_exception() { encap="vxlan" ll_mtu=4000 - check_command taskset || return 2 + check_command taskset || return $ksft_skip cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2) - setup namespaces routing ${encap}${outer} || return 2 + setup namespaces routing ${encap}${outer} || return $ksft_skip trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B @@ -1644,7 +1644,7 @@ run_test() { fi err_flush exit 1 - elif [ $ret -eq 2 ]; then + elif [ $ret -eq $ksft_skip ]; then printf "TEST: %-60s [SKIP]\n" "${tdesc}" err_flush fi @@ -1667,7 +1667,7 @@ run_test_nh() { } test_list_flush_ipv4_exception() { - setup namespaces routing || return 2 + setup namespaces routing || return $ksft_skip trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ @@ -1721,7 +1721,7 @@ test_list_flush_ipv4_exception() { } test_list_flush_ipv6_exception() { - setup namespaces routing || return 2 + setup namespaces routing || return $ksft_skip trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ @@ -1840,7 +1840,7 @@ for t in ${tests}; do if [ $run_this -eq 1 ]; then run_test "${name}" "${desc}" # if test was skipped no need to retry with nexthop objects - [ $? -eq 2 ] && rerun_nh=0 + [ $? -eq $ksft_skip ] && rerun_nh=0 if [ "${rerun_nh}" = "1" ]; then run_test_nh "${name}" "${desc}" -- cgit v1.3.1 From 2a9d3716b810a4f2c8291b7aa8f358d11693f6e5 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Tue, 10 Nov 2020 10:00:49 +0800 Subject: selftests: pmtu.sh: improve the test result processing This test will treat all non-zero return codes as failures, it will make the pmtu.sh test script being marked as FAILED when some sub-test got skipped. Improve the result processing by * Only mark the whole test script as SKIP when all of the sub-tests were skipped * If the sub-tests were either passed or skipped, the overall result will be PASS * If any of them has failed with return code 1 or anything bad happened (e.g. return code 127 for command not found), the overall result will be FAIL Signed-off-by: Po-Hsu Lin Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/pmtu.sh | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index fb53987ab64b..464e31eabc73 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -1652,7 +1652,19 @@ run_test() { return $ret ) ret=$? - [ $ret -ne 0 ] && exitcode=1 + case $ret in + 0) + all_skipped=false + [ $exitcode=$ksft_skip ] && exitcode=0 + ;; + $ksft_skip) + [ $all_skipped = true ] && exitcode=$ksft_skip + ;; + *) + all_skipped=false + exitcode=1 + ;; + esac return $ret } @@ -1786,6 +1798,7 @@ usage() { # exitcode=0 desc=0 +all_skipped=true while getopts :ptv o do -- cgit v1.3.1 From fd63729cc0a6872bdabd393ee933a969642e4076 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 11 Nov 2020 15:12:15 -0800 Subject: selftests/bpf: Fix unused attribute usage in subprogs_unused test Correct attribute name is "unused". maybe_unused is a C++17 addition. This patch fixes compilation warning during selftests compilation. Fixes: 197afc631413 ("libbpf: Don't attempt to load unused subprog as an entry-point BPF program") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201111231215.1779147-1-andrii@kernel.org --- tools/testing/selftests/bpf/progs/test_subprogs_unused.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_unused.c b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c index 75d975f8cf90..bc49e050d342 100644 --- a/tools/testing/selftests/bpf/progs/test_subprogs_unused.c +++ b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c @@ -4,12 +4,12 @@ const char LICENSE[] SEC("license") = "GPL"; -__attribute__((maybe_unused)) __noinline int unused1(int x) +__attribute__((unused)) __noinline int unused1(int x) { return x + 1; } -static __attribute__((maybe_unused)) __noinline int unused2(int x) +static __attribute__((unused)) __noinline int unused2(int x) { return x + 2; } -- cgit v1.3.1 From 6a59edd832e2e353809778859d2a4a2d6c94d011 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 12 Nov 2020 10:10:50 +0100 Subject: tools/bpf: Add bootstrap/ to .gitignore Commit 8859b0da5aac ("tools/bpftool: Fix cross-build") added a build-time bootstrap/ directory for bpftool, and removed bpftool-bootstrap. Update .gitignore accordingly. Reported-by: Andrii Nakryiko Signed-off-by: Jean-Philippe Brucker Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201112091049.3159055-1-jean-philippe@linaro.org --- tools/bpf/bpftool/.gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore index 3e601bcfd461..944cb4b7c95d 100644 --- a/tools/bpf/bpftool/.gitignore +++ b/tools/bpf/bpftool/.gitignore @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only *.d -/bpftool-bootstrap +/bootstrap/ /bpftool bpftool*.8 bpf-helpers.* -- cgit v1.3.1 From c36538798fc6c80bd8bdaddad803b0c86dc13d7c Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 12 Nov 2020 10:10:52 +0100 Subject: tools/bpf: Always run the *-clean recipes Make $(LIBBPF)-clean and $(LIBBPF_BOOTSTRAP)-clean .PHONY targets, in case those files exist. And keep consistency within the Makefile by making the directory dependencies order-only. Suggested-by: Andrii Nakryiko Signed-off-by: Jean-Philippe Brucker Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201112091049.3159055-2-jean-philippe@linaro.org --- tools/bpf/bpftool/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 804ade95929f..f897cb5fb12d 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -44,11 +44,11 @@ $(LIBBPF_BOOTSTRAP): FORCE | $(LIBBPF_BOOTSTRAP_OUTPUT) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \ ARCH= CC=$(HOSTCC) LD=$(HOSTLD) $@ -$(LIBBPF)-clean: $(LIBBPF_OUTPUT) +$(LIBBPF)-clean: FORCE | $(LIBBPF_OUTPUT) $(call QUIET_CLEAN, libbpf) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) clean >/dev/null -$(LIBBPF_BOOTSTRAP)-clean: $(LIBBPF_BOOTSTRAP_OUTPUT) +$(LIBBPF_BOOTSTRAP)-clean: FORCE | $(LIBBPF_BOOTSTRAP_OUTPUT) $(call QUIET_CLEAN, libbpf-bootstrap) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) clean >/dev/null -- cgit v1.3.1 From e24a87b54ef3e39261f1d859b7f78416349dfb14 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 4 Nov 2020 17:42:28 +0800 Subject: perf lock: Correct field name "flags" The tracepoint "lock:lock_acquire" contains field "flags" but not "flag". Current code wrongly retrieves value from field "flag" and it always gets zero for the value, thus "perf lock" doesn't report the correct result. This patch replaces the field name "flag" with "flags", so can read out the correct flags for locking. Fixes: e4cef1f65061 ("perf lock: Fix state machine to recognize lock sequence") Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201104094229.17509-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index f0a1dbacb46c..5cecc1ad78e1 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -406,7 +406,7 @@ static int report_lock_acquire_event(struct evsel *evsel, struct lock_seq_stat *seq; const char *name = evsel__strval(evsel, sample, "name"); u64 tmp = evsel__intval(evsel, sample, "lockdep_addr"); - int flag = evsel__intval(evsel, sample, "flag"); + int flag = evsel__intval(evsel, sample, "flags"); memcpy(&addr, &tmp, sizeof(void *)); -- cgit v1.3.1 From b0e5a05cc9e37763c7f19366d94b1a6160c755bc Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 4 Nov 2020 17:42:29 +0800 Subject: perf lock: Don't free "lock_seq_stat" if read_count isn't zero When execute command "perf lock report", it hits failure and outputs log as follows: perf: builtin-lock.c:623: report_lock_release_event: Assertion `!(seq->read_count < 0)' failed. Aborted This is an imbalance issue. The locking sequence structure "lock_seq_stat" contains the reader counter and it is used to check if the locking sequence is balance or not between acquiring and releasing. If the tool wrongly frees "lock_seq_stat" when "read_count" isn't zero, the "read_count" will be reset to zero when allocate a new structure at the next time; thus it causes the wrong counting for reader and finally results in imbalance issue. To fix this issue, if detects "read_count" is not zero (means still have read user in the locking sequence), goto the "end" tag to skip freeing structure "lock_seq_stat". Fixes: e4cef1f65061 ("perf lock: Fix state machine to recognize lock sequence") Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201104094229.17509-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 5cecc1ad78e1..a2f1e53f37a7 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -621,7 +621,7 @@ static int report_lock_release_event(struct evsel *evsel, case SEQ_STATE_READ_ACQUIRED: seq->read_count--; BUG_ON(seq->read_count < 0); - if (!seq->read_count) { + if (seq->read_count) { ls->nr_release++; goto end; } -- cgit v1.3.1 From db1a8b97a0a36155171dbb805fbcb276e07559f6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 9 Nov 2020 13:59:15 -0300 Subject: tools arch: Update arch/x86/lib/mem{cpy,set}_64.S copies used in 'perf bench mem memcpy' To bring in the change made in this cset: 4d6ffa27b8e5116c ("x86/lib: Change .weak to SYM_FUNC_START_WEAK for arch/x86/lib/mem*_64.S") 6dcc5627f6aec4cb ("x86/asm: Change all ENTRY+ENDPROC to SYM_FUNC_*") I needed to define SYM_FUNC_START_LOCAL() as SYM_L_GLOBAL as mem{cpy,set}_{orig,erms} are used by 'perf bench'. This silences these perf tools build warnings: Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S' diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S Warning: Kernel ABI header at 'tools/arch/x86/lib/memset_64.S' differs from latest version at 'arch/x86/lib/memset_64.S' diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S Cc: Adrian Hunter Cc: Borislav Petkov Cc: Fangrui Song Cc: Ian Rogers Cc: Jiri Olsa Cc: Jiri Slaby Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/lib/memcpy_64.S | 8 +++----- tools/arch/x86/lib/memset_64.S | 11 ++++++----- tools/perf/bench/mem-memcpy-x86-64-asm.S | 3 +++ tools/perf/bench/mem-memset-x86-64-asm.S | 3 +++ tools/perf/util/include/linux/linkage.h | 7 +++++++ 5 files changed, 22 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 0b5b8ae56bd9..1e299ac73c86 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -16,8 +16,6 @@ * to a jmp to memcpy_erms which does the REP; MOVSB mem copy. */ -.weak memcpy - /* * memcpy - Copy a memory block. * @@ -30,7 +28,7 @@ * rax original destination */ SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_LOCAL(memcpy) +SYM_FUNC_START_WEAK(memcpy) ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ "jmp memcpy_erms", X86_FEATURE_ERMS @@ -51,14 +49,14 @@ EXPORT_SYMBOL(__memcpy) * memcpy_erms() - enhanced fast string memcpy. This is faster and * simpler than memcpy. Use memcpy_erms when possible. */ -SYM_FUNC_START(memcpy_erms) +SYM_FUNC_START_LOCAL(memcpy_erms) movq %rdi, %rax movq %rdx, %rcx rep movsb ret SYM_FUNC_END(memcpy_erms) -SYM_FUNC_START(memcpy_orig) +SYM_FUNC_START_LOCAL(memcpy_orig) movq %rdi, %rax cmpq $0x20, %rdx diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index fd5d25a474b7..0bfd26e4ca9e 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -4,8 +4,7 @@ #include #include #include - -.weak memset +#include /* * ISO C memset - set a memory block to a byte value. This function uses fast @@ -18,7 +17,7 @@ * * rax original destination */ -SYM_FUNC_START_ALIAS(memset) +SYM_FUNC_START_WEAK(memset) SYM_FUNC_START(__memset) /* * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended @@ -44,6 +43,8 @@ SYM_FUNC_START(__memset) ret SYM_FUNC_END(__memset) SYM_FUNC_END_ALIAS(memset) +EXPORT_SYMBOL(memset) +EXPORT_SYMBOL(__memset) /* * ISO C memset - set a memory block to a byte value. This function uses @@ -56,7 +57,7 @@ SYM_FUNC_END_ALIAS(memset) * * rax original destination */ -SYM_FUNC_START(memset_erms) +SYM_FUNC_START_LOCAL(memset_erms) movq %rdi,%r9 movb %sil,%al movq %rdx,%rcx @@ -65,7 +66,7 @@ SYM_FUNC_START(memset_erms) ret SYM_FUNC_END(memset_erms) -SYM_FUNC_START(memset_orig) +SYM_FUNC_START_LOCAL(memset_orig) movq %rdi,%r10 /* expand byte value */ diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index 9ad015a1e202..6eb45a2aa8db 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -2,6 +2,9 @@ /* Various wrappers to make the kernel .S file build in user-space: */ +// memcpy_orig and memcpy_erms are being defined as SYM_L_LOCAL but we need it +#define SYM_FUNC_START_LOCAL(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #define memcpy MEMCPY /* don't hide glibc's memcpy() */ #define altinstr_replacement text #define globl p2align 4; .globl diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S index d550bd526162..6f093c483842 100644 --- a/tools/perf/bench/mem-memset-x86-64-asm.S +++ b/tools/perf/bench/mem-memset-x86-64-asm.S @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +// memset_orig and memset_erms are being defined as SYM_L_LOCAL but we need it +#define SYM_FUNC_START_LOCAL(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #define memset MEMSET /* don't hide glibc's memset() */ #define altinstr_replacement text #define globl p2align 4; .globl diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h index b8a5159361b4..5acf053fca7d 100644 --- a/tools/perf/util/include/linux/linkage.h +++ b/tools/perf/util/include/linux/linkage.h @@ -25,6 +25,7 @@ /* SYM_L_* -- linkage of symbols */ #define SYM_L_GLOBAL(name) .globl name +#define SYM_L_WEAK(name) .weak name #define SYM_L_LOCAL(name) /* nothing */ #define ALIGN __ALIGN @@ -84,6 +85,12 @@ SYM_END(name, SYM_T_FUNC) #endif +/* SYM_FUNC_START_WEAK -- use for weak functions */ +#ifndef SYM_FUNC_START_WEAK +#define SYM_FUNC_START_WEAK(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) +#endif + /* * SYM_FUNC_END -- the end of SYM_FUNC_START_LOCAL, SYM_FUNC_START, * SYM_FUNC_START_WEAK, ... -- cgit v1.3.1 From db2ac2e49e564c2b219c4b33d9903aa383334256 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 10 Nov 2020 14:34:16 +0800 Subject: perf test: Fix a typo in cs-etm testing Fix a typo: s/devce_name/device_name. Fixes: fe0aed19b266 ("perf test: Introduce script for Arm CoreSight testing") Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Jin Yao Cc: Jiri Olsa Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight ml Cc: stable@kernel.org Link: http://lore.kernel.org/lkml/20201110063417.14467-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_arm_coresight.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index 8d84fdbed6a6..59d847d4981d 100755 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -105,7 +105,7 @@ arm_cs_iterate_devices() { # `> device_name = 'tmc_etf0' device_name=$(basename $path) - if is_device_sink $path $devce_name; then + if is_device_sink $path $device_name; then record_touch_file $device_name $2 && perf_script_branch_samples touch && -- cgit v1.3.1 From dd94ac807a5e10e0b25b68397c473276905cca73 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 10 Nov 2020 14:34:17 +0800 Subject: perf test: Update branch sample pattern for cs-etm Since the commit 943b69ac1884 ("perf parse-events: Set exclude_guest=1 for user-space counting"), 'exclude_guest=1' is set for user-space counting; and the branch sample's modifier has been altered, the sample event name has been changed from "branches:u:" to "branches:uH:", which gives out info for "user-space and host counting". But the cs-etm testing's regular expression cannot match the updated branch sample event and leads to test failure. This patch updates the branch sample pattern by using a more flexible expression '.*' to match branch sample's modifiers, so that allows the testing to work as expected. Fixes: 943b69ac1884 ("perf parse-events: Set exclude_guest=1 for user-space counting") Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Jin Yao Cc: Jiri Olsa Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight ml Cc: stable@kernel.org Link: http://lore.kernel.org/lkml/20201110063417.14467-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_arm_coresight.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index 59d847d4981d..18fde2f179cd 100755 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -44,7 +44,7 @@ perf_script_branch_samples() { # touch 6512 1 branches:u: ffffb22082e0 strcmp+0xa0 (/lib/aarch64-linux-gnu/ld-2.27.so) # touch 6512 1 branches:u: ffffb2208320 strcmp+0xe0 (/lib/aarch64-linux-gnu/ld-2.27.so) perf script -F,-time -i ${perfdata} | \ - egrep " +$1 +[0-9]+ .* +branches:([u|k]:)? +" + egrep " +$1 +[0-9]+ .* +branches:(.*:)? +" } perf_report_branch_samples() { -- cgit v1.3.1 From e865802357086b36632acf3e629f726f089a6769 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 11 Nov 2020 16:05:35 +0100 Subject: selftests: set conf.all.rp_filter=0 in bareudp.sh When working on the rp_filter problem, I didn't realise that disabling it on the network devices didn't cover all cases: rp_filter could also be enabled globally in the namespace, in which case it would drop packets, even if the net device has rp_filter=0. Fixes: 1ccd58331f6f ("selftests: disable rp_filter when testing bareudp") Fixes: bbbc7aa45eef ("selftests: add test script for bareudp tunnels") Signed-off-by: Guillaume Nault Link: https://lore.kernel.org/r/f2d459346471f163b239aa9d63ce3e2ba9c62895.1605107012.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/bareudp.sh | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh index c2b9e990e544..f366cadbc5e8 100755 --- a/tools/testing/selftests/net/bareudp.sh +++ b/tools/testing/selftests/net/bareudp.sh @@ -238,6 +238,8 @@ setup_overlay_ipv4() # The intermediate namespaces don't have routes for the reverse path, # as it will be handled by tc. So we need to ensure that rp_filter is # not going to block the traffic. + ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0 + ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0 ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0 } -- cgit v1.3.1 From 9cc873e85800ccde80aa2e4b2bae9f1b5fa4c478 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 10 Nov 2020 19:12:12 -0800 Subject: selftests/bpf: Add skb_pkt_end test Add a test that currently makes LLVM generate assembly code: $ llvm-objdump -S skb_pkt_end.o 0000000000000000 : ; if (skb_shorter(skb, ETH_IPV4_TCP_SIZE)) 0: 61 12 50 00 00 00 00 00 r2 = *(u32 *)(r1 + 80) 1: 61 14 4c 00 00 00 00 00 r4 = *(u32 *)(r1 + 76) 2: bf 43 00 00 00 00 00 00 r3 = r4 3: 07 03 00 00 36 00 00 00 r3 += 54 4: b7 01 00 00 00 00 00 00 r1 = 0 5: 2d 23 02 00 00 00 00 00 if r3 > r2 goto +2 6: 07 04 00 00 0e 00 00 00 r4 += 14 ; if (skb_shorter(skb, ETH_IPV4_TCP_SIZE)) 7: bf 41 00 00 00 00 00 00 r1 = r4 0000000000000040 : 8: b4 00 00 00 ff ff ff ff w0 = -1 ; if (!(ip = get_iphdr(skb))) 9: 2d 23 05 00 00 00 00 00 if r3 > r2 goto +5 ; proto = ip->protocol; 10: 71 12 09 00 00 00 00 00 r2 = *(u8 *)(r1 + 9) ; if (proto != IPPROTO_TCP) 11: 56 02 03 00 06 00 00 00 if w2 != 6 goto +3 ; if (tcp->dest != 0) 12: 69 12 16 00 00 00 00 00 r2 = *(u16 *)(r1 + 22) 13: 56 02 01 00 00 00 00 00 if w2 != 0 goto +1 ; return tcp->urg_ptr; 14: 69 10 26 00 00 00 00 00 r0 = *(u16 *)(r1 + 38) 0000000000000078 : ; } 15: 95 00 00 00 00 00 00 00 exit Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20201111031213.25109-3-alexei.starovoitov@gmail.com --- .../selftests/bpf/prog_tests/test_skb_pkt_end.c | 41 ++++++++++++++++ tools/testing/selftests/bpf/progs/skb_pkt_end.c | 54 ++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c create mode 100644 tools/testing/selftests/bpf/progs/skb_pkt_end.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c new file mode 100644 index 000000000000..cf1215531920 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include +#include +#include "skb_pkt_end.skel.h" + +static int sanity_run(struct bpf_program *prog) +{ + __u32 duration, retval; + int err, prog_fd; + + prog_fd = bpf_program__fd(prog); + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, &retval, &duration); + if (CHECK(err || retval != 123, "test_run", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration)) + return -1; + return 0; +} + +void test_test_skb_pkt_end(void) +{ + struct skb_pkt_end *skb_pkt_end_skel = NULL; + __u32 duration = 0; + int err; + + skb_pkt_end_skel = skb_pkt_end__open_and_load(); + if (CHECK(!skb_pkt_end_skel, "skb_pkt_end_skel_load", "skb_pkt_end skeleton failed\n")) + goto cleanup; + + err = skb_pkt_end__attach(skb_pkt_end_skel); + if (CHECK(err, "skb_pkt_end_attach", "skb_pkt_end attach failed: %d\n", err)) + goto cleanup; + + if (sanity_run(skb_pkt_end_skel->progs.main_prog)) + goto cleanup; + +cleanup: + skb_pkt_end__destroy(skb_pkt_end_skel); +} diff --git a/tools/testing/selftests/bpf/progs/skb_pkt_end.c b/tools/testing/selftests/bpf/progs/skb_pkt_end.c new file mode 100644 index 000000000000..cf6823f42e80 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/skb_pkt_end.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +#define BPF_NO_PRESERVE_ACCESS_INDEX +#include +#include +#include + +#define NULL 0 +#define INLINE __always_inline + +#define skb_shorter(skb, len) ((void *)(long)(skb)->data + (len) > (void *)(long)skb->data_end) + +#define ETH_IPV4_TCP_SIZE (14 + sizeof(struct iphdr) + sizeof(struct tcphdr)) + +static INLINE struct iphdr *get_iphdr(struct __sk_buff *skb) +{ + struct iphdr *ip = NULL; + struct ethhdr *eth; + + if (skb_shorter(skb, ETH_IPV4_TCP_SIZE)) + goto out; + + eth = (void *)(long)skb->data; + ip = (void *)(eth + 1); + +out: + return ip; +} + +SEC("classifier/cls") +int main_prog(struct __sk_buff *skb) +{ + struct iphdr *ip = NULL; + struct tcphdr *tcp; + __u8 proto = 0; + + if (!(ip = get_iphdr(skb))) + goto out; + + proto = ip->protocol; + + if (proto != IPPROTO_TCP) + goto out; + + tcp = (void*)(ip + 1); + if (tcp->dest != 0) + goto out; + if (!tcp) + goto out; + + return tcp->urg_ptr; +out: + return -1; +} +char _license[] SEC("license") = "GPL"; -- cgit v1.3.1 From cb62d34019d9117bb94de6ed35959449d43d6055 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 10 Nov 2020 19:12:13 -0800 Subject: selftests/bpf: Add asm tests for pkt vs pkt_end comparison. Add few assembly tests for packet comparison. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Tested-by: Jiri Olsa Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20201111031213.25109-4-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/verifier/ctx_skb.c | 42 ++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c index 2e16b8e268f2..2022c0f2cd75 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_skb.c +++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c @@ -1089,3 +1089,45 @@ .errstr_unpriv = "R1 leaks addr", .result = REJECT, }, +{ + "pkt > pkt_end taken check", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, // 0. r2 = *(u32 *)(r1 + data_end) + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, // 1. r4 = *(u32 *)(r1 + data) + offsetof(struct __sk_buff, data)), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_4), // 2. r3 = r4 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 42), // 3. r3 += 42 + BPF_MOV64_IMM(BPF_REG_1, 0), // 4. r1 = 0 + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 2), // 5. if r3 > r2 goto 8 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 14), // 6. r4 += 14 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_4), // 7. r1 = r4 + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 1), // 8. if r3 > r2 goto 10 + BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, 9), // 9. r2 = *(u8 *)(r1 + 9) + BPF_MOV64_IMM(BPF_REG_0, 0), // 10. r0 = 0 + BPF_EXIT_INSN(), // 11. exit + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, +}, +{ + "pkt_end < pkt taken check", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, // 0. r2 = *(u32 *)(r1 + data_end) + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, // 1. r4 = *(u32 *)(r1 + data) + offsetof(struct __sk_buff, data)), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_4), // 2. r3 = r4 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 42), // 3. r3 += 42 + BPF_MOV64_IMM(BPF_REG_1, 0), // 4. r1 = 0 + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 2), // 5. if r3 > r2 goto 8 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 14), // 6. r4 += 14 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_4), // 7. r1 = r4 + BPF_JMP_REG(BPF_JLT, BPF_REG_2, BPF_REG_3, 1), // 8. if r2 < r3 goto 10 + BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, 9), // 9. r2 = *(u8 *)(r1 + 9) + BPF_MOV64_IMM(BPF_REG_0, 0), // 10. r0 = 0 + BPF_EXIT_INSN(), // 11. exit + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, +}, -- cgit v1.3.1 From 53632e11194663b7d5b043a68648892e593dc102 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 12 Nov 2020 13:13:20 -0800 Subject: bpf: selftest: Use bpf_sk_storage in FENTRY/FEXIT/RAW_TP This patch tests storing the task's related info into the bpf_sk_storage by fentry/fexit tracing at listen, accept, and connect. It also tests the raw_tp at inet_sock_set_state. A negative test is done by tracing the bpf_sk_storage_free() and using bpf_sk_storage_get() at the same time. It ensures this bpf program cannot load. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201112211320.2587537-1-kafai@fb.com --- .../selftests/bpf/prog_tests/sk_storage_tracing.c | 135 +++++++++++++++++++++ .../bpf/progs/test_sk_storage_trace_itself.c | 29 +++++ .../selftests/bpf/progs/test_sk_storage_tracing.c | 95 +++++++++++++++ 3 files changed, 259 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c create mode 100644 tools/testing/selftests/bpf/progs/test_sk_storage_trace_itself.c create mode 100644 tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c new file mode 100644 index 000000000000..2b392590e8ca --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include +#include +#include +#include "test_progs.h" +#include "network_helpers.h" +#include "test_sk_storage_trace_itself.skel.h" +#include "test_sk_storage_tracing.skel.h" + +#define LO_ADDR6 "::1" +#define TEST_COMM "test_progs" + +struct sk_stg { + __u32 pid; + __u32 last_notclose_state; + char comm[16]; +}; + +static struct test_sk_storage_tracing *skel; +static __u32 duration; +static pid_t my_pid; + +static int check_sk_stg(int sk_fd, __u32 expected_state) +{ + struct sk_stg sk_stg; + int err; + + err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_stg_map), &sk_fd, + &sk_stg); + if (!ASSERT_OK(err, "map_lookup(sk_stg_map)")) + return -1; + + if (!ASSERT_EQ(sk_stg.last_notclose_state, expected_state, + "last_notclose_state")) + return -1; + + if (!ASSERT_EQ(sk_stg.pid, my_pid, "pid")) + return -1; + + if (!ASSERT_STREQ(sk_stg.comm, skel->bss->task_comm, "task_comm")) + return -1; + + return 0; +} + +static void do_test(void) +{ + int listen_fd = -1, passive_fd = -1, active_fd = -1, value = 1, err; + char abyte; + + listen_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0); + if (CHECK(listen_fd == -1, "start_server", + "listen_fd:%d errno:%d\n", listen_fd, errno)) + return; + + active_fd = connect_to_fd(listen_fd, 0); + if (CHECK(active_fd == -1, "connect_to_fd", "active_fd:%d errno:%d\n", + active_fd, errno)) + goto out; + + err = bpf_map_update_elem(bpf_map__fd(skel->maps.del_sk_stg_map), + &active_fd, &value, 0); + if (!ASSERT_OK(err, "map_update(del_sk_stg_map)")) + goto out; + + passive_fd = accept(listen_fd, NULL, 0); + if (CHECK(passive_fd == -1, "accept", "passive_fd:%d errno:%d\n", + passive_fd, errno)) + goto out; + + shutdown(active_fd, SHUT_WR); + err = read(passive_fd, &abyte, 1); + if (!ASSERT_OK(err, "read(passive_fd)")) + goto out; + + shutdown(passive_fd, SHUT_WR); + err = read(active_fd, &abyte, 1); + if (!ASSERT_OK(err, "read(active_fd)")) + goto out; + + err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.del_sk_stg_map), + &active_fd, &value); + if (!ASSERT_ERR(err, "map_lookup(del_sk_stg_map)")) + goto out; + + err = check_sk_stg(listen_fd, BPF_TCP_LISTEN); + if (!ASSERT_OK(err, "listen_fd sk_stg")) + goto out; + + err = check_sk_stg(active_fd, BPF_TCP_FIN_WAIT2); + if (!ASSERT_OK(err, "active_fd sk_stg")) + goto out; + + err = check_sk_stg(passive_fd, BPF_TCP_LAST_ACK); + ASSERT_OK(err, "passive_fd sk_stg"); + +out: + if (active_fd != -1) + close(active_fd); + if (passive_fd != -1) + close(passive_fd); + if (listen_fd != -1) + close(listen_fd); +} + +void test_sk_storage_tracing(void) +{ + struct test_sk_storage_trace_itself *skel_itself; + int err; + + my_pid = getpid(); + + skel_itself = test_sk_storage_trace_itself__open_and_load(); + + if (!ASSERT_NULL(skel_itself, "test_sk_storage_trace_itself")) { + test_sk_storage_trace_itself__destroy(skel_itself); + return; + } + + skel = test_sk_storage_tracing__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_sk_storage_tracing")) + return; + + err = test_sk_storage_tracing__attach(skel); + if (!ASSERT_OK(err, "test_sk_storage_tracing__attach")) { + test_sk_storage_tracing__destroy(skel); + return; + } + + do_test(); + + test_sk_storage_tracing__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_trace_itself.c b/tools/testing/selftests/bpf/progs/test_sk_storage_trace_itself.c new file mode 100644 index 000000000000..59ef72d02a61 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sk_storage_trace_itself.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include +#include +#include + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} sk_stg_map SEC(".maps"); + +SEC("fentry/bpf_sk_storage_free") +int BPF_PROG(trace_bpf_sk_storage_free, struct sock *sk) +{ + int *value; + + value = bpf_sk_storage_get(&sk_stg_map, sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + + if (value) + *value = 1; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c new file mode 100644 index 000000000000..8e94e5c080aa --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include +#include +#include +#include + +struct sk_stg { + __u32 pid; + __u32 last_notclose_state; + char comm[16]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct sk_stg); +} sk_stg_map SEC(".maps"); + +/* Testing delete */ +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} del_sk_stg_map SEC(".maps"); + +char task_comm[16] = ""; + +SEC("tp_btf/inet_sock_set_state") +int BPF_PROG(trace_inet_sock_set_state, struct sock *sk, int oldstate, + int newstate) +{ + struct sk_stg *stg; + + if (newstate == BPF_TCP_CLOSE) + return 0; + + stg = bpf_sk_storage_get(&sk_stg_map, sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!stg) + return 0; + + stg->last_notclose_state = newstate; + + bpf_sk_storage_delete(&del_sk_stg_map, sk); + + return 0; +} + +static void set_task_info(struct sock *sk) +{ + struct task_struct *task; + struct sk_stg *stg; + + stg = bpf_sk_storage_get(&sk_stg_map, sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!stg) + return; + + stg->pid = bpf_get_current_pid_tgid(); + + task = (struct task_struct *)bpf_get_current_task(); + bpf_core_read_str(&stg->comm, sizeof(stg->comm), &task->comm); + bpf_core_read_str(&task_comm, sizeof(task_comm), &task->comm); +} + +SEC("fentry/inet_csk_listen_start") +int BPF_PROG(trace_inet_csk_listen_start, struct sock *sk, int backlog) +{ + set_task_info(sk); + + return 0; +} + +SEC("fentry/tcp_connect") +int BPF_PROG(trace_tcp_connect, struct sock *sk) +{ + set_task_info(sk); + + return 0; +} + +SEC("fexit/inet_csk_accept") +int BPF_PROG(inet_csk_accept, struct sock *sk, int flags, int *err, bool kern, + struct sock *accepted_sk) +{ + set_task_info(accepted_sk); + + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.3.1 From 50431b45685b600fc2851a3f2b53e24643efe6d3 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Fri, 13 Nov 2020 19:51:52 +0800 Subject: tools, bpftool: Add missing close before bpftool net attach exit progfd is created by prog_parse_fd() in do_attach() and before the latter returns in case of success, the file descriptor should be closed. Fixes: 04949ccc273e ("tools: bpftool: add net attach command to attach XDP on interface") Signed-off-by: Wang Hai Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201113115152.53178-1-wanghai38@huawei.com --- tools/bpf/bpftool/net.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 910e7bac6e9e..3fae61ef6339 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -578,8 +578,8 @@ static int do_attach(int argc, char **argv) ifindex = net_parse_dev(&argc, &argv); if (ifindex < 1) { - close(progfd); - return -EINVAL; + err = -EINVAL; + goto cleanup; } if (argc) { @@ -587,8 +587,8 @@ static int do_attach(int argc, char **argv) overwrite = true; } else { p_err("expected 'overwrite', got: '%s'?", *argv); - close(progfd); - return -EINVAL; + err = -EINVAL; + goto cleanup; } } @@ -596,17 +596,17 @@ static int do_attach(int argc, char **argv) if (is_prefix("xdp", attach_type_strings[attach_type])) err = do_attach_detach_xdp(progfd, attach_type, ifindex, overwrite); - - if (err < 0) { + if (err) { p_err("interface %s attach failed: %s", attach_type_strings[attach_type], strerror(-err)); - return err; + goto cleanup; } if (json_output) jsonw_null(json_wtr); - - return 0; +cleanup: + close(progfd); + return err; } static int do_detach(int argc, char **argv) -- cgit v1.3.1 From f782e2c300a717233b64697affda3ea7aac00b2b Mon Sep 17 00:00:00 2001 From: Dmitrii Banshchikov Date: Fri, 13 Nov 2020 17:17:56 +0000 Subject: bpf: Relax return code check for subprograms Currently verifier enforces return code checks for subprograms in the same manner as it does for program entry points. This prevents returning arbitrary scalar values from subprograms. Scalar type of returned values is checked by btf_prepare_func_args() and hence it should be safe to allow only scalars for now. Relax return code checks for subprograms and allow any correct scalar values. Fixes: 51c39bb1d5d10 (bpf: Introduce function-by-function verification) Signed-off-by: Dmitrii Banshchikov Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201113171756.90594-1-me@ubique.spb.ru --- kernel/bpf/verifier.c | 15 +++++++++++++-- .../selftests/bpf/prog_tests/test_global_funcs.c | 1 + tools/testing/selftests/bpf/progs/test_global_func8.c | 19 +++++++++++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/test_global_func8.c (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6204ec705d80..1388bf733071 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7786,9 +7786,11 @@ static int check_return_code(struct bpf_verifier_env *env) struct tnum range = tnum_range(0, 1); enum bpf_prog_type prog_type = resolve_prog_type(env->prog); int err; + const bool is_subprog = env->cur_state->frame[0]->subprogno; /* LSM and struct_ops func-ptr's return type could be "void" */ - if ((prog_type == BPF_PROG_TYPE_STRUCT_OPS || + if (!is_subprog && + (prog_type == BPF_PROG_TYPE_STRUCT_OPS || prog_type == BPF_PROG_TYPE_LSM) && !prog->aux->attach_func_proto->type) return 0; @@ -7808,6 +7810,16 @@ static int check_return_code(struct bpf_verifier_env *env) return -EACCES; } + reg = cur_regs(env) + BPF_REG_0; + if (is_subprog) { + if (reg->type != SCALAR_VALUE) { + verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n", + reg_type_str[reg->type]); + return -EINVAL; + } + return 0; + } + switch (prog_type) { case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG || @@ -7861,7 +7873,6 @@ static int check_return_code(struct bpf_verifier_env *env) return 0; } - reg = cur_regs(env) + BPF_REG_0; if (reg->type != SCALAR_VALUE) { verbose(env, "At program exit the register R0 is not a known value (%s)\n", reg_type_str[reg->type]); diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index 193002b14d7f..32e4348b714b 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -60,6 +60,7 @@ void test_test_global_funcs(void) { "test_global_func5.o" , "expected pointer to ctx, but got PTR" }, { "test_global_func6.o" , "modified ctx ptr R2" }, { "test_global_func7.o" , "foo() doesn't return scalar" }, + { "test_global_func8.o" }, }; libbpf_print_fn_t old_print_fn = NULL; int err, i, duration = 0; diff --git a/tools/testing/selftests/bpf/progs/test_global_func8.c b/tools/testing/selftests/bpf/progs/test_global_func8.c new file mode 100644 index 000000000000..d55a6544b1ab --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func8.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Facebook */ +#include +#include +#include + +__noinline int foo(struct __sk_buff *skb) +{ + return bpf_get_prandom_u32(); +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + if (!foo(skb)) + return 0; + + return 1; +} -- cgit v1.3.1 From 7a873e4555679a0e749422db071c142b57f80be9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 6 Oct 2020 18:44:17 -0700 Subject: KVM: selftests: Verify supported CR4 bits can be set before KVM_SET_CPUID2 Extend the KVM_SET_SREGS test to verify that all supported CR4 bits, as enumerated by KVM, can be set before KVM_SET_CPUID2, i.e. without first defining the vCPU model. KVM is supposed to skip guest CPUID checks when host userspace is stuffing guest state. Check the inverse as well, i.e. that KVM rejects KVM_SET_REGS if CR4 has one or more unsupported bits set. Signed-off-by: Sean Christopherson Message-Id: <20201007014417.29276-7-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/include/x86_64/processor.h | 17 ++++ tools/testing/selftests/kvm/include/x86_64/vmx.h | 4 - .../testing/selftests/kvm/x86_64/set_sregs_test.c | 92 +++++++++++++++++++++- 3 files changed, 108 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 8e61340b3911..90cd5984751b 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -27,6 +27,7 @@ #define X86_CR4_OSFXSR (1ul << 9) #define X86_CR4_OSXMMEXCPT (1ul << 10) #define X86_CR4_UMIP (1ul << 11) +#define X86_CR4_LA57 (1ul << 12) #define X86_CR4_VMXE (1ul << 13) #define X86_CR4_SMXE (1ul << 14) #define X86_CR4_FSGSBASE (1ul << 16) @@ -36,6 +37,22 @@ #define X86_CR4_SMAP (1ul << 21) #define X86_CR4_PKE (1ul << 22) +/* CPUID.1.ECX */ +#define CPUID_VMX (1ul << 5) +#define CPUID_SMX (1ul << 6) +#define CPUID_PCID (1ul << 17) +#define CPUID_XSAVE (1ul << 26) + +/* CPUID.7.EBX */ +#define CPUID_FSGSBASE (1ul << 0) +#define CPUID_SMEP (1ul << 7) +#define CPUID_SMAP (1ul << 20) + +/* CPUID.7.ECX */ +#define CPUID_UMIP (1ul << 2) +#define CPUID_PKU (1ul << 3) +#define CPUID_LA57 (1ul << 16) + #define UNEXPECTED_VECTOR_PORT 0xfff0u /* General Registers in 64-Bit Mode */ diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index e78d7e26ba61..65eb1079a161 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -11,10 +11,6 @@ #include #include "processor.h" -#define CPUID_VMX_BIT 5 - -#define CPUID_VMX (1 << 5) - /* * Definitions of Primary Processor-Based VM-Execution Controls. */ diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c index 9f7656184f31..318be0bf77ab 100644 --- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c +++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c @@ -24,16 +24,106 @@ #define VCPU_ID 5 +static void test_cr4_feature_bit(struct kvm_vm *vm, struct kvm_sregs *orig, + uint64_t feature_bit) +{ + struct kvm_sregs sregs; + int rc; + + /* Skip the sub-test, the feature is supported. */ + if (orig->cr4 & feature_bit) + return; + + memcpy(&sregs, orig, sizeof(sregs)); + sregs.cr4 |= feature_bit; + + rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs); + TEST_ASSERT(rc, "KVM allowed unsupported CR4 bit (0x%lx)", feature_bit); + + /* Sanity check that KVM didn't change anything. */ + vcpu_sregs_get(vm, VCPU_ID, &sregs); + TEST_ASSERT(!memcmp(&sregs, orig, sizeof(sregs)), "KVM modified sregs"); +} + +static uint64_t calc_cr4_feature_bits(struct kvm_vm *vm) +{ + struct kvm_cpuid_entry2 *cpuid_1, *cpuid_7; + uint64_t cr4; + + cpuid_1 = kvm_get_supported_cpuid_entry(1); + cpuid_7 = kvm_get_supported_cpuid_entry(7); + + cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE | + X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE | + X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT; + if (cpuid_7->ecx & CPUID_UMIP) + cr4 |= X86_CR4_UMIP; + if (cpuid_7->ecx & CPUID_LA57) + cr4 |= X86_CR4_LA57; + if (cpuid_1->ecx & CPUID_VMX) + cr4 |= X86_CR4_VMXE; + if (cpuid_1->ecx & CPUID_SMX) + cr4 |= X86_CR4_SMXE; + if (cpuid_7->ebx & CPUID_FSGSBASE) + cr4 |= X86_CR4_FSGSBASE; + if (cpuid_1->ecx & CPUID_PCID) + cr4 |= X86_CR4_PCIDE; + if (cpuid_1->ecx & CPUID_XSAVE) + cr4 |= X86_CR4_OSXSAVE; + if (cpuid_7->ebx & CPUID_SMEP) + cr4 |= X86_CR4_SMEP; + if (cpuid_7->ebx & CPUID_SMAP) + cr4 |= X86_CR4_SMAP; + if (cpuid_7->ecx & CPUID_PKU) + cr4 |= X86_CR4_PKE; + + return cr4; +} + int main(int argc, char *argv[]) { struct kvm_sregs sregs; struct kvm_vm *vm; + uint64_t cr4; int rc; /* Tell stdout not to buffer its content */ setbuf(stdout, NULL); - /* Create VM */ + /* + * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and + * use it to verify all supported CR4 bits can be set prior to defining + * the vCPU model, i.e. without doing KVM_SET_CPUID2. + */ + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + vm_vcpu_add(vm, VCPU_ID); + + vcpu_sregs_get(vm, VCPU_ID, &sregs); + + sregs.cr4 |= calc_cr4_feature_bits(vm); + cr4 = sregs.cr4; + + rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs); + TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4); + + vcpu_sregs_get(vm, VCPU_ID, &sregs); + TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)", + sregs.cr4, cr4); + + /* Verify all unsupported features are rejected by KVM. */ + test_cr4_feature_bit(vm, &sregs, X86_CR4_UMIP); + test_cr4_feature_bit(vm, &sregs, X86_CR4_LA57); + test_cr4_feature_bit(vm, &sregs, X86_CR4_VMXE); + test_cr4_feature_bit(vm, &sregs, X86_CR4_SMXE); + test_cr4_feature_bit(vm, &sregs, X86_CR4_FSGSBASE); + test_cr4_feature_bit(vm, &sregs, X86_CR4_PCIDE); + test_cr4_feature_bit(vm, &sregs, X86_CR4_OSXSAVE); + test_cr4_feature_bit(vm, &sregs, X86_CR4_SMEP); + test_cr4_feature_bit(vm, &sregs, X86_CR4_SMAP); + test_cr4_feature_bit(vm, &sregs, X86_CR4_PKE); + kvm_vm_free(vm); + + /* Create a "real" VM and verify APIC_BASE can be set. */ vm = vm_create_default(VCPU_ID, 0, NULL); vcpu_sregs_get(vm, VCPU_ID, &sregs); -- cgit v1.3.1 From 8b460692fee46a47cebd66d70df88dc9aa6d6b8b Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 29 Sep 2020 17:09:44 +0200 Subject: KVM: selftests: test KVM_GET_SUPPORTED_HV_CPUID as a system ioctl KVM_GET_SUPPORTED_HV_CPUID is now supported as both vCPU and VM ioctl, test that. Signed-off-by: Vitaly Kuznetsov Message-Id: <20200929150944.1235688-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/kvm_util.h | 2 + tools/testing/selftests/kvm/lib/kvm_util.c | 26 +++++++ tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c | 87 +++++++++++++---------- 3 files changed, 77 insertions(+), 38 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 7d29aa786959..b2c1364c0402 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -114,6 +114,8 @@ void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, void *arg); void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); +void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); +int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 126c6727a6b0..ef2114e1b7ad 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1582,6 +1582,32 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) cmd, ret, errno, strerror(errno)); } +/* + * KVM system ioctl + * + * Input Args: + * vm - Virtual Machine + * cmd - Ioctl number + * arg - Argument to pass to the ioctl + * + * Return: None + * + * Issues an arbitrary ioctl on a KVM fd. + */ +void kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) +{ + int ret; + + ret = ioctl(vm->kvm_fd, cmd, arg); + TEST_ASSERT(ret == 0, "KVM ioctl %lu failed, rc: %i errno: %i (%s)", + cmd, ret, errno, strerror(errno)); +} + +int _kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) +{ + return ioctl(vm->kvm_fd, cmd, arg); +} + /* * VM Dump * diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index 745b708c2d3b..88a595b7fbdd 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -46,19 +46,19 @@ static bool smt_possible(void) } static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, - bool evmcs_enabled) + bool evmcs_expected) { int i; int nent = 9; u32 test_val; - if (evmcs_enabled) + if (evmcs_expected) nent += 1; /* 0x4000000A */ TEST_ASSERT(hv_cpuid_entries->nent == nent, "KVM_GET_SUPPORTED_HV_CPUID should return %d entries" " with evmcs=%d (returned %d)", - nent, evmcs_enabled, hv_cpuid_entries->nent); + nent, evmcs_expected, hv_cpuid_entries->nent); for (i = 0; i < hv_cpuid_entries->nent; i++) { struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i]; @@ -68,7 +68,7 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, "function %x is our of supported range", entry->function); - TEST_ASSERT(evmcs_enabled || (entry->function != 0x4000000A), + TEST_ASSERT(evmcs_expected || (entry->function != 0x4000000A), "0x4000000A leaf should not be reported"); TEST_ASSERT(entry->index == 0, @@ -87,7 +87,7 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, TEST_ASSERT(entry->eax == test_val, "Wrong max leaf report in 0x40000000.EAX: %x" " (evmcs=%d)", - entry->eax, evmcs_enabled + entry->eax, evmcs_expected ); break; case 0x40000004: @@ -110,20 +110,23 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, } -void test_hv_cpuid_e2big(struct kvm_vm *vm) +void test_hv_cpuid_e2big(struct kvm_vm *vm, bool system) { static struct kvm_cpuid2 cpuid = {.nent = 0}; int ret; - ret = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, &cpuid); + if (!system) + ret = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, &cpuid); + else + ret = _kvm_ioctl(vm, KVM_GET_SUPPORTED_HV_CPUID, &cpuid); TEST_ASSERT(ret == -1 && errno == E2BIG, - "KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when" - " it should have: %d %d", ret, errno); + "%s KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when" + " it should have: %d %d", system ? "KVM" : "vCPU", ret, errno); } -struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm) +struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm, bool system) { int nent = 20; /* should be enough */ static struct kvm_cpuid2 *cpuid; @@ -137,7 +140,10 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm) cpuid->nent = nent; - vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + if (!system) + vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + else + kvm_ioctl(vm, KVM_GET_SUPPORTED_HV_CPUID, cpuid); return cpuid; } @@ -146,45 +152,50 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm) int main(int argc, char *argv[]) { struct kvm_vm *vm; - int rv, stage; struct kvm_cpuid2 *hv_cpuid_entries; - bool evmcs_enabled; /* Tell stdout not to buffer its content */ setbuf(stdout, NULL); - rv = kvm_check_cap(KVM_CAP_HYPERV_CPUID); - if (!rv) { + if (!kvm_check_cap(KVM_CAP_HYPERV_CPUID)) { print_skip("KVM_CAP_HYPERV_CPUID not supported"); exit(KSFT_SKIP); } - for (stage = 0; stage < 3; stage++) { - evmcs_enabled = false; + vm = vm_create_default(VCPU_ID, 0, guest_code); - vm = vm_create_default(VCPU_ID, 0, guest_code); - switch (stage) { - case 0: - test_hv_cpuid_e2big(vm); - continue; - case 1: - break; - case 2: - if (!nested_vmx_supported() || - !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { - print_skip("Enlightened VMCS is unsupported"); - continue; - } - vcpu_enable_evmcs(vm, VCPU_ID); - evmcs_enabled = true; - break; - } + /* Test vCPU ioctl version */ + test_hv_cpuid_e2big(vm, false); + + hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, false); + test_hv_cpuid(hv_cpuid_entries, false); + free(hv_cpuid_entries); - hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm); - test_hv_cpuid(hv_cpuid_entries, evmcs_enabled); - free(hv_cpuid_entries); - kvm_vm_free(vm); + if (!nested_vmx_supported() || + !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { + print_skip("Enlightened VMCS is unsupported"); + goto do_sys; } + vcpu_enable_evmcs(vm, VCPU_ID); + hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, false); + test_hv_cpuid(hv_cpuid_entries, true); + free(hv_cpuid_entries); + +do_sys: + /* Test system ioctl version */ + if (!kvm_check_cap(KVM_CAP_SYS_HYPERV_CPUID)) { + print_skip("KVM_CAP_SYS_HYPERV_CPUID not supported"); + goto out; + } + + test_hv_cpuid_e2big(vm, true); + + hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, true); + test_hv_cpuid(hv_cpuid_entries, nested_vmx_supported()); + free(hv_cpuid_entries); + +out: + kvm_vm_free(vm); return 0; } -- cgit v1.3.1 From 60f644fb519831edff38c79755f7970c475e2ece Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:35 -0400 Subject: KVM: selftests: Introduce after_vcpu_run hook for dirty log test Provide a hook for the checks after vcpu_run() completes. Preparation for the dirty ring test because we'll need to take care of another exit reason. Reviewed-by: Andrew Jones Signed-off-by: Peter Xu Message-Id: <20201001012235.6063-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 36 ++++++++++++++++++---------- 1 file changed, 24 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 54da9cc20db4..9215b26af10c 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -178,6 +178,15 @@ static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot, kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages); } +static void default_after_vcpu_run(struct kvm_vm *vm) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + + TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC, + "Invalid guest sync status: exit_reason=%s\n", + exit_reason_str(run->exit_reason)); +} + struct log_mode { const char *name; /* Return true if this mode is supported, otherwise false */ @@ -187,16 +196,20 @@ struct log_mode { /* Hook to collect the dirty pages into the bitmap provided */ void (*collect_dirty_pages) (struct kvm_vm *vm, int slot, void *bitmap, uint32_t num_pages); + /* Hook to call when after each vcpu run */ + void (*after_vcpu_run)(struct kvm_vm *vm); } log_modes[LOG_MODE_NUM] = { { .name = "dirty-log", .collect_dirty_pages = dirty_log_collect_dirty_pages, + .after_vcpu_run = default_after_vcpu_run, }, { .name = "clear-log", .supported = clear_log_supported, .create_vm_done = clear_log_create_vm_done, .collect_dirty_pages = clear_log_collect_dirty_pages, + .after_vcpu_run = default_after_vcpu_run, }, }; @@ -247,6 +260,14 @@ static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot, mode->collect_dirty_pages(vm, slot, bitmap, num_pages); } +static void log_mode_after_vcpu_run(struct kvm_vm *vm) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + if (mode->after_vcpu_run) + mode->after_vcpu_run(vm); +} + static void generate_random_array(uint64_t *guest_array, uint64_t size) { uint64_t i; @@ -261,25 +282,16 @@ static void *vcpu_worker(void *data) struct kvm_vm *vm = data; uint64_t *guest_array; uint64_t pages_count = 0; - struct kvm_run *run; - - run = vcpu_state(vm, VCPU_ID); guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array); - generate_random_array(guest_array, TEST_PAGES_PER_LOOP); while (!READ_ONCE(host_quit)) { + generate_random_array(guest_array, TEST_PAGES_PER_LOOP); + pages_count += TEST_PAGES_PER_LOOP; /* Let the guest dirty the random pages */ ret = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); - if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) { - pages_count += TEST_PAGES_PER_LOOP; - generate_random_array(guest_array, TEST_PAGES_PER_LOOP); - } else { - TEST_FAIL("Invalid guest sync status: " - "exit_reason=%s\n", - exit_reason_str(run->exit_reason)); - } + log_mode_after_vcpu_run(vm); } pr_info("Dirtied %"PRIu64" pages\n", pages_count); -- cgit v1.3.1 From 84292e565951cecfe2718e43905a6103c9e8ac29 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:37 -0400 Subject: KVM: selftests: Add dirty ring buffer test Add the initial dirty ring buffer test. The current test implements the userspace dirty ring collection, by only reaping the dirty ring when the ring is full. So it's still running synchronously like this: vcpu main thread 1. vcpu dirties pages 2. vcpu gets dirty ring full (userspace exit) 3. main thread waits until full (so hardware buffers flushed) 4. main thread collects 5. main thread continues vcpu 6. vcpu continues, goes back to 1 We can't directly collects dirty bits during vcpu execution because otherwise we can't guarantee the hardware dirty bits were flushed when we collect and we're very strict on the dirty bits so otherwise we can fail the future verify procedure. A follow up patch will make this test to support async just like the existing dirty log test, by adding a vcpu kick mechanism. Signed-off-by: Peter Xu Message-Id: <20201001012237.6111-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 254 ++++++++++++++++++++- tools/testing/selftests/kvm/include/kvm_util.h | 3 + tools/testing/selftests/kvm/lib/kvm_util.c | 72 +++++- .../testing/selftests/kvm/lib/kvm_util_internal.h | 4 + 4 files changed, 320 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 9215b26af10c..a7da4d9471e1 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -12,8 +12,13 @@ #include #include #include +#include +#include +#include +#include #include #include +#include #include "test_util.h" #include "kvm_util.h" @@ -57,6 +62,8 @@ # define test_and_clear_bit_le test_and_clear_bit #endif +#define TEST_DIRTY_RING_COUNT 1024 + /* * Guest/Host shared variables. Ensure addr_gva2hva() and/or * sync_global_to/from_guest() are used when accessing from @@ -128,6 +135,25 @@ static uint64_t host_dirty_count; static uint64_t host_clear_count; static uint64_t host_track_next_count; +/* Whether dirty ring reset is requested, or finished */ +static sem_t dirty_ring_vcpu_stop; +static sem_t dirty_ring_vcpu_cont; +/* + * This is only used for verifying the dirty pages. Dirty ring has a very + * tricky case when the ring just got full, kvm will do userspace exit due to + * ring full. When that happens, the very last PFN is set but actually the + * data is not changed (the guest WRITE is not really applied yet), because + * we found that the dirty ring is full, refused to continue the vcpu, and + * recorded the dirty gfn with the old contents. + * + * For this specific case, it's safe to skip checking this pfn for this + * bit, because it's a redundant bit, and when the write happens later the bit + * will be set again. We use this variable to always keep track of the latest + * dirty gfn we've collected, so that if a mismatch of data found later in the + * verifying process, we let it pass. + */ +static uint64_t dirty_ring_last_page; + enum log_mode_t { /* Only use KVM_GET_DIRTY_LOG for logging */ LOG_MODE_DIRTY_LOG = 0, @@ -135,6 +161,9 @@ enum log_mode_t { /* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */ LOG_MODE_CLEAR_LOG = 1, + /* Use dirty ring for logging */ + LOG_MODE_DIRTY_RING = 2, + LOG_MODE_NUM, /* Run all supported modes */ @@ -145,6 +174,20 @@ enum log_mode_t { static enum log_mode_t host_log_mode_option = LOG_MODE_ALL; /* Logging mode for current run */ static enum log_mode_t host_log_mode; +static pthread_t vcpu_thread; + +/* + * In our test we do signal tricks, let's use a better version of + * sem_wait to avoid signal interrupts + */ +static void sem_wait_until(sem_t *sem) +{ + int ret; + + do + ret = sem_wait(sem); + while (ret == -1 && errno == EINTR); +} static bool clear_log_supported(void) { @@ -178,15 +221,131 @@ static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot, kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages); } -static void default_after_vcpu_run(struct kvm_vm *vm) +static void default_after_vcpu_run(struct kvm_vm *vm, int ret, int err) { struct kvm_run *run = vcpu_state(vm, VCPU_ID); + TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR), + "vcpu run failed: errno=%d", err); + TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC, "Invalid guest sync status: exit_reason=%s\n", exit_reason_str(run->exit_reason)); } +static bool dirty_ring_supported(void) +{ + return kvm_check_cap(KVM_CAP_DIRTY_LOG_RING); +} + +static void dirty_ring_create_vm_done(struct kvm_vm *vm) +{ + /* + * Switch to dirty ring mode after VM creation but before any + * of the vcpu creation. + */ + vm_enable_dirty_ring(vm, TEST_DIRTY_RING_COUNT * + sizeof(struct kvm_dirty_gfn)); +} + +static inline bool dirty_gfn_is_dirtied(struct kvm_dirty_gfn *gfn) +{ + return gfn->flags == KVM_DIRTY_GFN_F_DIRTY; +} + +static inline void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn) +{ + gfn->flags = KVM_DIRTY_GFN_F_RESET; +} + +static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, + int slot, void *bitmap, + uint32_t num_pages, uint32_t *fetch_index) +{ + struct kvm_dirty_gfn *cur; + uint32_t count = 0; + + while (true) { + cur = &dirty_gfns[*fetch_index % TEST_DIRTY_RING_COUNT]; + if (!dirty_gfn_is_dirtied(cur)) + break; + TEST_ASSERT(cur->slot == slot, "Slot number didn't match: " + "%u != %u", cur->slot, slot); + TEST_ASSERT(cur->offset < num_pages, "Offset overflow: " + "0x%llx >= 0x%x", cur->offset, num_pages); + //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset); + set_bit_le(cur->offset, bitmap); + dirty_ring_last_page = cur->offset; + dirty_gfn_set_collected(cur); + (*fetch_index)++; + count++; + } + + return count; +} + +static void dirty_ring_wait_vcpu(void) +{ + sem_wait_until(&dirty_ring_vcpu_stop); +} + +static void dirty_ring_continue_vcpu(void) +{ + pr_info("Notifying vcpu to continue\n"); + sem_post(&dirty_ring_vcpu_cont); +} + +static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages) +{ + /* We only have one vcpu */ + static uint32_t fetch_index = 0; + uint32_t count = 0, cleared; + + dirty_ring_wait_vcpu(); + + /* Only have one vcpu */ + count = dirty_ring_collect_one(vcpu_map_dirty_ring(vm, VCPU_ID), + slot, bitmap, num_pages, &fetch_index); + + cleared = kvm_vm_reset_dirty_ring(vm); + + /* Cleared pages should be the same as collected */ + TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch " + "with collected (%u)", cleared, count); + + dirty_ring_continue_vcpu(); + + pr_info("Iteration %ld collected %u pages\n", iteration, count); +} + +static void dirty_ring_after_vcpu_run(struct kvm_vm *vm, int ret, int err) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + + /* A ucall-sync or ring-full event is allowed */ + if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) { + /* We should allow this to continue */ + ; + } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) { + /* Update the flag first before pause */ + sem_post(&dirty_ring_vcpu_stop); + pr_info("vcpu stops because dirty ring is full...\n"); + sem_wait_until(&dirty_ring_vcpu_cont); + pr_info("vcpu continues now.\n"); + } else { + TEST_ASSERT(false, "Invalid guest sync status: " + "exit_reason=%s\n", + exit_reason_str(run->exit_reason)); + } +} + +static void dirty_ring_before_vcpu_join(void) +{ + /* Kick another round of vcpu just to make sure it will quit */ + sem_post(&dirty_ring_vcpu_cont); +} + struct log_mode { const char *name; /* Return true if this mode is supported, otherwise false */ @@ -197,7 +356,8 @@ struct log_mode { void (*collect_dirty_pages) (struct kvm_vm *vm, int slot, void *bitmap, uint32_t num_pages); /* Hook to call when after each vcpu run */ - void (*after_vcpu_run)(struct kvm_vm *vm); + void (*after_vcpu_run)(struct kvm_vm *vm, int ret, int err); + void (*before_vcpu_join) (void); } log_modes[LOG_MODE_NUM] = { { .name = "dirty-log", @@ -211,6 +371,14 @@ struct log_mode { .collect_dirty_pages = clear_log_collect_dirty_pages, .after_vcpu_run = default_after_vcpu_run, }, + { + .name = "dirty-ring", + .supported = dirty_ring_supported, + .create_vm_done = dirty_ring_create_vm_done, + .collect_dirty_pages = dirty_ring_collect_dirty_pages, + .before_vcpu_join = dirty_ring_before_vcpu_join, + .after_vcpu_run = dirty_ring_after_vcpu_run, + }, }; /* @@ -260,12 +428,20 @@ static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot, mode->collect_dirty_pages(vm, slot, bitmap, num_pages); } -static void log_mode_after_vcpu_run(struct kvm_vm *vm) +static void log_mode_after_vcpu_run(struct kvm_vm *vm, int ret, int err) { struct log_mode *mode = &log_modes[host_log_mode]; if (mode->after_vcpu_run) - mode->after_vcpu_run(vm); + mode->after_vcpu_run(vm, ret, err); +} + +static void log_mode_before_vcpu_join(void) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + if (mode->before_vcpu_join) + mode->before_vcpu_join(); } static void generate_random_array(uint64_t *guest_array, uint64_t size) @@ -278,20 +454,22 @@ static void generate_random_array(uint64_t *guest_array, uint64_t size) static void *vcpu_worker(void *data) { - int ret; + int ret, vcpu_fd; struct kvm_vm *vm = data; uint64_t *guest_array; uint64_t pages_count = 0; + vcpu_fd = vcpu_get_fd(vm, VCPU_ID); + guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array); while (!READ_ONCE(host_quit)) { + /* Clear any existing kick signals */ generate_random_array(guest_array, TEST_PAGES_PER_LOOP); pages_count += TEST_PAGES_PER_LOOP; /* Let the guest dirty the random pages */ - ret = _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); - log_mode_after_vcpu_run(vm); + ret = ioctl(vcpu_fd, KVM_RUN, NULL); + log_mode_after_vcpu_run(vm, ret, errno); } pr_info("Dirtied %"PRIu64" pages\n", pages_count); @@ -304,6 +482,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) uint64_t step = vm_num_host_pages(mode, 1); uint64_t page; uint64_t *value_ptr; + uint64_t min_iter = 0; for (page = 0; page < host_num_pages; page += step) { value_ptr = host_test_mem + page * host_page_size; @@ -318,14 +497,64 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) } if (test_and_clear_bit_le(page, bmap)) { + bool matched; + host_dirty_count++; + /* * If the bit is set, the value written onto * the corresponding page should be either the * previous iteration number or the current one. */ - TEST_ASSERT(*value_ptr == iteration || - *value_ptr == iteration - 1, + matched = (*value_ptr == iteration || + *value_ptr == iteration - 1); + + if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) { + if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) { + /* + * Short answer: this case is special + * only for dirty ring test where the + * page is the last page before a kvm + * dirty ring full in iteration N-2. + * + * Long answer: Assuming ring size R, + * one possible condition is: + * + * main thr vcpu thr + * -------- -------- + * iter=1 + * write 1 to page 0~(R-1) + * full, vmexit + * collect 0~(R-1) + * kick vcpu + * write 1 to (R-1)~(2R-2) + * full, vmexit + * iter=2 + * collect (R-1)~(2R-2) + * kick vcpu + * write 1 to (2R-2) + * (NOTE!!! "1" cached in cpu reg) + * write 2 to (2R-1)~(3R-3) + * full, vmexit + * iter=3 + * collect (2R-2)~(3R-3) + * (here if we read value on page + * "2R-2" is 1, while iter=3!!!) + * + * This however can only happen once per iteration. + */ + min_iter = iteration - 1; + continue; + } else if (page == dirty_ring_last_page) { + /* + * Please refer to comments in + * dirty_ring_last_page. + */ + continue; + } + } + + TEST_ASSERT(matched, "Set page %"PRIu64" value %"PRIu64 " incorrect (iteration=%"PRIu64")", page, *value_ptr, iteration); @@ -390,7 +619,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, static void run_test(enum vm_guest_mode mode, unsigned long iterations, unsigned long interval, uint64_t phys_offset) { - pthread_t vcpu_thread; struct kvm_vm *vm; unsigned long *bmap; @@ -488,6 +716,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, /* Tell the vcpu thread to quit */ host_quit = true; + log_mode_before_vcpu_join(); pthread_join(vcpu_thread, NULL); pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " @@ -548,6 +777,9 @@ int main(int argc, char *argv[]) unsigned int mode; int opt, i, j; + sem_init(&dirty_ring_vcpu_stop, 0, 0); + sem_init(&dirty_ring_vcpu_cont, 0, 0); + #ifdef __x86_64__ guest_mode_init(VM_MODE_PXXV48_4K, true, true); #endif diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index b2c1364c0402..710009cc17fd 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -74,6 +74,7 @@ void kvm_vm_release(struct kvm_vm *vmp); void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log); void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, uint64_t first_page, uint32_t num_pages); +uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm); int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, size_t len); @@ -148,6 +149,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); +int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_guest_debug *debug); @@ -201,6 +203,7 @@ void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid, int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_nested_state *state, bool ignore_error); #endif +void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid); const char *exit_reason_str(unsigned int exit_reason); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index ef2114e1b7ad..a04302666b02 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -114,6 +114,16 @@ int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, return r; } +void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size) +{ + struct kvm_enable_cap cap = { 0 }; + + cap.cap = KVM_CAP_DIRTY_LOG_RING; + cap.args[0] = ring_size; + vm_enable_cap(vm, &cap); + vm->dirty_ring_size = ring_size; +} + static void vm_open(struct kvm_vm *vm, int perm) { vm->kvm_fd = open(KVM_DEV_PATH, perm); @@ -328,6 +338,11 @@ void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, __func__, strerror(-ret)); } +uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm) +{ + return ioctl(vm->fd, KVM_RESET_DIRTY_RINGS); +} + /* * Userspace Memory Region Find * @@ -432,10 +447,17 @@ struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid) * * Removes a vCPU from a VM and frees its resources. */ -static void vm_vcpu_rm(struct vcpu *vcpu) +static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu) { int ret; + if (vcpu->dirty_gfns) { + ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size); + TEST_ASSERT(ret == 0, "munmap of VCPU dirty ring failed, " + "rc: %i errno: %i", ret, errno); + vcpu->dirty_gfns = NULL; + } + ret = munmap(vcpu->state, sizeof(*vcpu->state)); TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i " "errno: %i", ret, errno); @@ -453,7 +475,7 @@ void kvm_vm_release(struct kvm_vm *vmp) int ret; list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list) - vm_vcpu_rm(vcpu); + vm_vcpu_rm(vmp, vcpu); ret = close(vmp->fd); TEST_ASSERT(ret == 0, "Close of vm fd failed,\n" @@ -1233,6 +1255,15 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) return rc; } +int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + return vcpu->fd; +} + void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); @@ -1561,6 +1592,42 @@ int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, return ret; } +void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct vcpu *vcpu; + uint32_t size = vm->dirty_ring_size; + + TEST_ASSERT(size > 0, "Should enable dirty ring first"); + + vcpu = vcpu_find(vm, vcpuid); + + TEST_ASSERT(vcpu, "Cannot find vcpu %u", vcpuid); + + if (!vcpu->dirty_gfns) { + void *addr; + + addr = mmap(NULL, size, PROT_READ, + MAP_PRIVATE, vcpu->fd, + vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET); + TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private"); + + addr = mmap(NULL, size, PROT_READ | PROT_EXEC, + MAP_PRIVATE, vcpu->fd, + vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET); + TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec"); + + addr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED, vcpu->fd, + vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET); + TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed"); + + vcpu->dirty_gfns = addr; + vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn); + } + + return vcpu->dirty_gfns; +} + /* * VM Ioctl * @@ -1680,6 +1747,7 @@ static struct exit_reason { {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"}, {KVM_EXIT_OSI, "OSI"}, {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"}, + {KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"}, #ifdef KVM_EXIT_MEMORY_NOT_PRESENT {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"}, #endif diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index f07d383d03a1..34465dc562d8 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -28,6 +28,9 @@ struct vcpu { uint32_t id; int fd; struct kvm_run *state; + struct kvm_dirty_gfn *dirty_gfns; + uint32_t fetch_index; + uint32_t dirty_gfns_count; }; struct kvm_vm { @@ -52,6 +55,7 @@ struct kvm_vm { vm_vaddr_t tss; vm_vaddr_t idt; vm_vaddr_t handlers; + uint32_t dirty_ring_size; }; struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid); -- cgit v1.3.1 From 019d321a68ea07efcfcbc308443251644ff3e71c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:39 -0400 Subject: KVM: selftests: Run dirty ring test asynchronously Previously the dirty ring test was working in synchronous way, because only with a vmexit (with that it was the ring full event) we'll know the hardware dirty bits will be flushed to the dirty ring. With this patch we first introduce a vcpu kick mechanism using SIGUSR1, which guarantees a vmexit and also therefore the flushing of hardware dirty bits. Once this is in place, we can keep the vcpu dirty work asynchronous of the whole collection procedure now. Still, we need to be very careful that when reaching the ring buffer soft limit (KVM_EXIT_DIRTY_RING_FULL) we must collect the dirty bits before continuing the vcpu. Further increase the dirty ring size to current maximum to make sure we torture more on the no-ring-full case, which should be the major scenario when the hypervisors like QEMU would like to use this feature. Reviewed-by: Andrew Jones Signed-off-by: Peter Xu Message-Id: <20201001012239.6159-1-peterx@redhat.com> [Use KVM_SET_SIGNAL_MASK+sigwait instead of a signal handler. - Paolo] Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 64 ++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index a7da4d9471e1..85c5d07dd243 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -62,7 +62,9 @@ # define test_and_clear_bit_le test_and_clear_bit #endif -#define TEST_DIRTY_RING_COUNT 1024 +#define TEST_DIRTY_RING_COUNT 65536 + +#define SIG_IPI SIGUSR1 /* * Guest/Host shared variables. Ensure addr_gva2hva() and/or @@ -138,6 +140,12 @@ static uint64_t host_track_next_count; /* Whether dirty ring reset is requested, or finished */ static sem_t dirty_ring_vcpu_stop; static sem_t dirty_ring_vcpu_cont; +/* + * This is updated by the vcpu thread to tell the host whether it's a + * ring-full event. It should only be read until a sem_wait() of + * dirty_ring_vcpu_stop and before vcpu continues to run. + */ +static bool dirty_ring_vcpu_ring_full; /* * This is only used for verifying the dirty pages. Dirty ring has a very * tricky case when the ring just got full, kvm will do userspace exit due to @@ -176,6 +184,11 @@ static enum log_mode_t host_log_mode_option = LOG_MODE_ALL; static enum log_mode_t host_log_mode; static pthread_t vcpu_thread; +static void vcpu_kick(void) +{ + pthread_kill(vcpu_thread, SIG_IPI); +} + /* * In our test we do signal tricks, let's use a better version of * sem_wait to avoid signal interrupts @@ -286,6 +299,8 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, static void dirty_ring_wait_vcpu(void) { + /* This makes sure that hardware PML cache flushed */ + vcpu_kick(); sem_wait_until(&dirty_ring_vcpu_stop); } @@ -301,9 +316,19 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot, /* We only have one vcpu */ static uint32_t fetch_index = 0; uint32_t count = 0, cleared; + bool continued_vcpu = false; dirty_ring_wait_vcpu(); + if (!dirty_ring_vcpu_ring_full) { + /* + * This is not a ring-full event, it's safe to allow + * vcpu to continue + */ + dirty_ring_continue_vcpu(); + continued_vcpu = true; + } + /* Only have one vcpu */ count = dirty_ring_collect_one(vcpu_map_dirty_ring(vm, VCPU_ID), slot, bitmap, num_pages, &fetch_index); @@ -314,7 +339,11 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot, TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch " "with collected (%u)", cleared, count); - dirty_ring_continue_vcpu(); + if (!continued_vcpu) { + TEST_ASSERT(dirty_ring_vcpu_ring_full, + "Didn't continue vcpu even without ring full"); + dirty_ring_continue_vcpu(); + } pr_info("Iteration %ld collected %u pages\n", iteration, count); } @@ -327,10 +356,15 @@ static void dirty_ring_after_vcpu_run(struct kvm_vm *vm, int ret, int err) if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) { /* We should allow this to continue */ ; - } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) { + } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL || + (ret == -1 && err == EINTR)) { /* Update the flag first before pause */ + WRITE_ONCE(dirty_ring_vcpu_ring_full, + run->exit_reason == KVM_EXIT_DIRTY_RING_FULL); sem_post(&dirty_ring_vcpu_stop); - pr_info("vcpu stops because dirty ring is full...\n"); + pr_info("vcpu stops because %s...\n", + dirty_ring_vcpu_ring_full ? + "dirty ring is full" : "vcpu is kicked out"); sem_wait_until(&dirty_ring_vcpu_cont); pr_info("vcpu continues now.\n"); } else { @@ -458,9 +492,26 @@ static void *vcpu_worker(void *data) struct kvm_vm *vm = data; uint64_t *guest_array; uint64_t pages_count = 0; + struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset) + + sizeof(sigset_t)); + sigset_t *sigset = (sigset_t *) &sigmask->sigset; vcpu_fd = vcpu_get_fd(vm, VCPU_ID); + /* + * SIG_IPI is unblocked atomically while in KVM_RUN. It causes the + * ioctl to return with -EINTR, but it is still pending and we need + * to accept it with the sigwait. + */ + sigmask->len = 8; + pthread_sigmask(0, NULL, sigset); + vcpu_ioctl(vm, VCPU_ID, KVM_SET_SIGNAL_MASK, sigmask); + sigaddset(sigset, SIG_IPI); + pthread_sigmask(SIG_BLOCK, sigset, NULL); + + sigemptyset(sigset); + sigaddset(sigset, SIG_IPI); + guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array); while (!READ_ONCE(host_quit)) { @@ -469,6 +520,11 @@ static void *vcpu_worker(void *data) pages_count += TEST_PAGES_PER_LOOP; /* Let the guest dirty the random pages */ ret = ioctl(vcpu_fd, KVM_RUN, NULL); + if (ret == -1 && errno == EINTR) { + int sig = -1; + sigwait(sigset, &sig); + assert(sig == SIG_IPI); + } log_mode_after_vcpu_run(vm, ret, errno); } -- cgit v1.3.1 From edd3de6fc3d57deddb5cc7c7f1d8316ad26ac4e4 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:41 -0400 Subject: KVM: selftests: Add "-c" parameter to dirty log test It's only used to override the existing dirty ring size/count. If with a bigger ring count, we test async of dirty ring. If with a smaller ring count, we test ring full code path. Async is default. It has no use for non-dirty-ring tests. Reviewed-by: Andrew Jones Signed-off-by: Peter Xu Message-Id: <20201001012241.6208-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 85c5d07dd243..b1f8731721b9 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -183,6 +183,7 @@ static enum log_mode_t host_log_mode_option = LOG_MODE_ALL; /* Logging mode for current run */ static enum log_mode_t host_log_mode; static pthread_t vcpu_thread; +static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT; static void vcpu_kick(void) { @@ -257,7 +258,7 @@ static void dirty_ring_create_vm_done(struct kvm_vm *vm) * Switch to dirty ring mode after VM creation but before any * of the vcpu creation. */ - vm_enable_dirty_ring(vm, TEST_DIRTY_RING_COUNT * + vm_enable_dirty_ring(vm, test_dirty_ring_count * sizeof(struct kvm_dirty_gfn)); } @@ -279,7 +280,7 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, uint32_t count = 0; while (true) { - cur = &dirty_gfns[*fetch_index % TEST_DIRTY_RING_COUNT]; + cur = &dirty_gfns[*fetch_index % test_dirty_ring_count]; if (!dirty_gfn_is_dirtied(cur)) break; TEST_ASSERT(cur->slot == slot, "Slot number didn't match: " @@ -803,6 +804,9 @@ static void help(char *name) printf("usage: %s [-h] [-i iterations] [-I interval] " "[-p offset] [-m mode]\n", name); puts(""); + printf(" -c: specify dirty ring size, in number of entries\n"); + printf(" (only useful for dirty-ring test; default: %"PRIu32")\n", + TEST_DIRTY_RING_COUNT); printf(" -i: specify iteration counts (default: %"PRIu64")\n", TEST_HOST_LOOP_N); printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n", @@ -858,8 +862,11 @@ int main(int argc, char *argv[]) guest_mode_init(VM_MODE_P40V48_4K, true, true); #endif - while ((opt = getopt(argc, argv, "hi:I:p:m:M:")) != -1) { + while ((opt = getopt(argc, argv, "c:hi:I:p:m:M:")) != -1) { switch (opt) { + case 'c': + test_dirty_ring_count = strtol(optarg, NULL, 10); + break; case 'i': iterations = strtol(optarg, NULL, 10); break; -- cgit v1.3.1 From 8aa426e854c475504033c176a66d038259bf64ea Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 6 Nov 2020 07:39:26 -0500 Subject: selftests: kvm: keep .gitignore add to date Add tsc_msrs_test, remove clear_dirty_log_test and alphabetize everything. Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 7a2c242b7152..5468db7dd674 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -14,17 +14,17 @@ /x86_64/set_sregs_test /x86_64/smm_test /x86_64/state_test -/x86_64/user_msr_test -/x86_64/vmx_preemption_timer_test /x86_64/svm_vmcall_test /x86_64/sync_regs_test +/x86_64/tsc_msrs_test +/x86_64/user_msr_test /x86_64/vmx_apic_access_test /x86_64/vmx_close_while_nested_test /x86_64/vmx_dirty_log_test +/x86_64/vmx_preemption_timer_test /x86_64/vmx_set_nested_state_test /x86_64/vmx_tsc_adjust_test /x86_64/xss_msr_test -/clear_dirty_log_test /demand_paging_test /dirty_log_test /dirty_log_perf_test -- cgit v1.3.1 From f63f0b68c864edea801de678bed279a3d7674f1a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 13 Nov 2020 11:36:49 -0500 Subject: KVM: selftests: always use manual clear in dirty_log_perf_test Nothing sets USE_CLEAR_DIRTY_LOG anymore, so anything it surrounds is dead code. However, it is the recommended way to use the dirty page bitmap for new enough kernel, so use it whenever KVM has the KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 capability. Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_perf_test.c | 55 +++++++++-------------- 1 file changed, 22 insertions(+), 33 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 85c9b8f73142..9c6a7be31e03 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -27,6 +27,7 @@ #define TEST_HOST_LOOP_N 2UL /* Host variables */ +static u64 dirty_log_manual_caps; static bool host_quit; static uint64_t iteration; static uint64_t vcpu_last_completed_iteration[MAX_VCPUS]; @@ -88,10 +89,6 @@ static void *vcpu_worker(void *data) return NULL; } -#ifdef USE_CLEAR_DIRTY_LOG -static u64 dirty_log_manual_caps; -#endif - static void run_test(enum vm_guest_mode mode, unsigned long iterations, uint64_t phys_offset, int wr_fract) { @@ -106,10 +103,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, struct timespec get_dirty_log_total = (struct timespec){0}; struct timespec vcpu_dirty_total = (struct timespec){0}; struct timespec avg; -#ifdef USE_CLEAR_DIRTY_LOG struct kvm_enable_cap cap = {}; struct timespec clear_dirty_log_total = (struct timespec){0}; -#endif vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); @@ -120,11 +115,11 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, host_num_pages = vm_num_host_pages(mode, guest_num_pages); bmap = bitmap_alloc(host_num_pages); -#ifdef USE_CLEAR_DIRTY_LOG - cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; - cap.args[0] = dirty_log_manual_caps; - vm_enable_cap(vm, &cap); -#endif + if (dirty_log_manual_caps) { + cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; + cap.args[0] = dirty_log_manual_caps; + vm_enable_cap(vm, &cap); + } vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); @@ -190,17 +185,17 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n", iteration, ts_diff.tv_sec, ts_diff.tv_nsec); -#ifdef USE_CLEAR_DIRTY_LOG - clock_gettime(CLOCK_MONOTONIC, &start); - kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, - host_num_pages); + if (dirty_log_manual_caps) { + clock_gettime(CLOCK_MONOTONIC, &start); + kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, + host_num_pages); - ts_diff = timespec_diff_now(start); - clear_dirty_log_total = timespec_add(clear_dirty_log_total, - ts_diff); - pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n", - iteration, ts_diff.tv_sec, ts_diff.tv_nsec); -#endif + ts_diff = timespec_diff_now(start); + clear_dirty_log_total = timespec_add(clear_dirty_log_total, + ts_diff); + pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n", + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); + } } /* Tell the vcpu thread to quit */ @@ -220,12 +215,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, iterations, get_dirty_log_total.tv_sec, get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); -#ifdef USE_CLEAR_DIRTY_LOG - avg = timespec_div(clear_dirty_log_total, iterations); - pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", - iterations, clear_dirty_log_total.tv_sec, - clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); -#endif + if (dirty_log_manual_caps) { + avg = timespec_div(clear_dirty_log_total, iterations); + pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + iterations, clear_dirty_log_total.tv_sec, + clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); + } free(bmap); free(vcpu_threads); @@ -284,16 +279,10 @@ int main(int argc, char *argv[]) int opt, i; int wr_fract = 1; -#ifdef USE_CLEAR_DIRTY_LOG dirty_log_manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); - if (!dirty_log_manual_caps) { - print_skip("KVM_CLEAR_DIRTY_LOG not available"); - exit(KSFT_SKIP); - } dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | KVM_DIRTY_LOG_INITIALLY_SET); -#endif #ifdef __x86_64__ guest_mode_init(VM_MODE_PXXV48_4K, true, true); -- cgit v1.3.1 From ec2f18bb4783648041498b06d4bff222821efed1 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 11 Nov 2020 13:26:29 +0100 Subject: KVM: selftests: Make vm_create_default common The code is almost 100% the same anyway. Just move it to common and add a few arch-specific macros. Reviewed-by: Peter Xu Reviewed-by: Ben Gardon Signed-off-by: Andrew Jones Message-Id: <20201111122636.73346-5-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/kvm_util.h | 23 +++++++++++++--- .../testing/selftests/kvm/lib/aarch64/processor.c | 17 ------------ tools/testing/selftests/kvm/lib/kvm_util.c | 26 ++++++++++++++++++ tools/testing/selftests/kvm/lib/s390x/processor.c | 22 --------------- tools/testing/selftests/kvm/lib/x86_64/processor.c | 32 ---------------------- 5 files changed, 45 insertions(+), 75 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 710009cc17fd..8154785196cb 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -45,13 +45,28 @@ enum vm_guest_mode { }; #if defined(__aarch64__) -#define VM_MODE_DEFAULT VM_MODE_P40V48_4K + +#define VM_MODE_DEFAULT VM_MODE_P40V48_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + #elif defined(__x86_64__) -#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K -#else -#define VM_MODE_DEFAULT VM_MODE_P52V48_4K + +#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + +#elif defined(__s390x__) + +#define VM_MODE_DEFAULT VM_MODE_P52V48_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 16) + #endif +#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT) +#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE) + #define vm_guest_mode_string(m) vm_guest_mode_string[m] extern const char * const vm_guest_mode_string[]; diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index d6c32c328e9a..cee92d477dc0 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -5,8 +5,6 @@ * Copyright (C) 2018, Red Hat, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name */ - #include #include "kvm_util.h" @@ -219,21 +217,6 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) } } -struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, - void *guest_code) -{ - uint64_t ptrs_per_4k_pte = 512; - uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2; - struct kvm_vm *vm; - - vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); - - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); - vm_vcpu_add_default(vm, vcpuid, guest_code); - - return vm; -} - void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init) { struct kvm_vcpu_init default_init = { .target = -1, }; diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index a04302666b02..70676a305523 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -5,6 +5,7 @@ * Copyright (C) 2018, Google LLC. */ +#define _GNU_SOURCE /* for program_invocation_name */ #include "test_util.h" #include "kvm_util.h" #include "kvm_util_internal.h" @@ -281,6 +282,31 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) return vm; } +struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, + void *guest_code) +{ + /* The maximum page table size for a memory region will be when the + * smallest pages are used. Considering each page contains x page + * table descriptors, the total extra size for page tables (for extra + * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller + * than N/x*2. + */ + uint64_t extra_pg_pages = (extra_mem_pages / PTES_PER_MIN_PAGE) * 2; + struct kvm_vm *vm; + + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); + +#ifdef __x86_64__ + vm_create_irqchip(vm); +#endif + + vm_vcpu_add_default(vm, vcpuid, guest_code); + + return vm; +} + /* * VM Restart * diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index 7349bb2e1a24..0152f356c099 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -5,8 +5,6 @@ * Copyright (C) 2019, Red Hat, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name */ - #include "processor.h" #include "kvm_util.h" #include "../kvm_util_internal.h" @@ -160,26 +158,6 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) virt_dump_region(stream, vm, indent, vm->pgd); } -struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, - void *guest_code) -{ - /* - * The additional amount of pages required for the page tables is: - * 1 * n / 256 + 4 * (n / 256) / 2048 + 4 * (n / 256) / 2048^2 + ... - * which is definitely smaller than (n / 256) * 2. - */ - uint64_t extra_pg_pages = extra_mem_pages / 256 * 2; - struct kvm_vm *vm; - - vm = vm_create(VM_MODE_DEFAULT, - DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); - - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); - vm_vcpu_add_default(vm, vcpuid, guest_code); - - return vm; -} - void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) { size_t stack_size = DEFAULT_STACK_PGS * getpagesize(); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index d10c5c05bdf0..95e1a757c629 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -5,8 +5,6 @@ * Copyright (C) 2018, Google LLC. */ -#define _GNU_SOURCE /* for program_invocation_name */ - #include "test_util.h" #include "kvm_util.h" #include "../kvm_util_internal.h" @@ -731,36 +729,6 @@ void vcpu_set_cpuid(struct kvm_vm *vm, } -struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, - void *guest_code) -{ - struct kvm_vm *vm; - /* - * For x86 the maximum page table size for a memory region - * will be when only 4K pages are used. In that case the - * total extra size for page tables (for extra N pages) will - * be: N/512+N/512^2+N/512^3+... which is definitely smaller - * than N/512*2. - */ - uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; - - /* Create VM */ - vm = vm_create(VM_MODE_DEFAULT, - DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, - O_RDWR); - - /* Setup guest code */ - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); - - /* Setup IRQ Chip */ - vm_create_irqchip(vm); - - /* Add the first vCPU. */ - vm_vcpu_add_default(vm, vcpuid, guest_code); - - return vm; -} - /* * VCPU Get MSR * -- cgit v1.3.1 From 0aa9ec45d42779af711c7a209b5780ff7391b5bd Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 11 Nov 2020 13:26:30 +0100 Subject: KVM: selftests: Introduce vm_create_[default_]_with_vcpus Introduce new vm_create variants that also takes a number of vcpus, an amount of per-vcpu pages, and optionally a list of vcpuids. These variants will create default VMs with enough additional pages to cover the vcpu stacks, per-vcpu pages, and pagetable pages for all. The new 'default' variant uses VM_MODE_DEFAULT, whereas the other new variant accepts the mode as a parameter. Reviewed-by: Peter Xu Reviewed-by: Ben Gardon Signed-off-by: Andrew Jones Message-Id: <20201111122636.73346-6-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/kvm_util.h | 10 ++++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 35 ++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 8154785196cb..dfa9d369e8fc 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -266,6 +266,16 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, void *guest_code); +/* Same as vm_create_default, but can be used for more than one vcpu */ +struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages, + uint32_t num_percpu_pages, void *guest_code, + uint32_t vcpuids[]); + +/* Like vm_create_default_with_vcpus, but accepts mode as a parameter */ +struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, + uint64_t extra_mem_pages, uint32_t num_percpu_pages, + void *guest_code, uint32_t vcpuids[]); + /* * Adds a vCPU with reasonable defaults (e.g. a stack) * diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 70676a305523..df2035ac54a6 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -282,8 +282,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) return vm; } -struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, - void *guest_code) +struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, + uint64_t extra_mem_pages, uint32_t num_percpu_pages, + void *guest_code, uint32_t vcpuids[]) { /* The maximum page table size for a memory region will be when the * smallest pages are used. Considering each page contains x page @@ -291,10 +292,18 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller * than N/x*2. */ - uint64_t extra_pg_pages = (extra_mem_pages / PTES_PER_MIN_PAGE) * 2; + uint64_t vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus; + uint64_t extra_pg_pages = (extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2; + uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages; struct kvm_vm *vm; + int i; + + TEST_ASSERT(nr_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS), + "nr_vcpus = %d too large for host, max-vcpus = %d", + nr_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS)); - vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + pages = vm_adjust_num_guest_pages(mode, pages); + vm = vm_create(mode, pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); @@ -302,11 +311,27 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, vm_create_irqchip(vm); #endif - vm_vcpu_add_default(vm, vcpuid, guest_code); + for (i = 0; i < nr_vcpus; ++i) + vm_vcpu_add_default(vm, vcpuids ? vcpuids[i] : i, guest_code); return vm; } +struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages, + uint32_t num_percpu_pages, void *guest_code, + uint32_t vcpuids[]) +{ + return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, extra_mem_pages, + num_percpu_pages, guest_code, vcpuids); +} + +struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, + void *guest_code) +{ + return vm_create_default_with_vcpus(1, extra_mem_pages, 0, guest_code, + (uint32_t []){ vcpuid }); +} + /* * VM Restart * -- cgit v1.3.1 From 87c5f35e5c958278174979f13a9e40d3c9962c0f Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 11 Nov 2020 13:26:34 +0100 Subject: KVM: selftests: Also build dirty_log_perf_test on AArch64 Signed-off-by: Andrew Jones Message-Id: <20201111122636.73346-10-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 3d14ef77755e..4febf4d5ead9 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -70,6 +70,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test +TEST_GEN_PROGS_aarch64 += dirty_log_perf_test TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus TEST_GEN_PROGS_aarch64 += set_memory_region_test TEST_GEN_PROGS_aarch64 += steal_time -- cgit v1.3.1 From 024cd2cbd1ca2d29e6df538855d52c4e5990cab7 Mon Sep 17 00:00:00 2001 From: Santucci Pierpaolo Date: Mon, 16 Nov 2020 11:30:37 +0100 Subject: selftest/bpf: Fix IPV6FR handling in flow dissector From second fragment on, IPV6FR program must stop the dissection of IPV6 fragmented packet. This is the same approach used for IPV4 fragmentation. This fixes the flow keys calculation for the upper-layer protocols. Note that according to RFC8200, the first fragment packet must include the upper-layer header. Signed-off-by: Santucci Pierpaolo Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/X7JUzUj34ceE2wBm@santucci.pierpaolo --- tools/testing/selftests/bpf/progs/bpf_flow.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index 5a65f6b51377..95a5a0778ed7 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -368,6 +368,8 @@ PROG(IPV6FR)(struct __sk_buff *skb) */ if (!(keys->flags & BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) return export_flow_keys(keys, BPF_OK); + } else { + return export_flow_keys(keys, BPF_OK); } return parse_ipv6_proto(skb, fragh->nexthdr); -- cgit v1.3.1 From 601366678c93618f37a685332c0ba07e5556798c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 30 Oct 2020 14:47:42 +0900 Subject: perf data: Allow to use stdio functions for pipe mode When perf data is in a pipe, it reads each event separately using read(2) syscall. This is a huge performance bottleneck when processing large data like in perf inject. Also perf inject needs to use write(2) syscall for the output. So convert it to use buffer I/O functions in stdio library for pipe data. This makes inject-build-id bench time drops from 20ms to 8ms. $ perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.074 msec (+- 0.013 msec) Average time per event: 0.792 usec (+- 0.001 usec) Average memory usage: 8328 KB (+- 0 KB) Average build-id-all injection took: 5.490 msec (+- 0.008 msec) Average time per event: 0.538 usec (+- 0.001 usec) Average memory usage: 7563 KB (+- 0 KB) This patch enables it just for perf inject when used with pipe (it's a default behavior). Maybe we could do it for perf record and/or report later.. Committer testing: Before: $ perf stat -r 5 perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 13.605 msec (+- 0.064 msec) Average time per event: 1.334 usec (+- 0.006 usec) Average memory usage: 12220 KB (+- 7 KB) Average build-id-all injection took: 11.458 msec (+- 0.058 msec) Average time per event: 1.123 usec (+- 0.006 usec) Average memory usage: 11546 KB (+- 8 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 13.673 msec (+- 0.057 msec) Average time per event: 1.341 usec (+- 0.006 usec) Average memory usage: 12508 KB (+- 8 KB) Average build-id-all injection took: 11.437 msec (+- 0.046 msec) Average time per event: 1.121 usec (+- 0.004 usec) Average memory usage: 11812 KB (+- 7 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 13.641 msec (+- 0.069 msec) Average time per event: 1.337 usec (+- 0.007 usec) Average memory usage: 12302 KB (+- 8 KB) Average build-id-all injection took: 10.820 msec (+- 0.106 msec) Average time per event: 1.061 usec (+- 0.010 usec) Average memory usage: 11616 KB (+- 7 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 13.379 msec (+- 0.074 msec) Average time per event: 1.312 usec (+- 0.007 usec) Average memory usage: 12334 KB (+- 8 KB) Average build-id-all injection took: 11.288 msec (+- 0.071 msec) Average time per event: 1.107 usec (+- 0.007 usec) Average memory usage: 11657 KB (+- 8 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 13.534 msec (+- 0.058 msec) Average time per event: 1.327 usec (+- 0.006 usec) Average memory usage: 12264 KB (+- 8 KB) Average build-id-all injection took: 11.557 msec (+- 0.076 msec) Average time per event: 1.133 usec (+- 0.007 usec) Average memory usage: 11593 KB (+- 8 KB) Performance counter stats for 'perf bench internals inject-build-id' (5 runs): 4,060.05 msec task-clock:u # 1.566 CPUs utilized ( +- 0.65% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 101,888 page-faults:u # 0.025 M/sec ( +- 0.12% ) 3,745,833,163 cycles:u # 0.923 GHz ( +- 0.10% ) (83.22%) 194,346,613 stalled-cycles-frontend:u # 5.19% frontend cycles idle ( +- 0.57% ) (83.30%) 708,495,034 stalled-cycles-backend:u # 18.91% backend cycles idle ( +- 0.48% ) (83.48%) 5,629,328,628 instructions:u # 1.50 insn per cycle # 0.13 stalled cycles per insn ( +- 0.21% ) (83.57%) 1,236,697,927 branches:u # 304.602 M/sec ( +- 0.16% ) (83.44%) 17,564,877 branch-misses:u # 1.42% of all branches ( +- 0.23% ) (82.99%) 2.5934 +- 0.0128 seconds time elapsed ( +- 0.49% ) $ After: $ perf stat -r 5 perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.560 msec (+- 0.125 msec) Average time per event: 0.839 usec (+- 0.012 usec) Average memory usage: 12520 KB (+- 8 KB) Average build-id-all injection took: 5.789 msec (+- 0.054 msec) Average time per event: 0.568 usec (+- 0.005 usec) Average memory usage: 11919 KB (+- 9 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.639 msec (+- 0.111 msec) Average time per event: 0.847 usec (+- 0.011 usec) Average memory usage: 12732 KB (+- 8 KB) Average build-id-all injection took: 5.647 msec (+- 0.069 msec) Average time per event: 0.554 usec (+- 0.007 usec) Average memory usage: 12093 KB (+- 7 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.551 msec (+- 0.096 msec) Average time per event: 0.838 usec (+- 0.009 usec) Average memory usage: 12739 KB (+- 8 KB) Average build-id-all injection took: 5.617 msec (+- 0.061 msec) Average time per event: 0.551 usec (+- 0.006 usec) Average memory usage: 12105 KB (+- 7 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.403 msec (+- 0.097 msec) Average time per event: 0.824 usec (+- 0.010 usec) Average memory usage: 12770 KB (+- 8 KB) Average build-id-all injection took: 5.611 msec (+- 0.085 msec) Average time per event: 0.550 usec (+- 0.008 usec) Average memory usage: 12134 KB (+- 8 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.518 msec (+- 0.102 msec) Average time per event: 0.835 usec (+- 0.010 usec) Average memory usage: 12518 KB (+- 10 KB) Average build-id-all injection took: 5.503 msec (+- 0.073 msec) Average time per event: 0.540 usec (+- 0.007 usec) Average memory usage: 11882 KB (+- 8 KB) Performance counter stats for 'perf bench internals inject-build-id' (5 runs): 2,394.88 msec task-clock:u # 1.577 CPUs utilized ( +- 0.83% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 103,181 page-faults:u # 0.043 M/sec ( +- 0.11% ) 3,548,172,030 cycles:u # 1.482 GHz ( +- 0.30% ) (83.26%) 81,537,700 stalled-cycles-frontend:u # 2.30% frontend cycles idle ( +- 1.54% ) (83.24%) 876,631,544 stalled-cycles-backend:u # 24.71% backend cycles idle ( +- 1.14% ) (83.45%) 5,960,361,707 instructions:u # 1.68 insn per cycle # 0.15 stalled cycles per insn ( +- 0.27% ) (83.26%) 1,269,413,491 branches:u # 530.054 M/sec ( +- 0.10% ) (83.48%) 11,372,453 branch-misses:u # 0.90% of all branches ( +- 0.52% ) (83.31%) 1.51874 +- 0.00642 seconds time elapsed ( +- 0.42% ) $ Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201030054742.87740-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 2 ++ tools/perf/util/data.c | 41 ++++++++++++++++++++++++++++++++++++++--- tools/perf/util/data.h | 11 ++++++++++- tools/perf/util/header.c | 8 ++++---- tools/perf/util/session.c | 7 ++++--- 5 files changed, 58 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 452a75fe68e5..14d6c88fed76 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -853,10 +853,12 @@ int cmd_inject(int argc, const char **argv) .output = { .path = "-", .mode = PERF_DATA_MODE_WRITE, + .use_stdio = true, }, }; struct perf_data data = { .mode = PERF_DATA_MODE_READ, + .use_stdio = true, }; int ret; diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index c47aa34fdc0a..05bbcb663c41 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -174,8 +174,21 @@ static bool check_pipe(struct perf_data *data) is_pipe = true; } - if (is_pipe) - data->file.fd = fd; + if (is_pipe) { + if (data->use_stdio) { + const char *mode; + + mode = perf_data__is_read(data) ? "r" : "w"; + data->file.fptr = fdopen(fd, mode); + + if (data->file.fptr == NULL) { + data->file.fd = fd; + data->use_stdio = false; + } + } else { + data->file.fd = fd; + } + } return data->is_pipe = is_pipe; } @@ -334,6 +347,9 @@ int perf_data__open(struct perf_data *data) if (check_pipe(data)) return 0; + /* currently it allows stdio for pipe only */ + data->use_stdio = false; + if (!data->path) data->path = "perf.data"; @@ -353,7 +369,21 @@ void perf_data__close(struct perf_data *data) perf_data__close_dir(data); zfree(&data->file.path); - close(data->file.fd); + + if (data->use_stdio) + fclose(data->file.fptr); + else + close(data->file.fd); +} + +ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size) +{ + if (data->use_stdio) { + if (fread(buf, size, 1, data->file.fptr) == 1) + return size; + return feof(data->file.fptr) ? 0 : -1; + } + return readn(data->file.fd, buf, size); } ssize_t perf_data_file__write(struct perf_data_file *file, @@ -365,6 +395,11 @@ ssize_t perf_data_file__write(struct perf_data_file *file, ssize_t perf_data__write(struct perf_data *data, void *buf, size_t size) { + if (data->use_stdio) { + if (fwrite(buf, size, 1, data->file.fptr) == 1) + return size; + return -1; + } return perf_data_file__write(&data->file, buf, size); } diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 75947ef6bc17..c563fcbb0288 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -2,6 +2,7 @@ #ifndef __PERF_DATA_H #define __PERF_DATA_H +#include #include enum perf_data_mode { @@ -16,7 +17,10 @@ enum perf_dir_version { struct perf_data_file { char *path; - int fd; + union { + int fd; + FILE *fptr; + }; unsigned long size; }; @@ -26,6 +30,7 @@ struct perf_data { bool is_pipe; bool is_dir; bool force; + bool use_stdio; enum perf_data_mode mode; struct { @@ -62,11 +67,15 @@ static inline bool perf_data__is_single_file(struct perf_data *data) static inline int perf_data__fd(struct perf_data *data) { + if (data->use_stdio) + return fileno(data->file.fptr); + return data->file.fd; } int perf_data__open(struct perf_data *data); void perf_data__close(struct perf_data *data); +ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size); ssize_t perf_data__write(struct perf_data *data, void *buf, size_t size); ssize_t perf_data_file__write(struct perf_data_file *file, diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 598285a21dad..b9171bd11fe6 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3647,7 +3647,8 @@ static int perf_file_section__process(struct perf_file_section *section, } static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, - struct perf_header *ph, int fd, + struct perf_header *ph, + struct perf_data* data, bool repipe) { struct feat_fd ff = { @@ -3656,7 +3657,7 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, }; ssize_t ret; - ret = readn(fd, header, sizeof(*header)); + ret = perf_data__read(data, header, sizeof(*header)); if (ret <= 0) return -1; @@ -3679,8 +3680,7 @@ static int perf_header__read_pipe(struct perf_session *session) struct perf_header *header = &session->header; struct perf_pipe_file_header f_header; - if (perf_file_header__read_pipe(&f_header, header, - perf_data__fd(session->data), + if (perf_file_header__read_pipe(&f_header, header, session->data, session->repipe) < 0) { pr_debug("incompatible file format\n"); return -EINVAL; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 098080287c68..5cc722b6fe7c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1937,7 +1937,6 @@ static int __perf_session__process_pipe_events(struct perf_session *session) { struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; - int fd = perf_data__fd(session->data); union perf_event *event; uint32_t size, cur_size = 0; void *buf = NULL; @@ -1957,7 +1956,8 @@ static int __perf_session__process_pipe_events(struct perf_session *session) ordered_events__set_copy_on_queue(oe, true); more: event = buf; - err = readn(fd, event, sizeof(struct perf_event_header)); + err = perf_data__read(session->data, event, + sizeof(struct perf_event_header)); if (err <= 0) { if (err == 0) goto done; @@ -1989,7 +1989,8 @@ more: p += sizeof(struct perf_event_header); if (size - sizeof(struct perf_event_header)) { - err = readn(fd, p, size - sizeof(struct perf_event_header)); + err = perf_data__read(session->data, p, + size - sizeof(struct perf_event_header)); if (err <= 0) { if (err == 0) { pr_err("unexpected end of event stream\n"); -- cgit v1.3.1 From 8113ab20e850491b4144a1a64246f07a2d737a49 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 15 Oct 2020 12:28:58 +0200 Subject: tools/power/cpupower: Read energy_perf_bias from sysfs ... instead of poking at the MSR. For that, move the accessor functions to misc.c and add a sysfs-writing function too. There should be no functional changes resulting from this. Signed-off-by: Borislav Petkov Reviewed-by: Shuah Khan Cc: Thomas Renninger Link: https://lkml.kernel.org/r/20201029190259.3476-2-bp@alien8.de --- tools/power/cpupower/lib/cpupower.c | 23 ++++++++++++- tools/power/cpupower/lib/cpupower_intern.h | 5 +++ tools/power/cpupower/utils/cpupower-info.c | 2 +- tools/power/cpupower/utils/cpupower-set.c | 2 +- tools/power/cpupower/utils/helpers/helpers.h | 8 ++--- tools/power/cpupower/utils/helpers/misc.c | 48 ++++++++++++++++++++++++++++ tools/power/cpupower/utils/helpers/msr.c | 28 ---------------- 7 files changed, 81 insertions(+), 35 deletions(-) (limited to 'tools') diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c index 3656e697537e..3f7d0c0c5067 100644 --- a/tools/power/cpupower/lib/cpupower.c +++ b/tools/power/cpupower/lib/cpupower.c @@ -16,8 +16,8 @@ unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen) { - int fd; ssize_t numread; + int fd; fd = open(path, O_RDONLY); if (fd == -1) @@ -35,6 +35,27 @@ unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen) return (unsigned int) numread; } +unsigned int cpupower_write_sysfs(const char *path, char *buf, size_t buflen) +{ + ssize_t numwritten; + int fd; + + fd = open(path, O_WRONLY); + if (fd == -1) + return 0; + + numwritten = write(fd, buf, buflen - 1); + if (numwritten < 1) { + perror(path); + close(fd); + return -1; + } + + close(fd); + + return (unsigned int) numwritten; +} + /* * Detect whether a CPU is online * diff --git a/tools/power/cpupower/lib/cpupower_intern.h b/tools/power/cpupower/lib/cpupower_intern.h index 4887c76d23f8..ac1112b956ec 100644 --- a/tools/power/cpupower/lib/cpupower_intern.h +++ b/tools/power/cpupower/lib/cpupower_intern.h @@ -1,6 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ #define PATH_TO_CPU "/sys/devices/system/cpu/" + +#ifndef MAX_LINE_LEN #define MAX_LINE_LEN 4096 +#endif + #define SYSFS_PATH_MAX 255 unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen); +unsigned int cpupower_write_sysfs(const char *path, char *buf, size_t buflen); diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c index 0ba61a2c4d81..06345b543786 100644 --- a/tools/power/cpupower/utils/cpupower-info.c +++ b/tools/power/cpupower/utils/cpupower-info.c @@ -101,7 +101,7 @@ int cmd_info(int argc, char **argv) } if (params.perf_bias) { - ret = msr_intel_get_perf_bias(cpu); + ret = cpupower_intel_get_perf_bias(cpu); if (ret < 0) { fprintf(stderr, _("Could not read perf-bias value[%d]\n"), ret); diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index 052044d7e012..180d5ba877e6 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ b/tools/power/cpupower/utils/cpupower-set.c @@ -95,7 +95,7 @@ int cmd_set(int argc, char **argv) } if (params.perf_bias) { - ret = msr_intel_set_perf_bias(cpu, perf_bias); + ret = cpupower_intel_set_perf_bias(cpu, perf_bias); if (ret) { fprintf(stderr, _("Error setting perf-bias " "value on CPU %d\n"), cpu); diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index c258eeccd05f..37dac161f3fe 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -105,8 +105,8 @@ extern struct cpupower_cpu_info cpupower_cpu_info; extern int read_msr(int cpu, unsigned int idx, unsigned long long *val); extern int write_msr(int cpu, unsigned int idx, unsigned long long val); -extern int msr_intel_set_perf_bias(unsigned int cpu, unsigned int val); -extern int msr_intel_get_perf_bias(unsigned int cpu); +extern int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val); +extern int cpupower_intel_get_perf_bias(unsigned int cpu); extern unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu); /* Read/Write msr ****************************/ @@ -150,9 +150,9 @@ static inline int read_msr(int cpu, unsigned int idx, unsigned long long *val) { return -1; }; static inline int write_msr(int cpu, unsigned int idx, unsigned long long val) { return -1; }; -static inline int msr_intel_set_perf_bias(unsigned int cpu, unsigned int val) +static inline int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val) { return -1; }; -static inline int msr_intel_get_perf_bias(unsigned int cpu) +static inline int cpupower_intel_get_perf_bias(unsigned int cpu) { return -1; }; static inline unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu) { return 0; }; diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index f406adc40bad..e8f8f643a627 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c @@ -1,7 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + #if defined(__i386__) || defined(__x86_64__) #include "helpers/helpers.h" +#include "helpers/sysfs.h" + +#include "cpupower_intern.h" #define MSR_AMD_HWCR 0xc0010015 @@ -40,4 +48,44 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, *support = *active = 1; return 0; } + +int cpupower_intel_get_perf_bias(unsigned int cpu) +{ + char linebuf[MAX_LINE_LEN]; + char path[SYSFS_PATH_MAX]; + unsigned long val; + char *endp; + + if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_PERF_BIAS)) + return -1; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/power/energy_perf_bias", cpu); + + if (cpupower_read_sysfs(path, linebuf, MAX_LINE_LEN) == 0) + return -1; + + val = strtol(linebuf, &endp, 0); + if (endp == linebuf || errno == ERANGE) + return -1; + + return val; +} + +int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val) +{ + char path[SYSFS_PATH_MAX]; + char linebuf[3] = {}; + + if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_PERF_BIAS)) + return -1; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/power/energy_perf_bias", cpu); + snprintf(linebuf, sizeof(linebuf), "%d", val); + + if (cpupower_write_sysfs(path, linebuf, 3) <= 0) + return -1; + + return 0; +} + #endif /* #if defined(__i386__) || defined(__x86_64__) */ diff --git a/tools/power/cpupower/utils/helpers/msr.c b/tools/power/cpupower/utils/helpers/msr.c index ab9950748838..8b0b6be74bb8 100644 --- a/tools/power/cpupower/utils/helpers/msr.c +++ b/tools/power/cpupower/utils/helpers/msr.c @@ -11,7 +11,6 @@ /* Intel specific MSRs */ #define MSR_IA32_PERF_STATUS 0x198 #define MSR_IA32_MISC_ENABLES 0x1a0 -#define MSR_IA32_ENERGY_PERF_BIAS 0x1b0 #define MSR_NEHALEM_TURBO_RATIO_LIMIT 0x1ad /* @@ -73,33 +72,6 @@ int write_msr(int cpu, unsigned int idx, unsigned long long val) return -1; } -int msr_intel_get_perf_bias(unsigned int cpu) -{ - unsigned long long val; - int ret; - - if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_PERF_BIAS)) - return -1; - - ret = read_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &val); - if (ret) - return ret; - return val; -} - -int msr_intel_set_perf_bias(unsigned int cpu, unsigned int val) -{ - int ret; - - if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_PERF_BIAS)) - return -1; - - ret = write_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, val); - if (ret) - return ret; - return 0; -} - unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu) { unsigned long long val; -- cgit v1.3.1 From 6d6501d912a9a5e1b73d7fbf419b90a8ec11ed7a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 15 Oct 2020 14:50:16 +0200 Subject: tools/power/turbostat: Read energy_perf_bias from sysfs ... instead of poking at the MSR directly. Signed-off-by: Borislav Petkov Cc: Len Brown Cc: linux-pm@vger.kernel.org Link: https://lkml.kernel.org/r/20201029190259.3476-3-bp@alien8.de --- tools/power/x86/turbostat/turbostat.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 33b370865d16..0baec7ea1bbd 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1721,6 +1721,25 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp) return 0; } +int get_epb(int cpu) +{ + char path[128 + PATH_BYTES]; + int ret, epb = -1; + FILE *fp; + + sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu); + + fp = fopen_or_die(path, "r"); + + ret = fscanf(fp, "%d", &epb); + if (ret != 1) + err(1, "%s(%s)", __func__, path); + + fclose(fp); + + return epb; +} + void get_apic_id(struct thread_data *t) { unsigned int eax, ebx, ecx, edx; @@ -3631,9 +3650,8 @@ dump_sysfs_pstate_config(void) */ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) { - unsigned long long msr; char *epb_string; - int cpu; + int cpu, epb; if (!has_epb) return 0; @@ -3649,10 +3667,11 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -1; } - if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr)) + epb = get_epb(cpu); + if (epb < 0) return 0; - switch (msr & 0xF) { + switch (epb) { case ENERGY_PERF_BIAS_PERFORMANCE: epb_string = "performance"; break; @@ -3666,7 +3685,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) epb_string = "custom"; break; } - fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string); + fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string); return 0; } -- cgit v1.3.1 From fe0a5788624c8b8f113a35bbe4636e37f9321241 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 15 Oct 2020 14:58:48 +0200 Subject: tools/power/x86_energy_perf_policy: Read energy_perf_bias from sysfs ... and stop poking at the MSR directly. Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20201029190259.3476-4-bp@alien8.de --- .../x86_energy_perf_policy.c | 109 +++++++++++++++++++-- 1 file changed, 99 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 3fe1eed900d4..ad6aed1fabf8 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -91,6 +91,9 @@ unsigned int has_hwp_request_pkg; /* IA32_HWP_REQUEST_PKG */ unsigned int bdx_highest_ratio; +#define PATH_TO_CPU "/sys/devices/system/cpu/" +#define SYSFS_PATH_MAX 255 + /* * maintain compatibility with original implementation, but don't document it: */ @@ -668,6 +671,48 @@ int put_msr(int cpu, int offset, unsigned long long new_msr) return 0; } +static unsigned int read_sysfs(const char *path, char *buf, size_t buflen) +{ + ssize_t numread; + int fd; + + fd = open(path, O_RDONLY); + if (fd == -1) + return 0; + + numread = read(fd, buf, buflen - 1); + if (numread < 1) { + close(fd); + return 0; + } + + buf[numread] = '\0'; + close(fd); + + return (unsigned int) numread; +} + +static unsigned int write_sysfs(const char *path, char *buf, size_t buflen) +{ + ssize_t numwritten; + int fd; + + fd = open(path, O_WRONLY); + if (fd == -1) + return 0; + + numwritten = write(fd, buf, buflen - 1); + if (numwritten < 1) { + perror("write failed\n"); + close(fd); + return -1; + } + + close(fd); + + return (unsigned int) numwritten; +} + void print_hwp_cap(int cpu, struct msr_hwp_cap *cap, char *str) { if (cpu != -1) @@ -745,17 +790,61 @@ void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int ms put_msr(cpu, msr_offset, msr); } +static int get_epb(int cpu) +{ + char path[SYSFS_PATH_MAX]; + char linebuf[3]; + char *endp; + long val; + + if (!has_epb) + return -1; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/power/energy_perf_bias", cpu); + + if (!read_sysfs(path, linebuf, 3)) + return -1; + + val = strtol(linebuf, &endp, 0); + if (endp == linebuf || errno == ERANGE) + return -1; + + return (int)val; +} + +static int set_epb(int cpu, int val) +{ + char path[SYSFS_PATH_MAX]; + char linebuf[3]; + char *endp; + int ret; + + if (!has_epb) + return -1; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/power/energy_perf_bias", cpu); + snprintf(linebuf, sizeof(linebuf), "%d", val); + + ret = write_sysfs(path, linebuf, 3); + if (ret <= 0) + return -1; + + val = strtol(linebuf, &endp, 0); + if (endp == linebuf || errno == ERANGE) + return -1; + + return (int)val; +} + int print_cpu_msrs(int cpu) { - unsigned long long msr; struct msr_hwp_request req; struct msr_hwp_cap cap; + int epb; - if (has_epb) { - get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr); - - printf("cpu%d: EPB %u\n", cpu, (unsigned int) msr); - } + epb = get_epb(cpu); + if (epb >= 0) + printf("cpu%d: EPB %u\n", cpu, (unsigned int) epb); if (!has_hwp) return 0; @@ -1038,15 +1127,15 @@ int enable_hwp_on_cpu(int cpu) int update_cpu_msrs(int cpu) { unsigned long long msr; - + int epb; if (update_epb) { - get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr); - put_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, new_epb); + epb = get_epb(cpu); + set_epb(cpu, new_epb); if (verbose) printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n", - cpu, (unsigned int) msr, (unsigned int) new_epb); + cpu, epb, (unsigned int) new_epb); } if (update_turbo) { -- cgit v1.3.1 From 1c756cd429d8f3da33d31f2a970284b9d5260534 Mon Sep 17 00:00:00 2001 From: Al Grant Date: Fri, 13 Nov 2020 20:38:26 +0000 Subject: perf inject: Fix file corruption due to event deletion "perf inject" can create corrupt files when synthesizing sample events from AUX data. This happens when in the input file, the first event (for the AUX data) has a different sample_type from the second event (generally dummy). Specifically, they differ in the bits that indicate the standard fields appended to perf records in the mmap buffer. "perf inject" deletes the first event and moves up the second event to first position. The problem is with the synthetic PERF_RECORD_MMAP (etc.) events created by "perf record". Since these are synthetic versions of events which are normally produced by the kernel, they have to have the standard fields appended as described by sample_type. "perf record" fills these in with zeroes, including the IDENTIFIER field; perf readers interpret records with zero IDENTIFIER using the descriptor for the first event in the file. Since "perf inject" changes the first event, these synthetic records are then processed with the wrong value of sample_type, and the perf reader reads bad data, reports on incorrect length records etc. Mismatching sample_types are seen with "perf record -e cs_etm//", where the AUX event has TID|TIME|CPU|IDENTIFIER and the dummy event has TID|TIME|IDENTIFIER. Perhaps they could be the same, but it isn't normally a problem if they aren't - perf has no problems reading the file. The sample_types have to agree on the position of IDENTIFIER, because that's how perf finds the right event descriptor in the first place, but they don't normally have to agree on other fields, and perf doesn't check that they do. The problem is specific to the way "perf inject" reorganizes the events and the way synthetic MMAP events are recorded with a zero identifier. A simple solution is to stop "perf inject" deleting the tracing event. Committer testing Removed the now unused 'evsel' variable, update the comment about the evsel removal not being performed anymore, and apply the patch manually as it failed with this warning: warning: Patch sent with format=flowed; space at the end of lines might be lost. Testing it with: $ perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.543 msec (+- 0.130 msec) Average time per event: 0.838 usec (+- 0.013 usec) Average memory usage: 12717 KB (+- 9 KB) Average build-id-all injection took: 5.710 msec (+- 0.058 msec) Average time per event: 0.560 usec (+- 0.006 usec) Average memory usage: 12079 KB (+- 7 KB) $ Signed-off-by: Al Grant Acked-by: Adrian Hunter Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra LPU-Reference: b9cf5611-daae-2390-3439-6617f8f0a34b@foss.arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 452a75fe68e5..0462dc8db2e3 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -779,25 +779,15 @@ static int __cmd_inject(struct perf_inject *inject) dsos__hit_all(session); /* * The AUX areas have been removed and replaced with - * synthesized hardware events, so clear the feature flag and - * remove the evsel. + * synthesized hardware events, so clear the feature flag. */ if (inject->itrace_synth_opts.set) { - struct evsel *evsel; - perf_header__clear_feat(&session->header, HEADER_AUXTRACE); if (inject->itrace_synth_opts.last_branch || inject->itrace_synth_opts.add_last_branch) perf_header__set_feat(&session->header, HEADER_BRANCH_STACK); - evsel = perf_evlist__id2evsel_strict(session->evlist, - inject->aux_id); - if (evsel) { - pr_debug("Deleting %s\n", evsel__name(evsel)); - evlist__remove(session->evlist, evsel); - evsel__delete(evsel); - } } session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; -- cgit v1.3.1 From 3d05181a085c7a070746c838ea25aebf25f17d52 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 2 Nov 2020 16:00:25 +0800 Subject: perf vendor events: Update Skylake client events to v50 - Update Skylake events to v50. - Update Skylake JSON metrics from TMAM 4.0. - Fix the issue in DRAM_Parallel_Reads - Fix the perf test warning Before: root@kbl-ppc:~# perf stat -M DRAM_Parallel_Reads -- sleep 1 event syntax error: '{arb/event=0x80,umask=0x2/,arb/event=0x80,umask=0x2,thresh=1/}:W' \___ unknown term 'thresh' for pmu 'uncore_arb' valid terms: event,edge,inv,umask,cmask,config,config1,config2,name,period,percore Initial error: event syntax error: '..umask=0x2/,arb/event=0x80,umask=0x2,thresh=1/}:W' \___ Cannot find PMU `arb'. Missing kernel support? root@kbl-ppc:~# perf test metrics 10: PMU events : 10.3: Parsing of PMU event table metrics : Skip (some metrics failed) 10.4: Parsing of PMU event table metrics with fake PMUs: Ok 67: Parse and process metrics : Ok After: root@kbl-ppc:~# perf stat -M MEM_Parallel_Reads -- sleep 1 Performance counter stats for 'system wide': 4,951,646 arb/event=0x80,umask=0x2/ # 26.30 MEM_Parallel_Reads (50.04%) 188,251 arb/event=0x80,umask=0x2,cmask=1/ (49.96%) 1.000867010 seconds time elapsed root@kbl-ppc:~# perf test metrics 10: PMU events : 10.3: Parsing of PMU event table metrics : Ok 10.4: Parsing of PMU event table metrics with fake PMUs: Ok 67: Parse and process metrics : Ok Signed-off-by: Jin Yao Tested-by: Namhyung Kim Acked-by: Andi Kleen Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/lkml/93fae76f-ce2b-ab0b-3ae9-cc9a2b4cbaec@linux.intel.com/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/skylake/cache.json | 4100 ++++++++++---------- .../arch/x86/skylake/floating-point.json | 76 +- .../perf/pmu-events/arch/x86/skylake/frontend.json | 644 +-- tools/perf/pmu-events/arch/x86/skylake/memory.json | 2279 +++++------ tools/perf/pmu-events/arch/x86/skylake/other.json | 60 +- .../perf/pmu-events/arch/x86/skylake/pipeline.json | 1266 +++--- .../pmu-events/arch/x86/skylake/skl-metrics.json | 271 +- .../arch/x86/skylake/virtual-memory.json | 374 +- 8 files changed, 4560 insertions(+), 4510 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/skylake/cache.json b/tools/perf/pmu-events/arch/x86/skylake/cache.json index 720458139049..27ea2b00ad00 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/cache.json +++ b/tools/perf/pmu-events/arch/x86/skylake/cache.json @@ -1,2928 +1,2926 @@ [ { - "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x21", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read miss L2, no rejects", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.", - "EventCode": "0x24", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x22", - "EventName": "L2_RQSTS.RFO_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400400002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts L2 cache misses when fetching instructions.", - "EventCode": "0x24", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x24", - "EventName": "L2_RQSTS.CODE_RD_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "L2 cache misses when fetching instructions", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400108000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Demand requests that miss L2 cache.", - "EventCode": "0x24", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x27", - "EventName": "L2_RQSTS.ALL_DEMAND_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Demand requests that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100040002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.", - "EventCode": "0x24", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x38", - "EventName": "L2_RQSTS.PF_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0080001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "All requests that miss L2 cache.", - "EventCode": "0x24", + "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM", "Counter": "0,1,2,3", - "UMask": "0x3f", - "EventName": "L2_RQSTS.MISS", - "SampleAfterValue": "200003", - "BriefDescription": "All requests that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.DEMAND_RFO", + "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.", + "SampleAfterValue": "100003", + "UMask": "0x4" }, { - "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache", - "EventCode": "0x24", + "BriefDescription": "Counts all demand code readshave any response type.", "Counter": "0,1,2,3", - "UMask": "0xc1", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read requests that hit L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010004", + "Offcore": "1", + "PublicDescription": "Counts all demand code readshave any response type.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.", - "EventCode": "0x24", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0xc2", - "EventName": "L2_RQSTS.RFO_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests that hit L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100020002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.", - "EventCode": "0x24", + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache", "Counter": "0,1,2,3", - "UMask": "0xc4", - "EventName": "L2_RQSTS.CODE_RD_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "L2 cache hits when fetching instructions, code reads.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xd8", - "EventName": "L2_RQSTS.PF_HIT", + "EventName": "L2_RQSTS.PF_MISS", + "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.", "SampleAfterValue": "200003", - "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x38" }, { - "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.", - "EventCode": "0x24", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0xe1", - "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", - "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC01C0004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.", - "EventCode": "0x24", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0xe2", - "EventName": "L2_RQSTS.ALL_RFO", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests to L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100040001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the total number of L2 code requests.", - "EventCode": "0x24", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0xe4", - "EventName": "L2_RQSTS.ALL_CODE_RD", - "SampleAfterValue": "200003", - "BriefDescription": "L2 code requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040100001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Demand requests to L2 cache.", - "EventCode": "0x24", + "BriefDescription": "Demand requests that miss L2 cache", "Counter": "0,1,2,3", - "UMask": "0xe7", - "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", - "SampleAfterValue": "200003", - "BriefDescription": "Demand requests to L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.", + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xf8", - "EventName": "L2_RQSTS.ALL_PF", + "EventName": "L2_RQSTS.ALL_DEMAND_MISS", + "PublicDescription": "Demand requests that miss L2 cache.", "SampleAfterValue": "200003", - "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x27" }, { - "PublicDescription": "All L2 requests.", - "EventCode": "0x24", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0xff", - "EventName": "L2_RQSTS.REFERENCES", - "SampleAfterValue": "200003", - "BriefDescription": "All L2 requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400100002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.", - "EventCode": "0x2E", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x41", - "Errata": "SKL057", - "EventName": "LONGEST_LAT_CACHE.MISS", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080028000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", "SampleAfterValue": "100003", - "BriefDescription": "Core-originated cacheable demand requests missed L3", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all accesses to the L3.", - "EventCode": "0x2E", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x4f", - "Errata": "SKL057", - "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040080004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", "SampleAfterValue": "100003", - "BriefDescription": "Core-originated cacheable demand requests that refer to L3", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", - "EventCode": "0x48", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "L1D_PEND_MISS.PENDING", - "SampleAfterValue": "2000003", - "BriefDescription": "L1D miss outstandings duration in cycles", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0408000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts duration of L1D miss outstanding in cycles.", - "EventCode": "0x48", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with L1D load Misses outstanding.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400400004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x48", + "BriefDescription": "Retired load instructions with L3 cache hits as data sources", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L3_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.", + "SampleAfterValue": "50021", + "UMask": "0x4" }, { - "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.", - "EventCode": "0x48", + "BriefDescription": "L2 writebacks that access L2 cache", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "L1D_PEND_MISS.FB_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF0", + "EventName": "L2_TRANS.L2_WB", + "PublicDescription": "Counts L2 writebacks that access L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x40" }, { - "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.", - "EventCode": "0x51", + "BriefDescription": "L2 cache lines filling L2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "L1D.REPLACEMENT", - "SampleAfterValue": "2000003", - "BriefDescription": "L1D data line replacements", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF1", + "EventName": "L2_LINES_IN.ALL", + "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.", + "SampleAfterValue": "100003", + "UMask": "0x1f" }, { - "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.", - "EventCode": "0x60", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400408000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", - "EventCode": "0x60", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00401C0002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x60", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400040004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", + "BriefDescription": "Demand Data Read requests sent to uncore", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD", + "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0400004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000400001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080080002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080100001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100400004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.", - "EventCode": "0xB0", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10001C0004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", "SampleAfterValue": "100003", - "BriefDescription": "Demand Data Read requests sent to uncore", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts both cacheable and non-cacheable code read requests.", - "EventCode": "0xB0", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00801C0002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", - "BriefDescription": "Cacheable and noncachaeble code read requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.", - "EventCode": "0xB0", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS.DEMAND_RFO", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080100004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", "SampleAfterValue": "100003", - "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", - "EventCode": "0xB0", + "BriefDescription": "Retired load instructions missed L3 cache as data sources", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD", - "SampleAfterValue": "100003", - "BriefDescription": "Demand and prefetch data reads", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L3_MISS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.", + "SampleAfterValue": "100007", + "UMask": "0x20" }, { - "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..", - "EventCode": "0xB0", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040020001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", "SampleAfterValue": "100003", - "BriefDescription": "Any memory transaction that reached the SQ.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.", - "EventCode": "0xB2", + "BriefDescription": "All retired store instructions.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.ALL_STORES", + "L1_Hit_Indication": "1", + "PEBS": "1", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x82" }, { - "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "EventCode": "0xB7, 0xBB", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080020004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", "SampleAfterValue": "100003", - "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Retired load instructions that miss the STLB.", - "EventCode": "0xD0", + "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.", "Counter": "0,1,2,3", - "UMask": "0x11", - "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load instructions that miss the STLB. (Precise Event)", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF2", + "EventName": "L2_LINES_OUT.SILENT", + "SampleAfterValue": "200003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Retired store instructions that miss the STLB.", - "EventCode": "0xD0", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x12", - "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", - "SampleAfterValue": "100003", - "BriefDescription": "Retired store instructions that miss the STLB. (Precise Event)", "CounterHTOff": "0,1,2,3", - "Data_LA": "1", - "L1_Hit_Indication": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040048000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xD0", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x21", - "EventName": "MEM_INST_RETIRED.LOCK_LOADS", - "SampleAfterValue": "100007", - "BriefDescription": "Retired load instructions with locked access. (Precise Event)", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000088000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xD0", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "MEM_INST_RETIRED.SPLIT_LOADS", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load instructions that split across a cacheline boundary. (Precise Event)", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040400004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xD0", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x42", - "EventName": "MEM_INST_RETIRED.SPLIT_STORES", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x02001C0002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", - "BriefDescription": "Retired store instructions that split across a cacheline boundary. (Precise Event)", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1", - "L1_Hit_Indication": "1" + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xD0", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x81", - "EventName": "MEM_INST_RETIRED.ALL_LOADS", - "SampleAfterValue": "2000003", - "BriefDescription": "All retired load instructions. (Precise Event)", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080020001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "All retired store instructions.", - "EventCode": "0xD0", + "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", "Counter": "0,1,2,3", - "UMask": "0x82", - "EventName": "MEM_INST_RETIRED.ALL_STORES", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", "SampleAfterValue": "2000003", - "BriefDescription": "All retired store instructions. (Precise Event)", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1", - "L1_Hit_Indication": "1" + "UMask": "0x8" }, { - "PEBS": "1", - "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.", - "EventCode": "0xD1", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_LOAD_RETIRED.L1_HIT", - "SampleAfterValue": "2000003", - "BriefDescription": "Retired load instructions with L1 cache hits as data sources", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x02001C0004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Retired load instructions with L2 cache hits as data sources.", - "EventCode": "0xD1", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MEM_LOAD_RETIRED.L2_HIT", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load instructions with L2 cache hits as data sources", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400040002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Retired load instructions with L3 cache hits as data sources.", - "EventCode": "0xD1", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MEM_LOAD_RETIRED.L3_HIT", - "SampleAfterValue": "50021", - "BriefDescription": "Retired load instructions with L3 cache hits as data sources", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000028000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.", - "EventCode": "0xD1", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "MEM_LOAD_RETIRED.L1_MISS", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load instructions missed L1 cache as data sources", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200028000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Retired load instructions missed L2 cache as data sources.", - "EventCode": "0xD1", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "MEM_LOAD_RETIRED.L2_MISS", - "SampleAfterValue": "50021", - "BriefDescription": "Retired load instructions missed L2 cache as data sources", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04001C0001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Retired load instructions missed L3 cache as data sources.", - "EventCode": "0xD1", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "MEM_LOAD_RETIRED.L3_MISS", - "SampleAfterValue": "100007", - "BriefDescription": "Retired load instructions missed L3 cache as data sources", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100400001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.", - "EventCode": "0xD1", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "MEM_LOAD_RETIRED.FB_HIT", - "SampleAfterValue": "100007", - "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04001C8000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xD2", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS", - "SampleAfterValue": "20011", - "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000080001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.", - "EventCode": "0xD2", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT", - "SampleAfterValue": "20011", - "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200020001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.", - "EventCode": "0xD2", + "BriefDescription": "Core-originated cacheable demand requests missed L3", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM", - "SampleAfterValue": "20011", - "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL057", + "EventCode": "0x2E", + "EventName": "LONGEST_LAT_CACHE.MISS", + "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.", + "SampleAfterValue": "100003", + "UMask": "0x41" }, { - "PEBS": "1", - "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.", - "EventCode": "0xD2", + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_PF", + "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.", + "SampleAfterValue": "200003", + "UMask": "0xf8" }, { - "PEBS": "1", - "EventCode": "0xD4", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MEM_LOAD_MISC_RETIRED.UC", - "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.", "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0080004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts L2 writebacks that access L2 cache.", - "EventCode": "0xF0", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "L2_TRANS.L2_WB", - "SampleAfterValue": "200003", - "BriefDescription": "L2 writebacks that access L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100400002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.", - "EventCode": "0xF1", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1f", - "EventName": "L2_LINES_IN.ALL", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040040002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", - "BriefDescription": "L2 cache lines filling L2", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xF2", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "L2_LINES_OUT.SILENT", - "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000400002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xF2", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "L2_LINES_OUT.NON_SILENT", - "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100028000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF", - "EventCode": "0xF2", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "L2_LINES_OUT.USELESS_PREF", - "SampleAfterValue": "200003", - "BriefDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", + "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "UMask": "0x4" }, { - "EventCode": "0xF2", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "L2_LINES_OUT.USELESS_HWPF", - "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200080004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of cache line split locks sent to the uncore.", - "EventCode": "0xF4", + "BriefDescription": "RFO requests that miss L2 cache", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "SQ_MISC.SPLIT_LOCK", - "SampleAfterValue": "100003", - "BriefDescription": "Number of cache line split locks sent to uncore.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.RFO_MISS", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x22" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0408000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400040001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000408000", + "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF2", + "EventName": "L2_LINES_OUT.NON_SILENT", + "SampleAfterValue": "200003", + "UMask": "0x2" + }, + { "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040028000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400408000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200040002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200408000", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200408000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100408000", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080408000", + "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF2", + "EventName": "L2_LINES_OUT.USELESS_HWPF", + "SampleAfterValue": "200003", + "UMask": "0x4" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040408000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200040001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC01C8000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200100001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001C8000", + "BriefDescription": "Counts all demand data writes (RFOs)have any response type.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)have any response type.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001C8000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04001C0002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001C8000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00401C0004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001C8000", + "BriefDescription": "All requests that miss L2 cache", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.MISS", + "PublicDescription": "All requests that miss L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x3f" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801C8000", + "BriefDescription": "L2 code requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_CODE_RD", + "PublicDescription": "Counts the total number of L2 code requests.", + "SampleAfterValue": "200003", + "UMask": "0xe4" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401C8000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0100002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0108000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400100004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000108000", + "BriefDescription": "RFO requests that hit L2 cache", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.RFO_HIT", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0xc2" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400108000", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080048000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200108000", + "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "SampleAfterValue": "20011", + "UMask": "0x2" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100108000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10001C0001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080108000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000080002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040108000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000040002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0028000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0088000", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000088000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400080001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400088000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000080004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200088000", + "BriefDescription": "Retired load instructions missed L1 cache as data sources", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L1_MISS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x8" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100088000", + "BriefDescription": "L2 cache misses when fetching instructions", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "PublicDescription": "Counts L2 cache misses when fetching instructions.", + "SampleAfterValue": "200003", + "UMask": "0x24" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080088000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01001C0001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040088000", + "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD", + "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0048000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040080002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000048000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC01C0001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400048000", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400028000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400088000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200048000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100048000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080400002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x02001C8000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080048000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100020001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040048000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100080002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0028000", + "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", + "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000028000", + "BriefDescription": "Demand requests to L2 cache", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", + "PublicDescription": "Demand requests to L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0xe7" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400028000", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100048000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100108000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200028000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000020001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100028000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040020002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000108000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080028000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040100002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040028000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0040002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests have any response type.", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000018000", + "AnyThread": "1", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests have any response type.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0400004", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC01C8000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000400004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x02001C0001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400400004", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200088000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200400004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000100001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100400004", + "BriefDescription": "Core-originated cacheable demand requests that refer to L3", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL057", + "EventCode": "0x2E", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all accesses to the L3.", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4f" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080400004", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200048000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040400004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC01C0004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0020004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001C0004", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080108000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001C0004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040080001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001C0004", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100100002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001C0004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0040004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801C0004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040040001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401C0004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100020004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0100004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200020004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000100004", + "BriefDescription": "Retired load instructions that miss the STLB.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", + "PEBS": "1", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x11" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400100004", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200108000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200100004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100100004", + "BriefDescription": "Counts demand data readshave any response type.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data readshave any response type.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080100004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000400004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040100004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080080004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0080004", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10001C8000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000080004", + "BriefDescription": "L1D data line replacements", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x51", + "EventName": "L1D.REPLACEMENT", + "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400080004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200040004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200080004", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0400002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100080004", + "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD4", + "EventName": "MEM_LOAD_MISC_RETIRED.UC", + "PEBS": "1", + "SampleAfterValue": "100007", + "UMask": "0x4" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080080004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000040004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040080004", + "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.FB_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.", + "SampleAfterValue": "100007", + "UMask": "0x40" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0040004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0020001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000040004", + "BriefDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF2", + "EventName": "L2_LINES_OUT.USELESS_PREF", + "SampleAfterValue": "200003", + "UMask": "0x4" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400040004", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000020002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200040004", + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.PF_HIT", + "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0xd8" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100040004", + "BriefDescription": "Demand Data Read miss L2, no rejects", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS", + "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.", + "SampleAfterValue": "200003", + "UMask": "0x21" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080040004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040020004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040040004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400100001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0020004", + "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x8" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020004", + "BriefDescription": "All retired load instructions.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.ALL_LOADS", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x81" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020004", + "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS", + "PEBS": "1", + "SampleAfterValue": "20011", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020004", + "BriefDescription": "Demand Data Read requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", + "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.", + "SampleAfterValue": "200003", + "UMask": "0xe1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020004", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080020002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020004", + "BriefDescription": "All L2 requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.REFERENCES", + "PublicDescription": "All L2 requests.", + "SampleAfterValue": "200003", + "UMask": "0xff" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040020004", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000100002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010004", + "BriefDescription": "Cycles with L1D load Misses outstanding.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES", + "PublicDescription": "Counts duration of L1D miss outstanding in cycles.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0400002", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100088000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000400002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200400004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400400002", + "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "UMask": "0x4" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200400002", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0048000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100400002", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00401C8000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080400002", + "BriefDescription": "Number of cache line split locks sent to uncore.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF4", + "EventName": "SQ_MISC.SPLIT_LOCK", + "PublicDescription": "Counts the number of cache line split locks sent to the uncore.", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x10" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040400002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts all demand data writes (RFOs)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC01C0002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC01C0002", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040408000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001C0002", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10001C0002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001C0002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001C0002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts all demand data writes (RFOs)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400080002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001C0002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801C0002", + "BriefDescription": "Retired load instructions with L2 cache hits as data sources", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L2_HIT", + "PEBS": "1", + "PublicDescription": "Retired load instructions with L2 cache hits as data sources.", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401C0002", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0040001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0100002", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100100001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000100002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts all demand data writes (RFOs)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200100002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400100002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080040004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200100002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000020004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100100002", + "BriefDescription": "L1D miss outstandings duration in cycles", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.PENDING", + "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080100002", + "BriefDescription": "Demand Data Read requests that hit L2 cache", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache", + "SampleAfterValue": "200003", + "UMask": "0xc1" + }, + { "BriefDescription": "Counts all demand data writes (RFOs)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040400002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040100002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04001C0004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0080002", + "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.", + "SampleAfterValue": "20011", + "UMask": "0x4" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000080002", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0100001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400080002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts all demand data writes (RFOs)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01001C0002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200080002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100080002", + "BriefDescription": "Retired store instructions that split across a cacheline boundary.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.SPLIT_STORES", + "L1_Hit_Indication": "1", + "PEBS": "1", + "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x42" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080080002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080400004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040080002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000100004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0040002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00801C0004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000040002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts all demand data writes (RFOs)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0020002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400040002", + "BriefDescription": "Any memory transaction that reached the SQ.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS", + "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x80" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200040002", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100040002", + "BriefDescription": "Counts any other requestshave any response type.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000018000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requestshave any response type.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080040002", + "BriefDescription": "Cacheable and noncachaeble code read requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD", + "PublicDescription": "Counts both cacheable and non-cacheable code read requests.", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040040002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040040004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0020002", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100408000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020002", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020002", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0088000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020002", + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.", + "SampleAfterValue": "200003", + "UMask": "0xc4" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020002", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080040001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100040004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040020002", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080088000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs) have any response type.", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010002", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) have any response type.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000040001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0400001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040088000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000400001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200020002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400400001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100100004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200400001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040100004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100400001", + "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.FB_FULL", + "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080400001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400048000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040400001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0040108000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC01C0001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0108000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001C0001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000048000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001C0001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400080004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001C0001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100080004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001C0001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200100004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801C0001", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00801C0001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401C0001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0100001", + "BriefDescription": "Retired load instructions with L1 cache hits as data sources", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L1_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000100001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000408000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400100001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400020004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200100001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0100004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100100001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts demand data reads", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080100001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040100001", + "BriefDescription": "Retired load instructions with locked access.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.LOCK_LOADS", + "PEBS": "1", + "SampleAfterValue": "100007", + "UMask": "0x21" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0080001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00801C8000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000080001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080040002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400080001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01001C0004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200080001", + "BriefDescription": "Demand and prefetch data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD", + "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x8" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100080001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080408000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080080001", + "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040080001", + "BriefDescription": "Retired load instructions that split across a cacheline boundary.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.SPLIT_LOADS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x41" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0040001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts demand data reads", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0100080001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000040001", + "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB2", + "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL", + "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400040001", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "6", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200040001", + "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", + "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100040001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200080002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts demand data reads", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400020001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080040001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080100002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040040001", + "BriefDescription": "Retired store instructions that miss the STLB.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", + "L1_Hit_Indication": "1", + "PEBS": "1", "SampleAfterValue": "100003", + "UMask": "0x12" + }, + { "BriefDescription": "Counts demand data reads", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200080001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC0020001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01001C8000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020001", + "BriefDescription": "RFO requests to L2 cache", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_RFO", + "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.", + "SampleAfterValue": "200003", + "UMask": "0xe2" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0200400002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020001", + "BriefDescription": "Retired load instructions missed L2 cache as data sources", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L2_MISS", + "PEBS": "1", + "PublicDescription": "Retired load instructions missed L2 cache as data sources.", + "SampleAfterValue": "50021", + "UMask": "0x10" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0400020002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts demand data reads", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0080080001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020001", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "UMask": "0x8" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040020001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC0080002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads have any response type.", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010001", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads have any response type.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00401C0001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylake/floating-point.json b/tools/perf/pmu-events/arch/x86/skylake/floating-point.json index 213dd6230cf2..834e1cd841fc 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/skylake/floating-point.json @@ -1,67 +1,67 @@ [ { - "EventCode": "0xC7", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "EventCode": "0xC7", + "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", - "UMask": "0x2", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0xC7", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0xC7", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "EventCode": "0xC7", + "BriefDescription": "Cycles with any input/output SSE or FP assist", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xCA", + "EventName": "FP_ASSIST.ANY", + "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.", + "SampleAfterValue": "100003", + "UMask": "0x1e" }, { - "EventCode": "0xC7", + "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.", - "EventCode": "0xCA", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", - "UMask": "0x1e", - "EventName": "FP_ASSIST.ANY", - "SampleAfterValue": "100003", - "BriefDescription": "Cycles with any input/output SSE or FP assist", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", + "SampleAfterValue": "2000003", + "UMask": "0x8" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylake/frontend.json b/tools/perf/pmu-events/arch/x86/skylake/frontend.json index 7fa95a35e3ca..e84504d6adea 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/frontend.json +++ b/tools/perf/pmu-events/arch/x86/skylake/frontend.json @@ -1,482 +1,516 @@ [ { - "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", - "EventCode": "0x79", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "IDQ.MITE_UOPS", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x80", + "EventName": "ICACHE_16B.IFDATA_STALL", + "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.", - "EventCode": "0x79", + "BriefDescription": "Retired Instructions who experienced iTLB true miss.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "IDQ.MITE_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.ITLB_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x14", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", - "EventCode": "0x79", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_128", + "MSRIndex": "0x3F7", + "MSRValue": "0x408006", + "PEBS": "1", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", - "EventCode": "0x79", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", "Counter": "0,1,2,3", - "UMask": "0x8", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x79", "EventName": "IDQ.DSB_CYCLES", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", - "EventCode": "0x79", + "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "IDQ.MS_DSB_CYCLES", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "3", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE", + "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", - "EventCode": "0x79", + "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", "Counter": "0,1,2,3", - "UMask": "0x18", - "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xE6", + "EventName": "BACLEARS.ANY", + "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", - "EventCode": "0x79", + "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.", "Counter": "0,1,2,3", - "UMask": "0x18", - "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.DSB_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x11", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", - "EventCode": "0x79", + "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "IDQ.MS_MITE_UOPS", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", + "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.", "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", - "EventCode": "0x79", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x24", - "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles MITE is delivering 4 Uops", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_16", + "MSRIndex": "0x3F7", + "MSRValue": "0x401006", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", - "EventCode": "0x79", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", - "UMask": "0x24", - "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x79", + "EventName": "IDQ.MITE_UOPS", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles MITE is delivering any Uop", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", - "EventCode": "0x79", + "BriefDescription": "Cycles with less than 2 uops delivered by the front end.", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_CYCLES", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "2", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE", + "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "EventCode": "0x79", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", - "UMask": "0x30", - "EdgeDetect": "1", - "EventName": "IDQ.MS_SWITCHES", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", + "CounterHTOff": "0,1,2,3,4,5,6,7", "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.", "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_UOPS", + "EventName": "IDQ.MS_CYCLES", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x30" }, { - "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.", - "EventCode": "0x80", + "BriefDescription": "Cycles MITE is delivering any Uop", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "ICACHE_16B.IFDATA_STALL", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", + "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x24" }, { - "EventCode": "0x83", + "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", "Counter": "0,1,2,3", - "UMask": "0x1", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x83", "EventName": "ICACHE_64B.IFTAG_HIT", "SampleAfterValue": "200003", - "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0x83", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "ICACHE_64B.IFTAG_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0x79", + "EventName": "IDQ.MS_SWITCHES", + "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "SampleAfterValue": "2000003", + "UMask": "0x30" }, { - "EventCode": "0x83", + "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "ICACHE_64B.IFTAG_STALL", - "SampleAfterValue": "200003", - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.L2_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x13", + "PEBS": "1", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding \u201c4 \u2013 x\u201d when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", - "EventCode": "0x9C", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.MITE_CYCLES", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.", "SampleAfterValue": "2000003", - "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.", - "EventCode": "0x9C", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_64", + "MSRIndex": "0x3F7", + "MSRValue": "0x404006", + "PEBS": "1", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.", - "EventCode": "0x9C", + "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", + "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", - "CounterMask": "3", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.", - "EventCode": "0x9C", + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x79", + "EventName": "IDQ.MS_MITE_UOPS", + "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with less than 2 uops delivered by the front end.", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.", - "EventCode": "0x9C", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with less than 3 uops delivered by the front end.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x83", + "EventName": "ICACHE_64B.IFTAG_STALL", + "SampleAfterValue": "200003", + "UMask": "0x4" }, { - "EventCode": "0x9C", - "Invert": "1", + "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xAB", + "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", + "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.", "SampleAfterValue": "2000003", - "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0\u20132 cycles.", - "EventCode": "0xAB", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", + "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", "SampleAfterValue": "2000003", - "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x18" }, { - "PEBS": "1", - "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.", - "EventCode": "0xC6", - "MSRValue": "0x11", + "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.DSB_MISS", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.STLB_MISS", "MSRIndex": "0x3F7", + "MSRValue": "0x15", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.", "SampleAfterValue": "100007", - "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xC6", - "MSRValue": "0x12", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.L1I_MISS", - "MSRIndex": "0x3F7", - "SampleAfterValue": "100007", - "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss. Precise Event.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x79", + "EventName": "IDQ.DSB_UOPS", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "UMask": "0x8" }, { - "PEBS": "1", - "EventCode": "0xC6", - "MSRValue": "0x13", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.L2_MISS", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_512", "MSRIndex": "0x3F7", + "MSRValue": "0x420006", + "PEBS": "1", "SampleAfterValue": "100007", - "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.", - "EventCode": "0xC6", - "MSRValue": "0x14", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.ITLB_MISS", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_8", "MSRIndex": "0x3F7", + "MSRValue": "0x400806", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.", "SampleAfterValue": "100007", - "BriefDescription": "Retired Instructions who experienced iTLB true miss. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.", - "EventCode": "0xC6", - "MSRValue": "0x15", + "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.STLB_MISS", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_1", "MSRIndex": "0x3F7", + "MSRValue": "0x400106", + "PEBS": "2", + "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.", "SampleAfterValue": "100007", - "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xC6", - "MSRValue": "0x400206", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x1", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_2", "MSRIndex": "0x3F7", + "MSRValue": "0x400206", + "PEBS": "1", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xC6", - "MSRValue": "0x200206", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_4", "MSRIndex": "0x3F7", + "MSRValue": "0x400406", + "PEBS": "1", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xC6", - "MSRValue": "0x400406", + "BriefDescription": "Cycles MITE is delivering 4 Uops", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_4", - "MSRIndex": "0x3F7", - "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Precise Event.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0x79", + "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", + "SampleAfterValue": "2000003", + "UMask": "0x24" }, { - "PEBS": "1", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.", - "EventCode": "0xC6", - "MSRValue": "0x400806", + "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_8", - "MSRIndex": "0x3F7", - "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.MS_DSB_CYCLES", + "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "PEBS": "1", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.", - "EventCode": "0xC6", - "MSRValue": "0x401006", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_16", - "MSRIndex": "0x3F7", - "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall. Precise Event.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x79", + "EventName": "IDQ.MS_UOPS", + "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.", + "SampleAfterValue": "2000003", + "UMask": "0x30" }, { - "PEBS": "1", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.", - "EventCode": "0xC6", - "MSRValue": "0x402006", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_256", "MSRIndex": "0x3F7", + "MSRValue": "0x410006", + "PEBS": "1", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xC6", - "MSRValue": "0x404006", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_64", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2", "MSRIndex": "0x3F7", + "MSRValue": "0x200206", + "PEBS": "1", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xC6", - "MSRValue": "0x408006", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_128", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3", "MSRIndex": "0x3F7", + "MSRValue": "0x300206", + "PEBS": "1", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xC6", - "MSRValue": "0x410006", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_256", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1", "MSRIndex": "0x3F7", + "MSRValue": "0x100206", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xC6", - "MSRValue": "0x420006", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_512", - "MSRIndex": "0x3F7", - "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Precise Event.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0x79", + "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "UMask": "0x18" }, { - "PEBS": "1", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.", - "EventCode": "0xC6", - "MSRValue": "0x100206", + "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xAB", + "EventName": "DSB2MITE_SWITCHES.COUNT", + "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses.\nNote: Invoking MITE requires two or three cycles delay.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", "MSRIndex": "0x3F7", + "MSRValue": "0x402006", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "1", - "EventCode": "0xC6", - "MSRValue": "0x300206", + "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", + "Invert": "1", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x83", + "EventName": "ICACHE_64B.IFTAG_MISS", + "SampleAfterValue": "200003", + "UMask": "0x2" + }, + { + "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.L1I_MISS", "MSRIndex": "0x3F7", + "MSRValue": "0x12", + "PEBS": "1", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles with less than 3 uops delivered by the front end.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE", + "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.", + "SampleAfterValue": "2000003", + "UMask": "0x1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylake/memory.json b/tools/perf/pmu-events/arch/x86/skylake/memory.json index f197b4c7695b..7bd3ae338343 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/memory.json +++ b/tools/perf/pmu-events/arch/x86/skylake/memory.json @@ -1,1604 +1,1611 @@ [ { - "PublicDescription": "Number of times a TSX line had a cache conflict.", - "EventCode": "0x54", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "TX_MEM.ABORT_CONFLICT", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x54", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "TX_MEM.ABORT_CAPACITY", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000080004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.", - "EventCode": "0x54", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0104000001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.", - "EventCode": "0x54", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000100002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.", - "EventCode": "0x54", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0084008000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.", - "EventCode": "0x54", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x007C400004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Number of times we could not allocate Lock Buffer.", - "EventCode": "0x54", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x043C408000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x5d", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "TX_EXEC.MISC1", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0204000004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.", - "EventCode": "0x5d", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "TX_EXEC.MISC2", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000088000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.", - "EventCode": "0x5d", + "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "TX_EXEC.MISC3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "6", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "PublicDescription": "RTM region detected inside HLE.", - "EventCode": "0x5d", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "TX_EXEC.MISC4", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00BC408000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.", - "EventCode": "0x5d", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "TX_EXEC.MISC5", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103C408000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x60", + "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED_TIMER", "SampleAfterValue": "2000003", - "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0x60", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x023C400001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x60", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0204000002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xA3", + "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED_MEM", + "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "EventCode": "0xA3", + "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type", "Counter": "0,1,2,3", - "UMask": "0x6", - "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED_MEMTYPE", + "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x40" }, { - "PublicDescription": "Demand Data Read requests who miss L3 cache.", - "EventCode": "0xB0", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000080002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", - "BriefDescription": "Demand Data Read requests who miss L3 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.", - "EventCode": "0xC3", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x2", - "Errata": "SKL089", - "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1004008000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", "SampleAfterValue": "100003", - "BriefDescription": "Counts the number of machine clears due to memory order conflicts.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.", - "EventCode": "0xC8", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "HLE_RETIRED.START", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE execution started.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Number of times HLE commit succeeded.", - "EventCode": "0xC8", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "HLE_RETIRED.COMMIT", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE execution successfully committed", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "1", - "PublicDescription": "Number of times HLE abort was triggered. (PEBS)", - "EventCode": "0xC8", + "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "HLE_RETIRED.ABORTED", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "2", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0xC8", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "HLE_RETIRED.ABORTED_MEM", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000028000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xC8", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "HLE_RETIRED.ABORTED_TIMER", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FFC408000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", - "EventCode": "0xC8", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", + "MSRIndex": "0x3F6", + "MSRValue": "0x20", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.", - "EventCode": "0xC8", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "HLE_RETIRED.ABORTED_MEMTYPE", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x043C400002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xC8", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "HLE_RETIRED.ABORTED_EVENTS", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000020004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.", - "EventCode": "0xC9", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "RTM_RETIRED.START", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an RTM execution started.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0044000002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Number of times RTM commit succeeded.", - "EventCode": "0xC9", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "RTM_RETIRED.COMMIT", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an RTM execution successfully committed", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0204008000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "Number of times RTM abort was triggered. (PEBS)", - "EventCode": "0xC9", + "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "RTM_RETIRED.ABORTED", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK", + "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.", "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).", - "EventCode": "0xC9", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "RTM_RETIRED.ABORTED_MEM", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103C400001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xC9", + "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "RTM_RETIRED.ABORTED_TIMER", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_CONFLICT", + "PublicDescription": "Number of times a TSX line had a cache conflict.", "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.", - "EventCode": "0xC9", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1004000001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.", - "EventCode": "0xC9", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "RTM_RETIRED.ABORTED_MEMTYPE", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000080001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).", - "EventCode": "0xC9", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "RTM_RETIRED.ABORTED_EVENTS", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", + "MSRIndex": "0x3F6", + "MSRValue": "0x40", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "2003", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PEBS": "2", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.", - "EventCode": "0xCD", - "MSRValue": "0x4", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", - "MSRIndex": "0x3F6", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0204000001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", "SampleAfterValue": "100003", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PEBS": "2", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.", - "EventCode": "0xCD", - "MSRValue": "0x8", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", - "MSRIndex": "0x3F6", - "SampleAfterValue": "50021", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000020002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PEBS": "2", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.", - "EventCode": "0xCD", - "MSRValue": "0x10", + "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", - "MSRIndex": "0x3F6", - "SampleAfterValue": "20011", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY", + "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x8" }, { - "PEBS": "2", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.", - "EventCode": "0xCD", - "MSRValue": "0x20", + "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", - "MSRIndex": "0x3F6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC5", + "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "PEBS": "2", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.", - "EventCode": "0xCD", - "MSRValue": "0x40", + "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", - "MSRIndex": "0x3F6", - "SampleAfterValue": "2003", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC4", + "PublicDescription": "RTM region detected inside HLE.", + "SampleAfterValue": "2000003", + "UMask": "0x8" }, { - "PEBS": "2", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.", - "EventCode": "0xCD", - "MSRValue": "0x80", + "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", - "MSRIndex": "0x3F6", - "SampleAfterValue": "1009", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC3", + "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.", + "SampleAfterValue": "2000003", + "UMask": "0x4" }, { - "PEBS": "2", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.", - "EventCode": "0xCD", - "MSRValue": "0x100", + "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", - "MSRIndex": "0x3F6", - "SampleAfterValue": "503", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC2", + "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PEBS": "2", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.", - "EventCode": "0xCD", - "MSRValue": "0x200", + "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", - "MSRIndex": "0x3F6", - "SampleAfterValue": "101", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.", - "TakenAlone": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC1", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FFC408000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0404000001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203C408000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0084000004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103C408000", + "BriefDescription": "Number of times an RTM execution successfully committed", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.COMMIT", + "PublicDescription": "Number of times RTM commit succeeded.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043C408000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000100001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023C408000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103C400004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013C408000", + "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00BC408000", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000048000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007C408000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0104000002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC4008000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x203C400004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x007C408000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004008000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0104000004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004008000", + "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED_TIMER", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404008000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2004000004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204008000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1004000004", + "Offcore": "1", + "PublicDescription": "Counts all demand code reads", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0044008000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104008000", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0044000001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084008000", + "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY", + "SampleAfterValue": "2000003", + "UMask": "0x20" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044008000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0084000002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000408000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x043C400004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001C8000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00BC400002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts any other requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000408000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000108000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000040004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000088000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000400004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000048000", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000400002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts any other requests", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000028000", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x20001C0004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FFC400004", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103C400002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203C400004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x013C400004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103C400004", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0404008000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043C400004", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0104008000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023C400004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013C400004", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x013C408000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00BC400004", + "BriefDescription": "Number of times an HLE execution successfully committed", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.COMMIT", + "PublicDescription": "Number of times HLE commit succeeded.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007C400004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000020001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC4000004", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x203C408000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000004", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x023C400002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000004", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000040002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000004", + "BriefDescription": "Demand Data Read requests who miss L3 cache", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", + "PublicDescription": "Demand Data Read requests who miss L3 cache.", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x10" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00BC400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000004", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2004000002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000004", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x023C400004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044000004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FFC400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000400004", + "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY", + "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.", + "SampleAfterValue": "2000003", + "UMask": "0x20" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001C0004", + "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED_MEM", + "SampleAfterValue": "2000003", + "UMask": "0x8" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000100004", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", + "MSRIndex": "0x3F6", + "MSRValue": "0x100", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "503", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000080004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x013C400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000040004", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x203C400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020004", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x007C400002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FFC400002", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC4008000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203C400002", + "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED", + "PEBS": "1", + "PublicDescription": "Number of times RTM abort was triggered.", + "SampleAfterValue": "2000003", + "UMask": "0x4" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103C400002", + "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED", + "PEBS": "1", + "PublicDescription": "Number of times HLE abort was triggered.", + "SampleAfterValue": "2000003", + "UMask": "0x4" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043C400002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts all demand data writes (RFOs)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x203C400002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023C400002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0404000004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013C400002", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", + "MSRIndex": "0x3F6", + "MSRValue": "0x10", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "20011", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00BC400002", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2004008000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007C400002", + "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT", + "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.", + "SampleAfterValue": "2000003", + "UMask": "0x20" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC4000002", + "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000002", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC4000004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000002", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", + "MSRIndex": "0x3F6", + "MSRValue": "0x200", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "101", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000002", + "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_CAPACITY", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000002", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0084000001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000002", + "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED_MEMTYPE", + "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.", + "SampleAfterValue": "2000003", + "UMask": "0x40" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000002", + "BriefDescription": "Number of times an RTM execution started.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.START", + "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044000002", + "BriefDescription": "Counts the number of machine clears due to memory order conflicts.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL089", + "EventCode": "0xC3", + "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", + "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000400002", + "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH", + "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001C0002", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x20001C0002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000100002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000080002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", "BriefDescription": "Counts all demand data writes (RFOs)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1004000002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000040002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts all demand data writes (RFOs)", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020002", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs)", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x20001C0001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FFC400001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC4000002", + "Offcore": "1", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts demand data reads", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x007C400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203C400001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000100004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103C400001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x00BC400004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043C400001", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x043C400001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023C400001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013C400001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FFC400002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00BC400001", + "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED_EVENTS", + "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).", + "SampleAfterValue": "2000003", + "UMask": "0x80" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007C400001", + "BriefDescription": "Number of times an HLE execution started.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.START", + "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3FC4000001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FFC400004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000001", + "BriefDescription": "Counts demand data reads", "Counter": "0,1,2,3", - "UMask": "0x1", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2004000001", + "Offcore": "1", + "PublicDescription": "Counts demand data reads", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts demand data reads", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC4000001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000001", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", + "MSRIndex": "0x3F6", + "MSRValue": "0x80", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "1009", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000001", + "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL", + "PublicDescription": "Number of times we could not allocate Lock Buffer.", + "SampleAfterValue": "2000003", + "UMask": "0x40" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000108000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000001", + "BriefDescription": "Counts all demand code reads", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SPL_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0044000004", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand code reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x013C400002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044000001", + "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "6", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS", + "SampleAfterValue": "2000003", + "UMask": "0x6" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000400001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x023C408000", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts any other requests", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001C0001", + "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED_EVENTS", + "SampleAfterValue": "2000003", + "UMask": "0x80" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000100001", + "BriefDescription": "Counts any other requests", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x20001C8000", + "Offcore": "1", + "PublicDescription": "Counts any other requests", "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Counts demand data reads", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2000040001", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts demand data reads", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000080001", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", + "MSRIndex": "0x3F6", + "MSRValue": "0x4", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000040001", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", + "MSRIndex": "0x3F6", + "MSRValue": "0x8", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "50021", + "TakenAlone": "1", + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data reads", - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020001", + "BriefDescription": "Counts all demand data writes (RFOs)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6, 0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0404000002", "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "PublicDescription": "Counts all demand data writes (RFOs)", + "SampleAfterValue": "100003", + "UMask": "0x1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylake/other.json b/tools/perf/pmu-events/arch/x86/skylake/other.json index 84a316d380ac..1a3683f1de91 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/other.json +++ b/tools/perf/pmu-events/arch/x86/skylake/other.json @@ -1,48 +1,56 @@ [ { - "EventCode": "0x32", + "BriefDescription": "Number of PREFETCHW instructions executed.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "SW_PREFETCH_ACCESS.NTA", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x32", + "EventName": "SW_PREFETCH_ACCESS.PREFETCHW", "SampleAfterValue": "2000003", - "BriefDescription": "Number of PREFETCHNTA instructions executed.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "EventCode": "0x32", + "BriefDescription": "Number of PREFETCHT0 instructions executed.", "Counter": "0,1,2,3", - "UMask": "0x2", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x32", "EventName": "SW_PREFETCH_ACCESS.T0", "SampleAfterValue": "2000003", - "BriefDescription": "Number of PREFETCHT0 instructions executed.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0x32", + "BriefDescription": "Number of hardware interrupts received by the processor.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "SW_PREFETCH_ACCESS.T1_T2", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xCB", + "EventName": "HW_INTERRUPTS.RECEIVED", + "PublicDescription": "Counts the number of hardware interruptions received by the processor.", + "SampleAfterValue": "203", + "UMask": "0x1" }, { + "BriefDescription": "Number of PREFETCHNTA instructions executed.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0x32", + "EventName": "SW_PREFETCH_ACCESS.NTA", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "SW_PREFETCH_ACCESS.PREFETCHW", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x32", + "EventName": "SW_PREFETCH_ACCESS.T1_T2", "SampleAfterValue": "2000003", - "BriefDescription": "Number of PREFETCHW instructions executed.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "PublicDescription": "Counts the number of hardware interruptions received by the processor.", - "EventCode": "0xCB", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "HW_INTERRUPTS.RECEIVED", - "SampleAfterValue": "203", - "BriefDescription": "Number of hardware interrupts received by the processor.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x09", + "EventName": "MEMORY_DISAMBIGUATION.HISTORY_RESET", + "SampleAfterValue": "2000003", + "UMask": "0x1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylake/pipeline.json b/tools/perf/pmu-events/arch/x86/skylake/pipeline.json index 4a891fbbc4bb..f46e93a57fb4 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/skylake/pipeline.json @@ -1,967 +1,969 @@ [ { - "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", - "Counter": "Fixed counter 0", - "UMask": "0x1", - "EventName": "INST_RETIRED.ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Instructions retired from execution.", - "CounterHTOff": "Fixed counter 0" - }, - { - "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", - "Counter": "Fixed counter 1", - "UMask": "0x2", - "EventName": "CPU_CLK_UNHALTED.THREAD", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when the thread is not in halt state", - "CounterHTOff": "Fixed counter 1" + "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091, SKL044", + "EventCode": "0xC0", + "EventName": "INST_RETIRED.ANY_P", + "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", + "SampleAfterValue": "2000003" }, { - "Counter": "Fixed counter 1", - "UMask": "0x2", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "Invert": "1", + "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "CounterHTOff": "Fixed counter 1" + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", - "Counter": "Fixed counter 2", - "UMask": "0x3", - "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.4_PORTS_UTIL", + "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.", "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the core is not in halt state.", - "CounterHTOff": "Fixed counter 2" + "UMask": "0x10" }, { - "PublicDescription": "Counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:a. preceding store conflicts with the load (incomplete overlap),b. store forwarding is impossible due to u-arch limitations,c. preceding lock RMW operations are not forwarded,d. store has the no-forward bit set (uncacheable/page-split/masked stores),e. all-blocking stores are used (mostly, fences and port I/O), and others.The most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events. See the table of not supported store forwards in the Optimization Guide.", - "EventCode": "0x03", + "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "LD_BLOCKS.STORE_FORWARD", - "SampleAfterValue": "100003", - "BriefDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded .", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x14", + "EventName": "ARITH.DIVIDER_ACTIVE", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", - "EventCode": "0x03", + "BriefDescription": "False dependencies in MOB due to partial compare on address.", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "LD_BLOCKS.NO_SR", - "SampleAfterValue": "100003", - "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.", + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0x07", - "Counter": "0,1,2,3", - "UMask": "0x1", "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS", + "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.", "SampleAfterValue": "100003", - "BriefDescription": "False dependencies in MOB due to partial compare on address.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.", - "EventCode": "0x0D", + "BriefDescription": "Far branch instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "INT_MISC.RECOVERY_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "PEBS": "1", + "PublicDescription": "This event counts far branch instructions retired.", + "SampleAfterValue": "100007", + "UMask": "0x40" }, { - "EventCode": "0x0D", + "BriefDescription": "Counts the number of x87 uops dispatched.", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.X87", + "PublicDescription": "Counts the number of x87 uops executed.", "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0x0D", + "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x4C", + "EventName": "LOAD_HIT_PRE.SW_PF", + "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).", - "EventCode": "0x0E", + "BriefDescription": "Mispredicted direct and indirect near call instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_ISSUED.ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC5", + "EventName": "BR_MISP_RETIRED.NEAR_CALL", + "PEBS": "1", + "PublicDescription": "Counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.", + "SampleAfterValue": "400009", + "UMask": "0x2" }, { - "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", - "EventCode": "0x0E", - "Invert": "1", + "BriefDescription": "Total execution stalls.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_ISSUED.STALL_CYCLES", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to \u201cMixing Intel AVX and Intel SSE Code\u201d section of the Optimization Guide.", - "EventCode": "0x0E", + "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0x0E", - "Counter": "0,1,2,3", - "UMask": "0x20", "EventName": "UOPS_ISSUED.SLOW_LEA", "SampleAfterValue": "2000003", - "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "EventCode": "0x14", + "BriefDescription": "Cycles with less than 10 actually retired uops.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ARITH.DIVIDER_ACTIVE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "10", + "EventCode": "0xC2", + "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "Invert": "1", + "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", - "EventCode": "0x3C", + "BriefDescription": "Thread cycles when thread is not in halt state", "Counter": "0,1,2,3", - "UMask": "0x0", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", "EventName": "CPU_CLK_UNHALTED.THREAD_P", - "SampleAfterValue": "2000003", - "BriefDescription": "Thread cycles when thread is not in halt state", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", + "SampleAfterValue": "2000003" }, { - "EventCode": "0x3C", + "BriefDescription": "Cycles where at least 2 uops were executed per-thread", "Counter": "0,1,2,3", - "UMask": "0x0", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "2", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", + "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).", - "EventCode": "0x3C", + "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", "Counter": "0,1,2,3", - "UMask": "0x0", - "EdgeDetect": "1", - "EventName": "CPU_CLK_UNHALTED.RING0_TRANS", - "SampleAfterValue": "100007", - "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK", - "SampleAfterValue": "2503", - "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "SampleAfterValue": "25003", + "UMask": "0x1" }, { - "EventCode": "0x3C", + "BriefDescription": "Number of machine clears (nukes) of any type.", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2503", - "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0xC3", + "EventName": "MACHINE_CLEARS.COUNT", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "SampleAfterValue": "2503", - "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "AnyThread": "1", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "Counter": "Fixed counter 1", + "CounterHTOff": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "EventCode": "0x3C", + "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2503", - "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.THREAD", + "PublicDescription": "Number of uops to be executed per-thread each cycle.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "EventCode": "0x3C", + "BriefDescription": "Cycles where at least 3 uops were executed per-thread", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "3", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", + "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", "SampleAfterValue": "2000003", - "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0x3C", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2503", - "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", + "Invert": "1", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", - "EventCode": "0x4C", + "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LOAD_HIT_PRE.SW_PF", - "SampleAfterValue": "100003", - "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.BOUND_ON_STORES", + "SampleAfterValue": "2000003", + "UMask": "0x40" }, { - "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.", - "EventCode": "0x59", + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "8", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.", - "EventCode": "0x5E", + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "RS_EVENTS.EMPTY_CYCLES", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xA8", + "EventName": "LSD.CYCLES_ACTIVE", + "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.", - "EventCode": "0x5E", - "Invert": "1", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "RS_EVENTS.EMPTY_END", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0D", + "EventName": "INT_MISC.RECOVERY_CYCLES", + "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.", "SampleAfterValue": "2000003", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.", - "EventCode": "0x87", + "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ILD_STALL.LCP", - "SampleAfterValue": "2000003", - "BriefDescription": "Stalls caused by changing prefix length of the instruction.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "SampleAfterValue": "25003", + "UMask": "0x1" }, { - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.", - "EventCode": "0xA1", + "BriefDescription": "Cycles per thread when uops are executed in port 0", "Counter": "0,1,2,3", - "UMask": "0x1", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_0", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 0", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.", - "EventCode": "0xA1", + "BriefDescription": "Cycles per thread when uops are executed in port 1", "Counter": "0,1,2,3", - "UMask": "0x2", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_1", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.", - "EventCode": "0xA1", + "BriefDescription": "Cycles per thread when uops are executed in port 2", "Counter": "0,1,2,3", - "UMask": "0x4", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_2", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 2", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.", - "EventCode": "0xA1", + "BriefDescription": "Cycles per thread when uops are executed in port 3", "Counter": "0,1,2,3", - "UMask": "0x8", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 3", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.", - "EventCode": "0xA1", + "BriefDescription": "Cycles per thread when uops are executed in port 4", "Counter": "0,1,2,3", - "UMask": "0x10", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.", - "EventCode": "0xA1", + "BriefDescription": "Cycles per thread when uops are executed in port 5", "Counter": "0,1,2,3", - "UMask": "0x20", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 5", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.", - "EventCode": "0xA1", + "BriefDescription": "Cycles per thread when uops are executed in port 6", "Counter": "0,1,2,3", - "UMask": "0x40", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_6", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 6", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x40" }, { - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.", - "EventCode": "0xA1", + "BriefDescription": "Cycles per thread when uops are executed in port 7", "Counter": "0,1,2,3", - "UMask": "0x80", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", "EventName": "UOPS_DISPATCHED_PORT.PORT_7", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 7", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x80" }, { - "PublicDescription": "Counts resource-related stall cycles.", - "EventCode": "0xa2", + "AnyThread": "1", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "RESOURCE_STALLS.ANY", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0D", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Resource-related stall cycles", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", - "EventCode": "0xA2", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "RESOURCE_STALLS.SB", + "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", + "Counter": "1", + "CounterHTOff": "1", + "Errata": "SKL091, SKL044", + "EventCode": "0xC0", + "EventName": "INST_RETIRED.PREC_DIST", + "PEBS": "2", + "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xA3", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0xA8", + "EventName": "LSD.CYCLES_4_UOPS", + "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xA3", + "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.3_PORTS_UTIL", + "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.", "SampleAfterValue": "2000003", - "BriefDescription": "Total execution stalls.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "EventCode": "0xA3", + "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.", "Counter": "0,1,2,3", - "UMask": "0x5", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", - "CounterMask": "5", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.STORE_FORWARD", + "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.", + "SampleAfterValue": "100003", + "UMask": "0x2" }, { - "EventCode": "0xA3", + "AnyThread": "1", + "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "CounterMask": "8", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "25003", + "UMask": "0x1" }, { - "EventCode": "0xA3", + "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.", "Counter": "0,1,2,3", - "UMask": "0xc", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x59", + "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD", + "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "CounterMask": "12", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xA3", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x0E", + "EventName": "UOPS_ISSUED.STALL_CYCLES", + "Invert": "1", + "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", - "CounterMask": "16", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xA3", + "BriefDescription": "Not taken branch instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x14", - "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", - "CounterMask": "20", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND_NTAKEN", + "PublicDescription": "This event counts not taken branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x10" }, { - "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.", - "EventCode": "0xA6", + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "3", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.", - "EventCode": "0xA6", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "EXE_ACTIVITY.1_PORTS_UTIL", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.", - "EventCode": "0xA6", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "EXE_ACTIVITY.2_PORTS_UTIL", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.", - "EventCode": "0xA6", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "EXE_ACTIVITY.3_PORTS_UTIL", + "BriefDescription": "Reference cycles when the core is not in halt state.", + "Counter": "Fixed counter 2", + "CounterHTOff": "Fixed counter 2", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x3" }, { - "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.", - "EventCode": "0xA6", + "BriefDescription": "All mispredicted macro branch instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "EXE_ACTIVITY.4_PORTS_UTIL", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.", + "SampleAfterValue": "400009" }, { - "EventCode": "0xA6", + "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "EXE_ACTIVITY.BOUND_ON_STORES", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC1", + "EventName": "OTHER_ASSISTS.ANY", + "SampleAfterValue": "100003", + "UMask": "0x3f" }, { - "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).", - "EventCode": "0xA8", + "BriefDescription": "Cycles without actually retired uops.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.UOPS", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xC2", + "EventName": "UOPS_RETIRED.STALL_CYCLES", + "Invert": "1", + "PublicDescription": "This event counts cycles without actually retired uops.", "SampleAfterValue": "2000003", - "BriefDescription": "Number of Uops delivered by the LSD.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).", - "EventCode": "0xA8", + "BriefDescription": "Number of Uops delivered by the LSD.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_ACTIVE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA8", + "EventName": "LSD.UOPS", + "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).", - "EventCode": "0xA8", + "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_4_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "25003", + "UMask": "0x2" }, { - "PublicDescription": "Number of uops to be executed per-thread each cycle.", - "EventCode": "0xB1", + "BriefDescription": "Stalls caused by changing prefix length of the instruction.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.THREAD", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x87", + "EventName": "ILD_STALL.LCP", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", - "EventCode": "0xB1", - "Invert": "1", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.STALL_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "CounterHTOff": "0,1,2,3,4,5,6,7", "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EdgeDetect": "1", + "EventCode": "0x5E", + "EventName": "RS_EVENTS.EMPTY_END", + "Invert": "1", + "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", - "EventCode": "0xB1", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "16", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 1 uop was executed per-thread", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", - "EventCode": "0xB1", + "BriefDescription": "Taken branch instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 2 uops were executed per-thread", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "PEBS": "1", + "PublicDescription": "This event counts taken branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x20" }, { - "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", - "EventCode": "0xB1", + "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 3 uops were executed per-thread", - "CounterMask": "3", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.NO_SR", + "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", + "SampleAfterValue": "100003", + "UMask": "0x8" }, { - "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", - "EventCode": "0xB1", + "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0E", + "EventName": "UOPS_ISSUED.ANY", + "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 4 uops were executed per-thread", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Number of uops executed from any thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE", + "BriefDescription": "Core cycles when the thread is not in halt state", + "Counter": "Fixed counter 1", + "CounterHTOff": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.THREAD", + "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", "SampleAfterValue": "2000003", - "BriefDescription": "Number of uops executed on the core.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0xB1", + "AnyThread": "1", + "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "25003", + "UMask": "0x1" }, { - "EventCode": "0xB1", + "BriefDescription": "Direct and indirect near call instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "PEBS": "1", + "PublicDescription": "This event counts both direct and indirect near call instructions retired.", + "SampleAfterValue": "100007", + "UMask": "0x2" }, { - "EventCode": "0xB1", + "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xCC", + "EventName": "ROB_MISC_EVENTS.PAUSE_INST", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", - "CounterMask": "3", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x40" }, { - "EventCode": "0xB1", + "BriefDescription": "Resource-related stall cycles", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xa2", + "EventName": "RESOURCE_STALLS.ANY", + "PublicDescription": "Counts resource-related stall cycles.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xB1", - "Invert": "1", + "BriefDescription": "Self-modifying code (SMC) detected.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC3", + "EventName": "MACHINE_CLEARS.SMC", + "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.", + "SampleAfterValue": "100003", + "UMask": "0x4" + }, + { + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "5", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x5" }, { - "PublicDescription": "Counts the number of x87 uops executed.", - "EventCode": "0xB1", + "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "UOPS_EXECUTED.X87", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "25003", + "UMask": "0x2" + }, + { + "BriefDescription": "Cycles where at least 4 uops were executed per-thread", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", + "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of x87 uops dispatched.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", - "EventCode": "0xC0", + "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.", "Counter": "0,1,2,3", - "UMask": "0x0", - "Errata": "SKL091, SKL044", - "EventName": "INST_RETIRED.ANY_P", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC5", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "PEBS": "1", + "SampleAfterValue": "400009", + "UMask": "0x20" + }, + { + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "CounterMask": "20", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Number of instructions retired. General Counter - architectural event", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x14" }, { - "PEBS": "2", - "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.", - "EventCode": "0xC0", - "Counter": "1", - "UMask": "0x1", - "Errata": "SKL091, SKL044", - "EventName": "INST_RETIRED.PREC_DIST", + "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS", + "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.", "SampleAfterValue": "2000003", - "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", - "CounterHTOff": "1" + "UMask": "0x1" }, { - "PEBS": "2", - "PublicDescription": "Number of cycles using an always true condition applied to PEBS instructions retired event. (inst_ret< 16)", - "EventCode": "0xC0", - "Invert": "1", + "BriefDescription": "Number of cycles using always true condition applied to PEBS instructions retired event.", "Counter": "0,2,3", - "UMask": "0x1", + "CounterHTOff": "0,2,3", + "CounterMask": "10", "Errata": "SKL091, SKL044", + "EventCode": "0xC0", "EventName": "INST_RETIRED.TOTAL_CYCLES_PS", + "Invert": "1", + "PEBS": "2", + "PublicDescription": "Number of cycles using an always true condition applied to PEBS instructions retired event. (inst_ret< 16)", "SampleAfterValue": "2000003", - "BriefDescription": "Number of cycles using always true condition applied to PEBS instructions retired event.", - "CounterMask": "10", - "CounterHTOff": "0,2,3" + "UMask": "0x1" }, { - "EventCode": "0xC1", + "BriefDescription": "Retirement slots used.", "Counter": "0,1,2,3", - "UMask": "0x3f", - "EventName": "OTHER_ASSISTS.ANY", - "SampleAfterValue": "100003", - "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts the retirement slots used.", + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC2", - "Counter": "0,1,2,3", - "UMask": "0x2", "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "PublicDescription": "Counts the retirement slots used.", "SampleAfterValue": "2000003", - "BriefDescription": "Retirement slots used.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "This event counts cycles without actually retired uops.", - "EventCode": "0xC2", - "Invert": "1", + "AnyThread": "1", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_RETIRED.STALL_CYCLES", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "SampleAfterValue": "2000003" + }, + { + "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0E", + "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH", + "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles without actually retired uops.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", - "EventCode": "0xC2", - "Invert": "1", + "BriefDescription": "Number of macro-fused uops retired. (non precise)", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.MACRO_FUSED", + "PublicDescription": "Counts the number of macro-fused uops retired. (non precise)", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with less than 10 actually retired uops.", - "CounterMask": "10", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "PublicDescription": "Number of machine clears (nukes) of any type.", - "EventCode": "0xC3", + "BriefDescription": "Increments whenever there is an update to the LBR array.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "MACHINE_CLEARS.COUNT", - "SampleAfterValue": "100003", - "BriefDescription": "Number of machine clears (nukes) of any type.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xCC", + "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", + "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.", + "SampleAfterValue": "2000003", + "UMask": "0x20" }, { - "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.", - "EventCode": "0xC3", + "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MACHINE_CLEARS.SMC", - "SampleAfterValue": "100003", - "BriefDescription": "Self-modifying code (SMC) detected.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5E", + "EventName": "RS_EVENTS.EMPTY_CYCLES", + "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "PublicDescription": "Counts all (macro) branch instructions retired.", - "EventCode": "0xC4", + "BriefDescription": "Instructions retired from execution.", + "Counter": "Fixed counter 0", + "CounterHTOff": "Fixed counter 0", + "EventName": "INST_RETIRED.ANY", + "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "UMask": "0x0", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "2", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PEBS": "1", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.", - "EventCode": "0xC4", + "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", "Counter": "0,1,2,3", - "UMask": "0x1", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.CONDITIONAL", - "SampleAfterValue": "400009", - "BriefDescription": "Conditional branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA2", + "EventName": "RESOURCE_STALLS.SB", + "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", + "SampleAfterValue": "2000003", + "UMask": "0x8" }, { - "PEBS": "1", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.", - "EventCode": "0xC4", + "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.", "Counter": "0,1,2,3", - "UMask": "0x2", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.NEAR_CALL", - "SampleAfterValue": "100007", - "BriefDescription": "Direct and indirect near call instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.RING0_TRANS", + "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).", + "SampleAfterValue": "100007" }, { - "PEBS": "2", - "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", - "EventCode": "0xC4", + "BriefDescription": "All (macro) branch instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x4", + "CounterHTOff": "0,1,2,3", "Errata": "SKL091", + "EventCode": "0xC4", "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", + "PEBS": "2", + "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired.", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4" }, { - "PEBS": "1", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.", - "EventCode": "0xC4", + "BriefDescription": "Mispredicted macro branch instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x8", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", + "PEBS": "2", + "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x4" + }, + { + "BriefDescription": "Return instructions retired.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", "Errata": "SKL091", + "EventCode": "0xC4", "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "PEBS": "1", + "PublicDescription": "This event counts return instructions retired.", "SampleAfterValue": "100007", - "BriefDescription": "Return instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "PEBS": "1", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts not taken branch instructions retired.", - "EventCode": "0xC4", + "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.", "Counter": "0,1,2,3", - "UMask": "0x10", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.NOT_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Counts all not taken macro branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.1_PORTS_UTIL", + "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PEBS": "1", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.", - "EventCode": "0xC4", + "BriefDescription": "Not taken branch instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x20", + "CounterHTOff": "0,1,2,3,4,5,6,7", "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.NOT_TAKEN", + "PublicDescription": "This event counts not taken branch instructions retired.", "SampleAfterValue": "400009", - "BriefDescription": "Taken branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "PEBS": "1", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts far branch instructions retired.", - "EventCode": "0xC4", + "BriefDescription": "Conditional branch instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x40", + "CounterHTOff": "0,1,2,3,4,5,6,7", "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.FAR_BRANCH", - "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of far branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.CONDITIONAL", + "PEBS": "1", + "PublicDescription": "This event counts conditional branch instructions retired.", "SampleAfterValue": "400009", - "BriefDescription": "All mispredicted macro branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.", - "EventCode": "0xC5", + "BriefDescription": "Mispredicted conditional branch instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x1", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC5", "EventName": "BR_MISP_RETIRED.CONDITIONAL", + "PEBS": "1", + "PublicDescription": "This event counts mispredicted conditional branch instructions retired.", "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted conditional branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PEBS": "1", - "PublicDescription": "This event counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.", - "EventCode": "0xC5", + "BriefDescription": "Number of uops executed on the core.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "BR_MISP_RETIRED.NEAR_CALL", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted direct and indirect near call instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE", + "PublicDescription": "Number of uops executed from any thread.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PEBS": "2", - "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", - "EventCode": "0xC5", + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted macro branch instructions retired.", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "12", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "SampleAfterValue": "2000003", + "UMask": "0xc" }, { - "PEBS": "1", - "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.", - "EventCode": "0xC5", + "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.2_PORTS_UTIL", + "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x4" }, { - "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.", - "EventCode": "0xCC", + "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0D", + "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Increments whenever there is an update to the LBR array.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x80" }, { - "EventCode": "0xCC", + "BriefDescription": "All (macro) branch instructions retired.", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "ROB_MISC_EVENTS.PAUSE_INST", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts all (macro) branch instructions retired.", + "SampleAfterValue": "400009" + }, + { + "BriefDescription": "Cycles where at least 1 uop was executed per-thread", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", "SampleAfterValue": "2000003", - "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.", - "EventCode": "0xE6", + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "BACLEARS.ANY", - "SampleAfterValue": "100003", - "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "SampleAfterValue": "2000003", + "UMask": "0x1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json index 8704efeb8d31..4cd246782dde 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json @@ -1,370 +1,371 @@ [ { - "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", "MetricGroup": "TopdownL1", - "MetricName": "Frontend_Bound", - "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound." + "MetricName": "Frontend_Bound" }, { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", "MetricGroup": "TopdownL1_SMT", - "MetricName": "Frontend_Bound_SMT", - "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU." + "MetricName": "Frontend_Bound_SMT" }, { - "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", "MetricGroup": "TopdownL1", - "MetricName": "Bad_Speculation", - "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example." + "MetricName": "Bad_Speculation" }, { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", "MetricGroup": "TopdownL1_SMT", - "MetricName": "Bad_Speculation_SMT", - "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU." + "MetricName": "Bad_Speculation_SMT" }, { + "MetricConstraint": "NO_NMI_WATCHDOG", + "MetricGroup": "TopdownL1", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", - "MetricGroup": "TopdownL1", - "MetricName": "Backend_Bound", - "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound." + "MetricName": "Backend_Bound" }, { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", "MetricGroup": "TopdownL1_SMT", - "MetricName": "Backend_Bound_SMT", - "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU." + "MetricName": "Backend_Bound_SMT" }, { - "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", "MetricGroup": "TopdownL1", - "MetricName": "Retiring", - "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. " + "MetricName": "Retiring" }, { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. SMT version; use when SMT is enabled and measuring per logical CPU.", "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", "MetricGroup": "TopdownL1_SMT", - "MetricName": "Retiring_SMT", - "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU." + "MetricName": "Retiring_SMT" }, { - "BriefDescription": "Instructions Per Cycle (per Logical Processor)", "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", - "MetricGroup": "TopDownL1", + "BriefDescription": "Instructions Per Cycle (per Logical Processor)", + "MetricGroup": "Summary", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "BriefDescription": "Uops Per Instruction", "MetricGroup": "Pipeline;Retire", "MetricName": "UPI" }, { - "BriefDescription": "Instruction per taken branch", "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Instruction per taken branch", "MetricGroup": "Branches;Fetch_BW;PGO", "MetricName": "IpTB" }, { - "BriefDescription": "Branch instructions per taken branch. ", - "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", - "MetricGroup": "Branches;PGO", - "MetricName": "BpTB" - }, - { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1 ) )", - "MetricGroup": "PGO;IcMiss", - "MetricName": "IFetch_Line_Utilization" - }, - { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)", - "MetricGroup": "DSB;Fetch_BW", - "MetricName": "DSB_Coverage" - }, - { - "BriefDescription": "Cycles Per Instruction (per Logical Processor)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", - "MetricGroup": "Pipeline;Summary", + "BriefDescription": "Cycles Per Instruction (per Logical Processor)", + "MetricGroup": "Pipeline", "MetricName": "CPI" }, { - "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots (per-Physical Core)", "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total issue-pipeline slots (per-Physical Core)", - "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", + "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)", "MetricGroup": "TopDownL1_SMT", "MetricName": "SLOTS_SMT" }, { - "BriefDescription": "Instructions per Load (lower number means higher occurance rate)", - "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS", - "MetricGroup": "Instruction_Type", - "MetricName": "IpL" - }, - { - "BriefDescription": "Instructions per Store (lower number means higher occurance rate)", - "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES", - "MetricGroup": "Instruction_Type", - "MetricName": "IpS" - }, - { - "BriefDescription": "Instructions per Branch (lower number means higher occurance rate)", - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", - "MetricGroup": "Branches;Instruction_Type", - "MetricName": "IpB" - }, - { - "BriefDescription": "Instruction per (near) call (lower number means higher occurance rate)", - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", - "MetricGroup": "Branches", - "MetricName": "IpCall" - }, - { - "BriefDescription": "Total number of retired Instructions", - "MetricExpr": "INST_RETIRED.ANY", - "MetricGroup": "Summary", - "MetricName": "Instructions" - }, - { - "BriefDescription": "Instructions Per Cycle (per physical core)", "MetricExpr": "INST_RETIRED.ANY / cycles", - "MetricGroup": "SMT", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": "SMT;TopDownL1", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", - "MetricGroup": "SMT", + "MetricGroup": "SMT;TopDownL1", "MetricName": "CoreIPC_SMT" }, { + "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / cycles", "BriefDescription": "Floating Point Operations Per Cycle", - "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / cycles", "MetricGroup": "FLOPS", "MetricName": "FLOPc" }, { + "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Floating Point Operations Per Cycle", - "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", "MetricGroup": "FLOPS_SMT", "MetricName": "FLOPc_SMT" }, { + "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 )", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", - "MetricGroup": "Pipeline", + "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { - "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)", "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", "MetricGroup": "BrMispredicts", "MetricName": "Branch_Misprediction_Cost" }, { - "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)", - "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", "MetricGroup": "BrMispredicts_SMT", "MetricName": "Branch_Misprediction_Cost_SMT" }, { - "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", "MetricGroup": "BrMispredicts", "MetricName": "IpMispredict" }, { - "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core", "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", + "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", + "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS", + "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)", + "MetricGroup": "Instruction_Type", + "MetricName": "IpLoad" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES", + "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)", + "MetricGroup": "Instruction_Type", + "MetricName": "IpStore" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", + "MetricGroup": "Branches;Instruction_Type", + "MetricName": "IpBranch" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)", + "MetricGroup": "Branches", + "MetricName": "IpCall" + }, + { + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Branch instructions per taken branch. ", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTkBranch" + }, + { + "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )", + "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)", + "MetricGroup": "FLOPS;FP_Arith;Instruction_Type", + "MetricName": "IpFLOP" + }, + { + "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", + "MetricGroup": "Summary;TopDownL1", + "MetricName": "Instructions" + }, + { + "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Fetch_BW", + "MetricName": "DSB_Coverage" + }, + { "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { - "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)", "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { - "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricConstraint": "NO_NMI_WATCHDOG", "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", "MetricGroup": "TLB", - "MetricName": "Page_Walks_Utilization", - "MetricConstraint": "NO_NMI_WATCHDOG" + "MetricName": "Page_Walks_Utilization" }, { + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) )", "MetricGroup": "TLB_SMT", "MetricName": "Page_Walks_Utilization_SMT" }, { - "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", "MetricGroup": "Memory_BW", "MetricName": "L1D_Cache_Fill_BW" }, { - "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", "MetricGroup": "Memory_BW", "MetricName": "L2_Cache_Fill_BW" }, { - "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", "MetricGroup": "Memory_BW", "MetricName": "L3_Cache_Fill_BW" }, { - "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time", - "MetricGroup": "Memory_BW", + "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW;Offcore", "MetricName": "L3_Cache_Access_BW" }, { - "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY", + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", "MetricGroup": "Cache_Misses", "MetricName": "L1MPKI" }, { - "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", "MetricGroup": "Cache_Misses", "MetricName": "L2MPKI" }, { - "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY", - "MetricGroup": "Cache_Misses", + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;Offcore", "MetricName": "L2MPKI_All" }, { - "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY", + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", "MetricGroup": "Cache_Misses", "MetricName": "L2HPKI_All" }, { - "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY", + "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", "MetricGroup": "Cache_Misses", "MetricName": "L3MPKI" }, { - "BriefDescription": "Average CPU Utilization", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", - "MetricGroup": "Summary", + "BriefDescription": "Average CPU Utilization", + "MetricGroup": "HPC;Summary", "MetricName": "CPU_Utilization" }, { + "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 ) / duration_time", "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 ) / duration_time", - "MetricGroup": "FLOPS;Summary", + "MetricGroup": "FLOPS;HPC", "MetricName": "GFLOPs" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 )", "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active", - "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", - "MetricGroup": "SMT;Summary", + "MetricGroup": "SMT", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", - "MetricGroup": "Summary", + "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode", + "MetricGroup": "OS", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", "MetricExpr": "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000", - "MetricGroup": "Memory_BW", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "HPC;Memory_BW;SoC", "MetricName": "DRAM_BW_Use" }, { + "MetricExpr": "arb@event\\=0x80\\,umask\\=0x2@ / arb@event\\=0x80\\,umask\\=0x2\\,cmask\\=1@", "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", - "MetricExpr": "arb@event\\=0x80\\,umask\\=0x2@ / arb@event\\=0x80\\,umask\\=0x2\\,thresh\\=1@", - "MetricGroup": "Memory_BW", - "MetricName": "DRAM_Parallel_Reads" + "MetricGroup": "Memory_BW;SoC", + "MetricName": "MEM_Parallel_Reads" }, { - "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions. )", "MetricExpr": "INST_RETIRED.ANY / ( BR_INST_RETIRED.FAR_BRANCH / 2 )", - "MetricGroup": "", + "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]", + "MetricGroup": "Branches;OS", "MetricName": "IpFarBranch" }, { - "BriefDescription": "C3 residency percent per core", "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "BriefDescription": "C3 residency percent per core", "MetricGroup": "Power", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "BriefDescription": "C6 residency percent per core", "MetricGroup": "Power", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "BriefDescription": "C7 residency percent per core", "MetricGroup": "Power", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "BriefDescription": "C2 residency percent per package", "MetricGroup": "Power", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "BriefDescription": "C3 residency percent per package", "MetricGroup": "Power", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "BriefDescription": "C6 residency percent per package", "MetricGroup": "Power", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "BriefDescription": "C7 residency percent per package", "MetricGroup": "Power", "MetricName": "C7_Pkg_Residency" } diff --git a/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json index 2bcba7daca14..432530d15c26 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json @@ -1,284 +1,284 @@ [ { - "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", - "EventCode": "0x08", + "BriefDescription": "Store misses in all DTLB levels that cause page walks", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK", + "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", "SampleAfterValue": "100003", - "BriefDescription": "Load misses in all DTLB levels that cause page walks", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K", - "SampleAfterValue": "2000003", - "BriefDescription": "Page walk completed due to a demand data load to a 4K page", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.", - "EventCode": "0x08", + "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M", - "SampleAfterValue": "2000003", - "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x4" }, { - "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", - "EventCode": "0x08", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G", - "SampleAfterValue": "2000003", - "BriefDescription": "Page walk completed due to a demand data load to a 1G page", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_PENDING", + "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.", + "SampleAfterValue": "100003", + "UMask": "0x10" }, { - "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.", - "EventCode": "0x08", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)", "Counter": "0,1,2,3", - "UMask": "0xe", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.", - "EventCode": "0x08", + "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "DTLB_LOAD_MISSES.WALK_PENDING", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xAE", + "EventName": "ITLB.ITLB_FLUSH", + "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).", + "SampleAfterValue": "100007", + "UMask": "0x1" }, { - "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.", - "EventCode": "0x08", + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE", - "SampleAfterValue": "100003", - "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", + "CounterHTOff": "0,1,2,3,4,5,6,7", "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_ACTIVE", + "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.", + "SampleAfterValue": "100003", + "UMask": "0x10" }, { - "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).", - "EventCode": "0x08", + "BriefDescription": "Loads that miss the DTLB and hit the STLB.", "Counter": "0,1,2,3", - "UMask": "0x20", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", "EventName": "DTLB_LOAD_MISSES.STLB_HIT", + "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).", "SampleAfterValue": "2000003", - "BriefDescription": "Loads that miss the DTLB and hit the STLB.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_PENDING", + "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.", + "SampleAfterValue": "2000003", + "UMask": "0x10" + }, + { + "BriefDescription": "DTLB flush attempts of the thread-specific entries", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK", - "SampleAfterValue": "100003", - "BriefDescription": "Store misses in all DTLB levels that cause page walks", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xBD", + "EventName": "TLB_FLUSH.DTLB_THREAD", + "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.", + "SampleAfterValue": "100007", + "UMask": "0x1" }, { - "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", - "EventCode": "0x49", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K", - "SampleAfterValue": "100003", - "BriefDescription": "Page walk completed due to a demand data store to a 4K page", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_PENDING", + "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.", - "EventCode": "0x49", + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.", "SampleAfterValue": "100003", - "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages. The page walks can end with or without a page fault.", - "EventCode": "0x49", + "BriefDescription": "Misses at all ITLB levels that cause page walks", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK", + "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.", "SampleAfterValue": "100003", - "BriefDescription": "Page walk completed due to a demand data store to a 1G page", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.", - "EventCode": "0x49", + "BriefDescription": "Stores that miss the DTLB and hit the STLB.", "Counter": "0,1,2,3", - "UMask": "0xe", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).", "SampleAfterValue": "100003", - "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.", - "EventCode": "0x49", + "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "DTLB_STORE_MISSES.WALK_PENDING", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0xe" }, { - "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.", - "EventCode": "0x49", + "BriefDescription": "Page walk completed due to a demand data store to a 4K page", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).", - "EventCode": "0x49", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED_1G", + "PublicDescription": "Counts completed page walks (1G page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "BriefDescription": "Stores that miss the DTLB and hit the STLB.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.", - "EventCode": "0x4F", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "EPT.WALK_PENDING", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0xe" }, { - "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.", - "EventCode": "0x85", + "BriefDescription": "Page walk completed due to a demand data load to a 4K page", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK", - "SampleAfterValue": "100003", - "BriefDescription": "Misses at all ITLB levels that cause page walks", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.", - "EventCode": "0x85", + "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "ITLB_MISSES.WALK_COMPLETED_4K", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.STLB_HIT", "SampleAfterValue": "100003", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.", - "EventCode": "0x85", + "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", - "SampleAfterValue": "100003", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "2000003", + "UMask": "0x4" }, { - "PublicDescription": "Counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.", - "EventCode": "0x85", + "BriefDescription": "Load misses in all DTLB levels that cause page walks", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "ITLB_MISSES.WALK_COMPLETED_1G", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", + "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", "SampleAfterValue": "100003", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "PublicDescription": "Counts completed page walks (2M and 4M page sizes) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.", - "EventCode": "0x85", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.", "Counter": "0,1,2,3", - "UMask": "0xe", - "EventName": "ITLB_MISSES.WALK_COMPLETED", - "SampleAfterValue": "100003", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x4f", + "EventName": "EPT.WALK_PENDING", + "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.", - "EventCode": "0x85", + "BriefDescription": "STLB flush attempts", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "ITLB_MISSES.WALK_PENDING", - "SampleAfterValue": "100003", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xBD", + "EventName": "TLB_FLUSH.STLB_ANY", + "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).", + "SampleAfterValue": "100007", + "UMask": "0x20" }, { - "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.", - "EventCode": "0x85", + "BriefDescription": "Page walk completed due to a demand data load to a 1G page", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "ITLB_MISSES.WALK_ACTIVE", - "SampleAfterValue": "100003", - "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G", + "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "2000003", + "UMask": "0x8" }, { - "EventCode": "0x85", + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "ITLB_MISSES.STLB_HIT", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.", "SampleAfterValue": "100003", - "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).", - "EventCode": "0xAE", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ITLB.ITLB_FLUSH", - "SampleAfterValue": "100007", - "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x4" }, { - "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.", - "EventCode": "0xBD", + "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "TLB_FLUSH.DTLB_THREAD", - "SampleAfterValue": "100007", - "BriefDescription": "DTLB flush attempts of the thread-specific entries", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0xe" }, { - "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).", - "EventCode": "0xBD", + "BriefDescription": "Page walk completed due to a demand data store to a 1G page", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "TLB_FLUSH.STLB_ANY", - "SampleAfterValue": "100007", - "BriefDescription": "STLB flush attempts", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G", + "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x8" } ] \ No newline at end of file -- cgit v1.3.1 From 29396cd573da08ae9ab0b75925c2f6b3cabb9dfa Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 26 Aug 2020 08:30:55 -0700 Subject: perf expr: Force encapsulation on expr_id_data This patch resolves some undefined behavior where variables in expr_id_data were accessed (for debugging) without being defined. To better enforce the tagged union behavior, the struct is moved into expr.c and accessors provided. Tag values (kinds) are explicitly identified. Signed-off-by: Ian Rogers Reviewed-By: Kajol Jain Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20200826153055.2067780-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.c | 68 +++++++++++++++++++++++++++++++++++++------ tools/perf/util/expr.h | 17 +++-------- tools/perf/util/expr.y | 2 +- tools/perf/util/metricgroup.c | 4 +-- 4 files changed, 66 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 53482ef53c41..a850fd0be3ee 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -17,6 +17,29 @@ extern int expr_debug; #endif +struct expr_id_data { + union { + double val; + struct { + double val; + const char *metric_name; + const char *metric_expr; + } ref; + struct expr_id *parent; + }; + + enum { + /* Holding a double value. */ + EXPR_ID_DATA__VALUE, + /* Reference to another metric. */ + EXPR_ID_DATA__REF, + /* A reference but the value has been computed. */ + EXPR_ID_DATA__REF_VALUE, + /* A parent is remembered for the recursion check. */ + EXPR_ID_DATA__PARENT, + } kind; +}; + static size_t key_hash(const void *key, void *ctx __maybe_unused) { const char *str = (const char *)key; @@ -48,6 +71,7 @@ int expr__add_id(struct expr_parse_ctx *ctx, const char *id) return -ENOMEM; data_ptr->parent = ctx->parent; + data_ptr->kind = EXPR_ID_DATA__PARENT; ret = hashmap__set(&ctx->ids, id, data_ptr, (const void **)&old_key, (void **)&old_data); @@ -69,7 +93,7 @@ int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val) if (!data_ptr) return -ENOMEM; data_ptr->val = val; - data_ptr->is_ref = false; + data_ptr->kind = EXPR_ID_DATA__VALUE; ret = hashmap__set(&ctx->ids, id, data_ptr, (const void **)&old_key, (void **)&old_data); @@ -114,8 +138,7 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) */ data_ptr->ref.metric_name = ref->metric_name; data_ptr->ref.metric_expr = ref->metric_expr; - data_ptr->ref.counted = false; - data_ptr->is_ref = true; + data_ptr->kind = EXPR_ID_DATA__REF; ret = hashmap__set(&ctx->ids, name, data_ptr, (const void **)&old_key, (void **)&old_data); @@ -148,17 +171,30 @@ int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, data = *datap; - pr_debug2("lookup: is_ref %d, counted %d, val %f: %s\n", - data->is_ref, data->ref.counted, data->val, id); - - if (data->is_ref && !data->ref.counted) { - data->ref.counted = true; + switch (data->kind) { + case EXPR_ID_DATA__VALUE: + pr_debug2("lookup(%s): val %f\n", id, data->val); + break; + case EXPR_ID_DATA__PARENT: + pr_debug2("lookup(%s): parent %s\n", id, data->parent->id); + break; + case EXPR_ID_DATA__REF: + pr_debug2("lookup(%s): ref metric name %s\n", id, + data->ref.metric_name); pr_debug("processing metric: %s ENTRY\n", id); - if (expr__parse(&data->val, ctx, data->ref.metric_expr, 1)) { + data->kind = EXPR_ID_DATA__REF_VALUE; + if (expr__parse(&data->ref.val, ctx, data->ref.metric_expr, 1)) { pr_debug("%s failed to count\n", id); return -1; } pr_debug("processing metric: %s EXIT: %f\n", id, data->val); + break; + case EXPR_ID_DATA__REF_VALUE: + pr_debug2("lookup(%s): ref val %f metric name %s\n", id, + data->ref.val, data->ref.metric_name); + break; + default: + assert(0); /* Unreachable. */ } return 0; @@ -241,3 +277,17 @@ int expr__find_other(const char *expr, const char *one, return ret; } + +double expr_id_data__value(const struct expr_id_data *data) +{ + if (data->kind == EXPR_ID_DATA__VALUE) + return data->val; + assert(data->kind == EXPR_ID_DATA__REF_VALUE); + return data->ref.val; +} + +struct expr_id *expr_id_data__parent(struct expr_id_data *data) +{ + assert(data->kind == EXPR_ID_DATA__PARENT); + return data->parent; +} diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index fc2b5e824a66..dcf8d19b83c8 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -23,19 +23,7 @@ struct expr_parse_ctx { struct expr_id *parent; }; -struct expr_id_data { - union { - double val; - struct { - const char *metric_name; - const char *metric_expr; - bool counted; - } ref; - struct expr_id *parent; - }; - - bool is_ref; -}; +struct expr_id_data; struct expr_scanner_ctx { int start_token; @@ -57,4 +45,7 @@ int expr__parse(double *final_val, struct expr_parse_ctx *ctx, int expr__find_other(const char *expr, const char *one, struct expr_parse_ctx *ids, int runtime); +double expr_id_data__value(const struct expr_id_data *data); +struct expr_id *expr_id_data__parent(struct expr_id_data *data); + #endif diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index d34b370391c6..b2ada8f8309a 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -93,7 +93,7 @@ expr: NUMBER YYABORT; } - $$ = data->val; + $$ = expr_id_data__value(data); free($1); } | expr '|' expr { $$ = (long)$1 | (long)$3; } diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 060454a17293..81d201c8b833 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -833,7 +833,7 @@ static int recursion_check(struct metric *m, const char *id, struct expr_id **pa if (ret) return ret; - p = data->parent; + p = expr_id_data__parent(data); while (p->parent) { if (!strcmp(p->id, id)) { @@ -854,7 +854,7 @@ static int recursion_check(struct metric *m, const char *id, struct expr_id **pa } p->id = strdup(id); - p->parent = data->parent; + p->parent = expr_id_data__parent(data); *parent = p; return p->id ? 0 : -ENOMEM; -- cgit v1.3.1 From 568beb27959b0515d325ea1c6cf211eed2d66740 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 13 Nov 2020 10:20:53 -0800 Subject: perf test: Avoid an msan warning in a copied stack. This fix is for a failure that occurred in the DWARF unwind perf test. Stack unwinders may probe memory when looking for frames. Memory sanitizer will poison and track uninitialized memory on the stack, and on the heap if the value is copied to the heap. This can lead to false memory sanitizer failures for the use of an uninitialized value. Avoid this problem by removing the poison on the copied stack. The full msan failure with track origins looks like: ==2168==WARNING: MemorySanitizer: use-of-uninitialized-value #0 0x559ceb10755b in handle_cfi elfutils/libdwfl/frame_unwind.c:648:8 #1 0x559ceb105448 in __libdwfl_frame_unwind elfutils/libdwfl/frame_unwind.c:741:4 #2 0x559ceb0ece90 in dwfl_thread_getframes elfutils/libdwfl/dwfl_frame.c:435:7 #3 0x559ceb0ec6b7 in get_one_thread_frames_cb elfutils/libdwfl/dwfl_frame.c:379:10 #4 0x559ceb0ec6b7 in get_one_thread_cb elfutils/libdwfl/dwfl_frame.c:308:17 #5 0x559ceb0ec6b7 in dwfl_getthreads elfutils/libdwfl/dwfl_frame.c:283:17 #6 0x559ceb0ec6b7 in getthread elfutils/libdwfl/dwfl_frame.c:354:14 #7 0x559ceb0ec6b7 in dwfl_getthread_frames elfutils/libdwfl/dwfl_frame.c:388:10 #8 0x559ceaff6ae6 in unwind__get_entries tools/perf/util/unwind-libdw.c:236:8 #9 0x559ceabc9dbc in test_dwarf_unwind__thread tools/perf/tests/dwarf-unwind.c:111:8 #10 0x559ceabca5cf in test_dwarf_unwind__compare tools/perf/tests/dwarf-unwind.c:138:26 #11 0x7f812a6865b0 in bsearch (libc.so.6+0x4e5b0) #12 0x559ceabca871 in test_dwarf_unwind__krava_3 tools/perf/tests/dwarf-unwind.c:162:2 #13 0x559ceabca926 in test_dwarf_unwind__krava_2 tools/perf/tests/dwarf-unwind.c:169:9 #14 0x559ceabca946 in test_dwarf_unwind__krava_1 tools/perf/tests/dwarf-unwind.c:174:9 #15 0x559ceabcae12 in test__dwarf_unwind tools/perf/tests/dwarf-unwind.c:211:8 #16 0x559ceabbc4ab in run_test tools/perf/tests/builtin-test.c:418:9 #17 0x559ceabbc4ab in test_and_print tools/perf/tests/builtin-test.c:448:9 #18 0x559ceabbac70 in __cmd_test tools/perf/tests/builtin-test.c:669:4 #19 0x559ceabbac70 in cmd_test tools/perf/tests/builtin-test.c:815:9 #20 0x559cea960e30 in run_builtin tools/perf/perf.c:313:11 #21 0x559cea95fbce in handle_internal_command tools/perf/perf.c:365:8 #22 0x559cea95fbce in run_argv tools/perf/perf.c:409:2 #23 0x559cea95fbce in main tools/perf/perf.c:539:3 Uninitialized value was stored to memory at #0 0x559ceb106acf in __libdwfl_frame_reg_set elfutils/libdwfl/frame_unwind.c:77:22 #1 0x559ceb106acf in handle_cfi elfutils/libdwfl/frame_unwind.c:627:13 #2 0x559ceb105448 in __libdwfl_frame_unwind elfutils/libdwfl/frame_unwind.c:741:4 #3 0x559ceb0ece90 in dwfl_thread_getframes elfutils/libdwfl/dwfl_frame.c:435:7 #4 0x559ceb0ec6b7 in get_one_thread_frames_cb elfutils/libdwfl/dwfl_frame.c:379:10 #5 0x559ceb0ec6b7 in get_one_thread_cb elfutils/libdwfl/dwfl_frame.c:308:17 #6 0x559ceb0ec6b7 in dwfl_getthreads elfutils/libdwfl/dwfl_frame.c:283:17 #7 0x559ceb0ec6b7 in getthread elfutils/libdwfl/dwfl_frame.c:354:14 #8 0x559ceb0ec6b7 in dwfl_getthread_frames elfutils/libdwfl/dwfl_frame.c:388:10 #9 0x559ceaff6ae6 in unwind__get_entries tools/perf/util/unwind-libdw.c:236:8 #10 0x559ceabc9dbc in test_dwarf_unwind__thread tools/perf/tests/dwarf-unwind.c:111:8 #11 0x559ceabca5cf in test_dwarf_unwind__compare tools/perf/tests/dwarf-unwind.c:138:26 #12 0x7f812a6865b0 in bsearch (libc.so.6+0x4e5b0) #13 0x559ceabca871 in test_dwarf_unwind__krava_3 tools/perf/tests/dwarf-unwind.c:162:2 #14 0x559ceabca926 in test_dwarf_unwind__krava_2 tools/perf/tests/dwarf-unwind.c:169:9 #15 0x559ceabca946 in test_dwarf_unwind__krava_1 tools/perf/tests/dwarf-unwind.c:174:9 #16 0x559ceabcae12 in test__dwarf_unwind tools/perf/tests/dwarf-unwind.c:211:8 #17 0x559ceabbc4ab in run_test tools/perf/tests/builtin-test.c:418:9 #18 0x559ceabbc4ab in test_and_print tools/perf/tests/builtin-test.c:448:9 #19 0x559ceabbac70 in __cmd_test tools/perf/tests/builtin-test.c:669:4 #20 0x559ceabbac70 in cmd_test tools/perf/tests/builtin-test.c:815:9 #21 0x559cea960e30 in run_builtin tools/perf/perf.c:313:11 #22 0x559cea95fbce in handle_internal_command tools/perf/perf.c:365:8 #23 0x559cea95fbce in run_argv tools/perf/perf.c:409:2 #24 0x559cea95fbce in main tools/perf/perf.c:539:3 Uninitialized value was stored to memory at #0 0x559ceb106a54 in handle_cfi elfutils/libdwfl/frame_unwind.c:613:9 #1 0x559ceb105448 in __libdwfl_frame_unwind elfutils/libdwfl/frame_unwind.c:741:4 #2 0x559ceb0ece90 in dwfl_thread_getframes elfutils/libdwfl/dwfl_frame.c:435:7 #3 0x559ceb0ec6b7 in get_one_thread_frames_cb elfutils/libdwfl/dwfl_frame.c:379:10 #4 0x559ceb0ec6b7 in get_one_thread_cb elfutils/libdwfl/dwfl_frame.c:308:17 #5 0x559ceb0ec6b7 in dwfl_getthreads elfutils/libdwfl/dwfl_frame.c:283:17 #6 0x559ceb0ec6b7 in getthread elfutils/libdwfl/dwfl_frame.c:354:14 #7 0x559ceb0ec6b7 in dwfl_getthread_frames elfutils/libdwfl/dwfl_frame.c:388:10 #8 0x559ceaff6ae6 in unwind__get_entries tools/perf/util/unwind-libdw.c:236:8 #9 0x559ceabc9dbc in test_dwarf_unwind__thread tools/perf/tests/dwarf-unwind.c:111:8 #10 0x559ceabca5cf in test_dwarf_unwind__compare tools/perf/tests/dwarf-unwind.c:138:26 #11 0x7f812a6865b0 in bsearch (libc.so.6+0x4e5b0) #12 0x559ceabca871 in test_dwarf_unwind__krava_3 tools/perf/tests/dwarf-unwind.c:162:2 #13 0x559ceabca926 in test_dwarf_unwind__krava_2 tools/perf/tests/dwarf-unwind.c:169:9 #14 0x559ceabca946 in test_dwarf_unwind__krava_1 tools/perf/tests/dwarf-unwind.c:174:9 #15 0x559ceabcae12 in test__dwarf_unwind tools/perf/tests/dwarf-unwind.c:211:8 #16 0x559ceabbc4ab in run_test tools/perf/tests/builtin-test.c:418:9 #17 0x559ceabbc4ab in test_and_print tools/perf/tests/builtin-test.c:448:9 #18 0x559ceabbac70 in __cmd_test tools/perf/tests/builtin-test.c:669:4 #19 0x559ceabbac70 in cmd_test tools/perf/tests/builtin-test.c:815:9 #20 0x559cea960e30 in run_builtin tools/perf/perf.c:313:11 #21 0x559cea95fbce in handle_internal_command tools/perf/perf.c:365:8 #22 0x559cea95fbce in run_argv tools/perf/perf.c:409:2 #23 0x559cea95fbce in main tools/perf/perf.c:539:3 Uninitialized value was stored to memory at #0 0x559ceaff8800 in memory_read tools/perf/util/unwind-libdw.c:156:10 #1 0x559ceb10f053 in expr_eval elfutils/libdwfl/frame_unwind.c:501:13 #2 0x559ceb1060cc in handle_cfi elfutils/libdwfl/frame_unwind.c:603:18 #3 0x559ceb105448 in __libdwfl_frame_unwind elfutils/libdwfl/frame_unwind.c:741:4 #4 0x559ceb0ece90 in dwfl_thread_getframes elfutils/libdwfl/dwfl_frame.c:435:7 #5 0x559ceb0ec6b7 in get_one_thread_frames_cb elfutils/libdwfl/dwfl_frame.c:379:10 #6 0x559ceb0ec6b7 in get_one_thread_cb elfutils/libdwfl/dwfl_frame.c:308:17 #7 0x559ceb0ec6b7 in dwfl_getthreads elfutils/libdwfl/dwfl_frame.c:283:17 #8 0x559ceb0ec6b7 in getthread elfutils/libdwfl/dwfl_frame.c:354:14 #9 0x559ceb0ec6b7 in dwfl_getthread_frames elfutils/libdwfl/dwfl_frame.c:388:10 #10 0x559ceaff6ae6 in unwind__get_entries tools/perf/util/unwind-libdw.c:236:8 #11 0x559ceabc9dbc in test_dwarf_unwind__thread tools/perf/tests/dwarf-unwind.c:111:8 #12 0x559ceabca5cf in test_dwarf_unwind__compare tools/perf/tests/dwarf-unwind.c:138:26 #13 0x7f812a6865b0 in bsearch (libc.so.6+0x4e5b0) #14 0x559ceabca871 in test_dwarf_unwind__krava_3 tools/perf/tests/dwarf-unwind.c:162:2 #15 0x559ceabca926 in test_dwarf_unwind__krava_2 tools/perf/tests/dwarf-unwind.c:169:9 #16 0x559ceabca946 in test_dwarf_unwind__krava_1 tools/perf/tests/dwarf-unwind.c:174:9 #17 0x559ceabcae12 in test__dwarf_unwind tools/perf/tests/dwarf-unwind.c:211:8 #18 0x559ceabbc4ab in run_test tools/perf/tests/builtin-test.c:418:9 #19 0x559ceabbc4ab in test_and_print tools/perf/tests/builtin-test.c:448:9 #20 0x559ceabbac70 in __cmd_test tools/perf/tests/builtin-test.c:669:4 #21 0x559ceabbac70 in cmd_test tools/perf/tests/builtin-test.c:815:9 #22 0x559cea960e30 in run_builtin tools/perf/perf.c:313:11 #23 0x559cea95fbce in handle_internal_command tools/perf/perf.c:365:8 #24 0x559cea95fbce in run_argv tools/perf/perf.c:409:2 #25 0x559cea95fbce in main tools/perf/perf.c:539:3 Uninitialized value was stored to memory at #0 0x559cea9027d9 in __msan_memcpy llvm/llvm-project/compiler-rt/lib/msan/msan_interceptors.cpp:1558:3 #1 0x559cea9d2185 in sample_ustack tools/perf/arch/x86/tests/dwarf-unwind.c:41:2 #2 0x559cea9d202c in test__arch_unwind_sample tools/perf/arch/x86/tests/dwarf-unwind.c:72:9 #3 0x559ceabc9cbd in test_dwarf_unwind__thread tools/perf/tests/dwarf-unwind.c:106:6 #4 0x559ceabca5cf in test_dwarf_unwind__compare tools/perf/tests/dwarf-unwind.c:138:26 #5 0x7f812a6865b0 in bsearch (libc.so.6+0x4e5b0) #6 0x559ceabca871 in test_dwarf_unwind__krava_3 tools/perf/tests/dwarf-unwind.c:162:2 #7 0x559ceabca926 in test_dwarf_unwind__krava_2 tools/perf/tests/dwarf-unwind.c:169:9 #8 0x559ceabca946 in test_dwarf_unwind__krava_1 tools/perf/tests/dwarf-unwind.c:174:9 #9 0x559ceabcae12 in test__dwarf_unwind tools/perf/tests/dwarf-unwind.c:211:8 #10 0x559ceabbc4ab in run_test tools/perf/tests/builtin-test.c:418:9 #11 0x559ceabbc4ab in test_and_print tools/perf/tests/builtin-test.c:448:9 #12 0x559ceabbac70 in __cmd_test tools/perf/tests/builtin-test.c:669:4 #13 0x559ceabbac70 in cmd_test tools/perf/tests/builtin-test.c:815:9 #14 0x559cea960e30 in run_builtin tools/perf/perf.c:313:11 #15 0x559cea95fbce in handle_internal_command tools/perf/perf.c:365:8 #16 0x559cea95fbce in run_argv tools/perf/perf.c:409:2 #17 0x559cea95fbce in main tools/perf/perf.c:539:3 Uninitialized value was created by an allocation of 'bf' in the stack frame of function 'perf_event__synthesize_mmap_events' #0 0x559ceafc5f60 in perf_event__synthesize_mmap_events tools/perf/util/synthetic-events.c:445 SUMMARY: MemorySanitizer: use-of-uninitialized-value elfutils/libdwfl/frame_unwind.c:648:8 in handle_cfi Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: clang-built-linux@googlegroups.com Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandeep Dasgupta Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201113182053.754625-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/dwarf-unwind.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c index 4e40402a4f81..478078fb0f22 100644 --- a/tools/perf/arch/x86/tests/dwarf-unwind.c +++ b/tools/perf/arch/x86/tests/dwarf-unwind.c @@ -38,6 +38,13 @@ static int sample_ustack(struct perf_sample *sample, stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size; memcpy(buf, (void *) sp, stack_size); +#ifdef MEMORY_SANITIZER + /* + * Copying the stack may copy msan poison, avoid false positives in the + * unwinder by removing the poison here. + */ + __msan_unpoison(buf, stack_size); +#endif stack->data = (char *) buf; stack->size = stack_size; return 0; -- cgit v1.3.1 From 08d3e27718bd45ea3284b1b99a2082a233b8667c Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 11 Nov 2020 13:26:36 +0100 Subject: KVM: selftests: Make test skipping consistent Signed-off-by: Andrew Jones Message-Id: <20201111122636.73346-12-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/kvm_pv_test.c | 4 ++-- tools/testing/selftests/kvm/x86_64/user_msr_test.c | 6 +++++- .../selftests/kvm/x86_64/vmx_preemption_timer_test.c | 14 +++++++------- 3 files changed, 14 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c index b10a27485bad..732b244d6956 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c @@ -211,8 +211,8 @@ int main(void) struct kvm_vm *vm; if (!kvm_check_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)) { - pr_info("will skip kvm paravirt restriction tests.\n"); - return 0; + print_skip("KVM_CAP_ENFORCE_PV_FEATURE_CPUID not supported"); + exit(KSFT_SKIP); } vm = vm_create_default(VCPU_ID, 0, guest_main); diff --git a/tools/testing/selftests/kvm/x86_64/user_msr_test.c b/tools/testing/selftests/kvm/x86_64/user_msr_test.c index cbe1b08890ff..f2a667ca49c1 100644 --- a/tools/testing/selftests/kvm/x86_64/user_msr_test.c +++ b/tools/testing/selftests/kvm/x86_64/user_msr_test.c @@ -209,7 +209,11 @@ int main(int argc, char *argv[]) run = vcpu_state(vm, VCPU_ID); rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); - TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); + if (!rc) { + print_skip("KVM_CAP_X86_USER_SPACE_MSR not supported"); + exit(KSFT_SKIP); + } + vm_enable_cap(vm, &cap); rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c index a7737af1224f..25b783070d21 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c @@ -169,6 +169,11 @@ int main(int argc, char *argv[]) */ nested_vmx_check_supported(); + if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) { + print_skip("KVM_CAP_NESTED_STATE not supported"); + exit(KSFT_SKIP); + } + /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); @@ -176,13 +181,8 @@ int main(int argc, char *argv[]) vcpu_regs_get(vm, VCPU_ID, ®s1); - if (kvm_check_cap(KVM_CAP_NESTED_STATE)) { - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); - } else { - pr_info("will skip vmx preemption timer checks\n"); - goto done; - } + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); for (stage = 1;; stage++) { _vcpu_run(vm, VCPU_ID); -- cgit v1.3.1 From 22f232d134e142022f5e4cf2de4587a34d5b7d65 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 11 Nov 2020 13:26:35 +0100 Subject: KVM: selftests: x86: Set supported CPUIDs on default VM Almost all tests do this anyway and the ones that don't don't appear to care. Only vmx_set_nested_state_test assumes that a feature (VMX) is disabled until later setting the supported CPUIDs. It's better to disable that explicitly anyway. Signed-off-by: Andrew Jones Message-Id: <20201111122636.73346-11-drjones@redhat.com> [Restore CPUID_VMX, or vmx_set_nested_state breaks. - Paolo] Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 3 --- .../testing/selftests/kvm/include/perf_test_util.h | 4 ---- tools/testing/selftests/kvm/lib/kvm_util.c | 11 +++++++++-- .../testing/selftests/kvm/set_memory_region_test.c | 2 -- .../selftests/kvm/x86_64/cr4_cpuid_sync_test.c | 1 - tools/testing/selftests/kvm/x86_64/debug_regs.c | 1 - tools/testing/selftests/kvm/x86_64/evmcs_test.c | 2 -- tools/testing/selftests/kvm/x86_64/smm_test.c | 2 -- tools/testing/selftests/kvm/x86_64/state_test.c | 1 - .../testing/selftests/kvm/x86_64/svm_vmcall_test.c | 1 - tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c | 1 - tools/testing/selftests/kvm/x86_64/user_msr_test.c | 1 - .../selftests/kvm/x86_64/vmx_apic_access_test.c | 1 - .../kvm/x86_64/vmx_close_while_nested_test.c | 1 - .../selftests/kvm/x86_64/vmx_dirty_log_test.c | 1 - .../kvm/x86_64/vmx_preemption_timer_test.c | 1 - .../kvm/x86_64/vmx_set_nested_state_test.c | 21 +++++++++++++++++++++ .../selftests/kvm/x86_64/vmx_tsc_adjust_test.c | 1 - 18 files changed, 30 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index b1f8731721b9..471baecb7772 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -740,9 +740,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, /* Cache the HVA pointer of the region */ host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); -#ifdef __x86_64__ - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); -#endif ucall_init(vm, NULL); /* Export the shared variables to the guest */ diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 2618052057b1..239421e4f6b8 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -179,10 +179,6 @@ static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) vm_vcpu_add_default(vm, vcpu_id, guest_code); -#ifdef __x86_64__ - vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid()); -#endif - vcpu_args->vcpu_id = vcpu_id; vcpu_args->gva = guest_test_virt_mem + (vcpu_id * vcpu_memory_bytes); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index df2035ac54a6..b2c426adb87f 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -311,8 +311,15 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, vm_create_irqchip(vm); #endif - for (i = 0; i < nr_vcpus; ++i) - vm_vcpu_add_default(vm, vcpuids ? vcpuids[i] : i, guest_code); + for (i = 0; i < nr_vcpus; ++i) { + uint32_t vcpuid = vcpuids ? vcpuids[i] : i; + + vm_vcpu_add_default(vm, vcpuid, guest_code); + +#ifdef __x86_64__ + vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid()); +#endif + } return vm; } diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index b3ece55a2da6..5fa5823661da 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -121,8 +121,6 @@ static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code) vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP, MEM_REGION_GPA, MEM_REGION_SLOT, MEM_REGION_SIZE / getpagesize(), 0); diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index 140e91901582..f40fd097cb35 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -81,7 +81,6 @@ int main(int argc, char *argv[]) /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); run = vcpu_state(vm, VCPU_ID); while (1) { diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c index 2fc6b3af81a1..6097a8283377 100644 --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c +++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c @@ -85,7 +85,6 @@ int main(void) } vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); run = vcpu_state(vm, VCPU_ID); /* Test software BPs - int3 */ diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 757928199f19..37b8a78f6b74 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -92,8 +92,6 @@ int main(int argc, char *argv[]) /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); - if (!nested_vmx_supported() || !kvm_check_cap(KVM_CAP_NESTED_STATE) || !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index ae39a220609f..613c42c5a9b8 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -102,8 +102,6 @@ int main(int argc, char *argv[]) /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); - run = vcpu_state(vm, VCPU_ID); vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA, diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index f6c8b9042f8a..32854c1462ad 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -165,7 +165,6 @@ int main(int argc, char *argv[]) /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); run = vcpu_state(vm, VCPU_ID); vcpu_regs_get(vm, VCPU_ID, ®s1); diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c index 0e1adb4e3199..be2ca157485b 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c @@ -44,7 +44,6 @@ int main(int argc, char *argv[]) nested_svm_check_supported(); vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); vcpu_alloc_svm(vm, &svm_gva); vcpu_args_set(vm, VCPU_ID, 1, svm_gva); diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c index f8e761149daa..e357d8e222d4 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c +++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c @@ -107,7 +107,6 @@ int main(void) uint64_t val; vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); val = 0; ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); diff --git a/tools/testing/selftests/kvm/x86_64/user_msr_test.c b/tools/testing/selftests/kvm/x86_64/user_msr_test.c index f2a667ca49c1..fe88b98908ee 100644 --- a/tools/testing/selftests/kvm/x86_64/user_msr_test.c +++ b/tools/testing/selftests/kvm/x86_64/user_msr_test.c @@ -205,7 +205,6 @@ int main(int argc, char *argv[]) /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); run = vcpu_state(vm, VCPU_ID); rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c index 1f65342d6cb7..d14888b34adb 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c @@ -87,7 +87,6 @@ int main(int argc, char *argv[]) nested_vmx_check_supported(); vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); kvm_get_cpu_address_width(&paddr_width, &vaddr_width); high_gpa = (1ul << paddr_width) - getpagesize(); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c index fe40ade06a49..2835a17f1b7a 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c @@ -57,7 +57,6 @@ int main(int argc, char *argv[]) nested_vmx_check_supported(); vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); /* Allocate VMX pages and shared descriptors (vmx_pages). */ vcpu_alloc_vmx(vm, &vmx_pages_gva); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c index e894a638a155..537de1068554 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -82,7 +82,6 @@ int main(int argc, char *argv[]) /* Create VM */ vm = vm_create_default(VCPU_ID, 0, l1_guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); run = vcpu_state(vm, VCPU_ID); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c index 25b783070d21..a07480aed397 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c @@ -176,7 +176,6 @@ int main(int argc, char *argv[]) /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); run = vcpu_state(vm, VCPU_ID); vcpu_regs_get(vm, VCPU_ID, ®s1); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c index d59f3eb67c8f..5827b9bae468 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c @@ -244,6 +244,22 @@ void test_vmx_nested_state(struct kvm_vm *vm) free(state); } +void disable_vmx(struct kvm_vm *vm) +{ + struct kvm_cpuid2 *cpuid = kvm_get_supported_cpuid(); + int i; + + for (i = 0; i < cpuid->nent; ++i) + if (cpuid->entries[i].function == 1 && + cpuid->entries[i].index == 0) + break; + TEST_ASSERT(i != cpuid->nent, "CPUID function 1 not found"); + + cpuid->entries[i].ecx &= ~CPUID_VMX; + vcpu_set_cpuid(vm, VCPU_ID, cpuid); + cpuid->entries[i].ecx |= CPUID_VMX; +} + int main(int argc, char *argv[]) { struct kvm_vm *vm; @@ -264,6 +280,11 @@ int main(int argc, char *argv[]) vm = vm_create_default(VCPU_ID, 0, 0); + /* + * First run tests with VMX disabled to check error handling. + */ + disable_vmx(vm); + /* Passing a NULL kvm_nested_state causes a EFAULT. */ test_nested_state_expect_efault(vm, NULL); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c index fbe8417cbc2c..7e33a350b053 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c @@ -132,7 +132,6 @@ int main(int argc, char *argv[]) nested_vmx_check_supported(); vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); /* Allocate VMX pages and shared descriptors (vmx_pages). */ vcpu_alloc_vmx(vm, &vmx_pages_gva); -- cgit v1.3.1 From 2acc3c1bc8e98bc66b1badec42e9ea205b4fcdaa Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Mon, 16 Nov 2020 18:16:33 +0800 Subject: selftests/bpf: Fix error return code in run_getsockopt_test() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 65b4414a05eb ("selftests/bpf: add sockopt test that exercises BPF_F_ALLOW_MULTI") Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201116101633.64627-1-wanghai38@huawei.com --- tools/testing/selftests/bpf/prog_tests/sockopt_multi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c index 29188d6f5c8d..51fac975b316 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c @@ -138,7 +138,8 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, */ buf = 0x40; - if (setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1) < 0) { + err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1); + if (err < 0) { log_err("Failed to call setsockopt(IP_TOS)"); goto detach; } -- cgit v1.3.1 From de91e631bdc7e6411989e1a9ab65501a31527e0b Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Sun, 15 Nov 2020 10:46:35 +0000 Subject: libbpf: bpf__find_by_name[_kind] should use btf__get_nr_types() When operating on split BTF, btf__find_by_name[_kind] will not iterate over all types since they use btf->nr_types to show the number of types to iterate over. For split BTF this is the number of types _on top of base BTF_, so it will underestimate the number of types to iterate over, especially for vmlinux + module BTF, where the latter is much smaller. Use btf__get_nr_types() instead. Fixes: ba451366bf44 ("libbpf: Implement basic split BTF support") Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1605437195-2175-1-git-send-email-alan.maguire@oracle.com --- tools/lib/bpf/btf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 2d0d064c6d31..8ff46cd30ca1 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -674,12 +674,12 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id) __s32 btf__find_by_name(const struct btf *btf, const char *type_name) { - __u32 i; + __u32 i, nr_types = btf__get_nr_types(btf); if (!strcmp(type_name, "void")) return 0; - for (i = 1; i <= btf->nr_types; i++) { + for (i = 1; i <= nr_types; i++) { const struct btf_type *t = btf__type_by_id(btf, i); const char *name = btf__name_by_offset(btf, t->name_off); @@ -693,12 +693,12 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name) __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, __u32 kind) { - __u32 i; + __u32 i, nr_types = btf__get_nr_types(btf); if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void")) return 0; - for (i = 1; i <= btf->nr_types; i++) { + for (i = 1; i <= nr_types; i++) { const struct btf_type *t = btf__type_by_id(btf, i); const char *name; -- cgit v1.3.1 From ee415d73dcc24caef7f6bbf292dcc365613d2188 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 15 Nov 2020 14:06:23 +0200 Subject: tools/testing/scatterlist: Fix test to compile and run Add missing define of ALIGN_DOWN to make the test build and run. In addition, __sg_alloc_table_from_pages now support unaligned maximum segment, so adapt the test result accordingly. Fixes: 07da1223ec93 ("lib/scatterlist: Add support in dynamic allocation of SG table from pages") Link: https://lore.kernel.org/r/20201115120623.139113-1-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- tools/testing/scatterlist/linux/mm.h | 1 + tools/testing/scatterlist/main.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h index 6ae907f375d2..f9a12005fcea 100644 --- a/tools/testing/scatterlist/linux/mm.h +++ b/tools/testing/scatterlist/linux/mm.h @@ -33,6 +33,7 @@ typedef unsigned long dma_addr_t; #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) #define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) +#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE) diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c index b2c7e9f7b8d3..f561aed7c657 100644 --- a/tools/testing/scatterlist/main.c +++ b/tools/testing/scatterlist/main.c @@ -52,9 +52,9 @@ int main(void) { const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT; struct test *test, tests[] = { - { -EINVAL, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 }, { -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 }, - { -EINVAL, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 }, + { 0, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 }, + { 0, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 }, { 0, 1, pfn(0), PAGE_SIZE, sgmax, 1 }, { 0, 1, pfn(0), 1, sgmax, 1 }, { 0, 2, pfn(0, 1), 2 * PAGE_SIZE, sgmax, 1 }, -- cgit v1.3.1 From 3f6719c7b62f0327c9091e26d0da10e65668229e Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 17 Nov 2020 23:29:28 +0000 Subject: bpf: Add bpf_bprm_opts_set helper The helper allows modification of certain bits on the linux_binprm struct starting with the secureexec bit which can be updated using the BPF_F_BPRM_SECUREEXEC flag. secureexec can be set by the LSM for privilege gaining executions to set the AT_SECURE auxv for glibc. When set, the dynamic linker disables the use of certain environment variables (like LD_PRELOAD). Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201117232929.2156341-1-kpsingh@chromium.org --- include/uapi/linux/bpf.h | 16 ++++++++++++++++ kernel/bpf/bpf_lsm.c | 26 ++++++++++++++++++++++++++ scripts/bpf_helpers_doc.py | 2 ++ tools/include/uapi/linux/bpf.h | 16 ++++++++++++++++ 4 files changed, 60 insertions(+) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 162999b12790..a52299b80b9d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3787,6 +3787,16 @@ union bpf_attr { * *ARG_PTR_TO_BTF_ID* of type *task_struct*. * Return * Pointer to the current task. + * + * long bpf_bprm_opts_set(struct linux_binprm *bprm, u64 flags) + * Description + * Set or clear certain options on *bprm*: + * + * **BPF_F_BPRM_SECUREEXEC** Set the secureexec bit + * which sets the **AT_SECURE** auxv for glibc. The bit + * is cleared if the flag is not specified. + * Return + * **-EINVAL** if invalid *flags* are passed, zero otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3948,6 +3958,7 @@ union bpf_attr { FN(task_storage_get), \ FN(task_storage_delete), \ FN(get_current_task_btf), \ + FN(bprm_opts_set), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4119,6 +4130,11 @@ enum bpf_lwt_encap_mode { BPF_LWT_ENCAP_IP, }; +/* Flags for bpf_bprm_opts_set helper */ +enum { + BPF_F_BPRM_SECUREEXEC = (1ULL << 0), +}; + #define __bpf_md_ptr(type, name) \ union { \ type name; \ diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 553107f4706a..b4f27a874092 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,29 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, return 0; } +/* Mask for all the currently supported BPRM option flags */ +#define BPF_F_BRPM_OPTS_MASK BPF_F_BPRM_SECUREEXEC + +BPF_CALL_2(bpf_bprm_opts_set, struct linux_binprm *, bprm, u64, flags) +{ + if (flags & ~BPF_F_BRPM_OPTS_MASK) + return -EINVAL; + + bprm->secureexec = (flags & BPF_F_BPRM_SECUREEXEC); + return 0; +} + +BTF_ID_LIST_SINGLE(bpf_bprm_opts_set_btf_ids, struct, linux_binprm) + +const static struct bpf_func_proto bpf_bprm_opts_set_proto = { + .func = bpf_bprm_opts_set, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &bpf_bprm_opts_set_btf_ids[0], + .arg2_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto * bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -71,6 +95,8 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_task_storage_get_proto; case BPF_FUNC_task_storage_delete: return &bpf_task_storage_delete_proto; + case BPF_FUNC_bprm_opts_set: + return &bpf_bprm_opts_set_proto; default: return tracing_prog_func_proto(func_id, prog); } diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 31484377b8b1..c5bc947a70ad 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -418,6 +418,7 @@ class PrinterHelpers(Printer): 'struct bpf_tcp_sock', 'struct bpf_tunnel_key', 'struct bpf_xfrm_state', + 'struct linux_binprm', 'struct pt_regs', 'struct sk_reuseport_md', 'struct sockaddr', @@ -465,6 +466,7 @@ class PrinterHelpers(Printer): 'struct bpf_tcp_sock', 'struct bpf_tunnel_key', 'struct bpf_xfrm_state', + 'struct linux_binprm', 'struct pt_regs', 'struct sk_reuseport_md', 'struct sockaddr', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 162999b12790..a52299b80b9d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3787,6 +3787,16 @@ union bpf_attr { * *ARG_PTR_TO_BTF_ID* of type *task_struct*. * Return * Pointer to the current task. + * + * long bpf_bprm_opts_set(struct linux_binprm *bprm, u64 flags) + * Description + * Set or clear certain options on *bprm*: + * + * **BPF_F_BPRM_SECUREEXEC** Set the secureexec bit + * which sets the **AT_SECURE** auxv for glibc. The bit + * is cleared if the flag is not specified. + * Return + * **-EINVAL** if invalid *flags* are passed, zero otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3948,6 +3958,7 @@ union bpf_attr { FN(task_storage_get), \ FN(task_storage_delete), \ FN(get_current_task_btf), \ + FN(bprm_opts_set), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4119,6 +4130,11 @@ enum bpf_lwt_encap_mode { BPF_LWT_ENCAP_IP, }; +/* Flags for bpf_bprm_opts_set helper */ +enum { + BPF_F_BPRM_SECUREEXEC = (1ULL << 0), +}; + #define __bpf_md_ptr(type, name) \ union { \ type name; \ -- cgit v1.3.1 From ea87ae85c9b31303a2e9d4c769d9f3ee8a3a60d1 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 17 Nov 2020 23:29:29 +0000 Subject: bpf: Add tests for bpf_bprm_opts_set helper The test forks a child process, updates the local storage to set/unset the securexec bit. The BPF program in the test attaches to bprm_creds_for_exec which checks the local storage of the current task to set the secureexec bit on the binary parameters (bprm). The child then execs a bash command with the environment variable TMPDIR set in the envp. The bash command returns a different exit code based on its observed value of the TMPDIR variable. Since TMPDIR is one of the variables that is ignored by the dynamic loader when the secureexec bit is set, one should expect the child execution to not see this value when the secureexec bit is set. Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201117232929.2156341-2-kpsingh@chromium.org --- .../selftests/bpf/prog_tests/test_bprm_opts.c | 116 +++++++++++++++++++++ tools/testing/selftests/bpf/progs/bprm_opts.c | 34 ++++++ 2 files changed, 150 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c create mode 100644 tools/testing/selftests/bpf/progs/bprm_opts.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c b/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c new file mode 100644 index 000000000000..2559bb775762 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2020 Google LLC. + */ + +#include +#include + +#include "bprm_opts.skel.h" +#include "network_helpers.h" + +#ifndef __NR_pidfd_open +#define __NR_pidfd_open 434 +#endif + +static const char * const bash_envp[] = { "TMPDIR=shouldnotbeset", NULL }; + +static inline int sys_pidfd_open(pid_t pid, unsigned int flags) +{ + return syscall(__NR_pidfd_open, pid, flags); +} + +static int update_storage(int map_fd, int secureexec) +{ + int task_fd, ret = 0; + + task_fd = sys_pidfd_open(getpid(), 0); + if (task_fd < 0) + return errno; + + ret = bpf_map_update_elem(map_fd, &task_fd, &secureexec, BPF_NOEXIST); + if (ret) + ret = errno; + + close(task_fd); + return ret; +} + +static int run_set_secureexec(int map_fd, int secureexec) +{ + int child_pid, child_status, ret, null_fd; + + child_pid = fork(); + if (child_pid == 0) { + null_fd = open("/dev/null", O_WRONLY); + if (null_fd == -1) + exit(errno); + dup2(null_fd, STDOUT_FILENO); + dup2(null_fd, STDERR_FILENO); + close(null_fd); + + /* Ensure that all executions from hereon are + * secure by setting a local storage which is read by + * the bprm_creds_for_exec hook and sets bprm->secureexec. + */ + ret = update_storage(map_fd, secureexec); + if (ret) + exit(ret); + + /* If the binary is executed with securexec=1, the dynamic + * loader ingores and unsets certain variables like LD_PRELOAD, + * TMPDIR etc. TMPDIR is used here to simplify the example, as + * LD_PRELOAD requires a real .so file. + * + * If the value of TMPDIR is set, the bash command returns 10 + * and if the value is unset, it returns 20. + */ + execle("/bin/bash", "bash", "-c", + "[[ -z \"${TMPDIR}\" ]] || exit 10 && exit 20", NULL, + bash_envp); + exit(errno); + } else if (child_pid > 0) { + waitpid(child_pid, &child_status, 0); + ret = WEXITSTATUS(child_status); + + /* If a secureexec occurred, the exit status should be 20 */ + if (secureexec && ret == 20) + return 0; + + /* If normal execution happened, the exit code should be 10 */ + if (!secureexec && ret == 10) + return 0; + } + + return -EINVAL; +} + +void test_test_bprm_opts(void) +{ + int err, duration = 0; + struct bprm_opts *skel = NULL; + + skel = bprm_opts__open_and_load(); + if (CHECK(!skel, "skel_load", "skeleton failed\n")) + goto close_prog; + + err = bprm_opts__attach(skel); + if (CHECK(err, "attach", "attach failed: %d\n", err)) + goto close_prog; + + /* Run the test with the secureexec bit unset */ + err = run_set_secureexec(bpf_map__fd(skel->maps.secure_exec_task_map), + 0 /* secureexec */); + if (CHECK(err, "run_set_secureexec:0", "err = %d\n", err)) + goto close_prog; + + /* Run the test with the secureexec bit set */ + err = run_set_secureexec(bpf_map__fd(skel->maps.secure_exec_task_map), + 1 /* secureexec */); + if (CHECK(err, "run_set_secureexec:1", "err = %d\n", err)) + goto close_prog; + +close_prog: + bprm_opts__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/bprm_opts.c b/tools/testing/selftests/bpf/progs/bprm_opts.c new file mode 100644 index 000000000000..5bfef2887e70 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bprm_opts.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2020 Google LLC. + */ + +#include "vmlinux.h" +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} secure_exec_task_map SEC(".maps"); + +SEC("lsm/bprm_creds_for_exec") +int BPF_PROG(secure_exec, struct linux_binprm *bprm) +{ + int *secureexec; + + secureexec = bpf_task_storage_get(&secure_exec_task_map, + bpf_get_current_task_btf(), 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + + if (secureexec && *secureexec) + bpf_bprm_opts_set(bprm, BPF_F_BPRM_SECUREEXEC); + + return 0; +} -- cgit v1.3.1 From 2adcba79e69d4a4c0ac3bb86f466d8b5df301608 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 13 Nov 2020 00:01:31 +0200 Subject: selftests/x86: Add a selftest for SGX Add a selftest for SGX. It is a trivial test where a simple enclave copies one 64-bit word of memory between two memory locations, but ensures that all SGX hardware and software infrastructure is functioning. Signed-off-by: Jarkko Sakkinen Signed-off-by: Borislav Petkov Acked-by: Jethro Beekman Cc: linux-kselftest@vger.kernel.org Link: https://lkml.kernel.org/r/20201112220135.165028-21-jarkko@kernel.org --- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/sgx/.gitignore | 2 + tools/testing/selftests/sgx/Makefile | 53 +++ tools/testing/selftests/sgx/call.S | 44 +++ tools/testing/selftests/sgx/defines.h | 21 ++ tools/testing/selftests/sgx/load.c | 277 +++++++++++++++ tools/testing/selftests/sgx/main.c | 246 ++++++++++++++ tools/testing/selftests/sgx/main.h | 38 +++ tools/testing/selftests/sgx/sigstruct.c | 391 ++++++++++++++++++++++ tools/testing/selftests/sgx/test_encl.c | 20 ++ tools/testing/selftests/sgx/test_encl.lds | 40 +++ tools/testing/selftests/sgx/test_encl_bootstrap.S | 89 +++++ 12 files changed, 1222 insertions(+) create mode 100644 tools/testing/selftests/sgx/.gitignore create mode 100644 tools/testing/selftests/sgx/Makefile create mode 100644 tools/testing/selftests/sgx/call.S create mode 100644 tools/testing/selftests/sgx/defines.h create mode 100644 tools/testing/selftests/sgx/load.c create mode 100644 tools/testing/selftests/sgx/main.c create mode 100644 tools/testing/selftests/sgx/main.h create mode 100644 tools/testing/selftests/sgx/sigstruct.c create mode 100644 tools/testing/selftests/sgx/test_encl.c create mode 100644 tools/testing/selftests/sgx/test_encl.lds create mode 100644 tools/testing/selftests/sgx/test_encl_bootstrap.S (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index d9c283503159..2e20e30a6faa 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -50,6 +50,7 @@ TARGETS += openat2 TARGETS += rseq TARGETS += rtc TARGETS += seccomp +TARGETS += sgx TARGETS += sigaltstack TARGETS += size TARGETS += sparc64 diff --git a/tools/testing/selftests/sgx/.gitignore b/tools/testing/selftests/sgx/.gitignore new file mode 100644 index 000000000000..fbaf0bda9a92 --- /dev/null +++ b/tools/testing/selftests/sgx/.gitignore @@ -0,0 +1,2 @@ +test_sgx +test_encl.elf diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile new file mode 100644 index 000000000000..d51c90663943 --- /dev/null +++ b/tools/testing/selftests/sgx/Makefile @@ -0,0 +1,53 @@ +top_srcdir = ../../../.. + +include ../lib.mk + +.PHONY: all clean + +CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh $(CC) \ + ../x86/trivial_64bit_program.c) + +ifndef OBJCOPY +OBJCOPY := $(CROSS_COMPILE)objcopy +endif + +INCLUDES := -I$(top_srcdir)/tools/include +HOST_CFLAGS := -Wall -Werror -g $(INCLUDES) -fPIC -z noexecstack +ENCL_CFLAGS := -Wall -Werror -static -nostdlib -nostartfiles -fPIC \ + -fno-stack-protector -mrdrnd $(INCLUDES) + +TEST_CUSTOM_PROGS := $(OUTPUT)/test_sgx + +ifeq ($(CAN_BUILD_X86_64), 1) +all: $(TEST_CUSTOM_PROGS) $(OUTPUT)/test_encl.elf +endif + +$(OUTPUT)/test_sgx: $(OUTPUT)/main.o \ + $(OUTPUT)/load.o \ + $(OUTPUT)/sigstruct.o \ + $(OUTPUT)/call.o + $(CC) $(HOST_CFLAGS) -o $@ $^ -lcrypto + +$(OUTPUT)/main.o: main.c + $(CC) $(HOST_CFLAGS) -c $< -o $@ + +$(OUTPUT)/load.o: load.c + $(CC) $(HOST_CFLAGS) -c $< -o $@ + +$(OUTPUT)/sigstruct.o: sigstruct.c + $(CC) $(HOST_CFLAGS) -c $< -o $@ + +$(OUTPUT)/call.o: call.S + $(CC) $(HOST_CFLAGS) -c $< -o $@ + +$(OUTPUT)/test_encl.elf: test_encl.lds test_encl.c test_encl_bootstrap.S + $(CC) $(ENCL_CFLAGS) -T $^ -o $@ + +EXTRA_CLEAN := \ + $(OUTPUT)/test_encl.elf \ + $(OUTPUT)/load.o \ + $(OUTPUT)/call.o \ + $(OUTPUT)/main.o \ + $(OUTPUT)/sigstruct.o \ + $(OUTPUT)/test_sgx \ + $(OUTPUT)/test_sgx.o \ diff --git a/tools/testing/selftests/sgx/call.S b/tools/testing/selftests/sgx/call.S new file mode 100644 index 000000000000..4ecadc7490f4 --- /dev/null +++ b/tools/testing/selftests/sgx/call.S @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/** +* Copyright(c) 2016-20 Intel Corporation. +*/ + + .text + + .global sgx_call_vdso +sgx_call_vdso: + .cfi_startproc + push %r15 + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %r15, 0 + push %r14 + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %r14, 0 + push %r13 + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %r13, 0 + push %r12 + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %r12, 0 + push %rbx + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %rbx, 0 + push $0 + .cfi_adjust_cfa_offset 8 + push 0x38(%rsp) + .cfi_adjust_cfa_offset 8 + call *eenter(%rip) + add $0x10, %rsp + .cfi_adjust_cfa_offset -0x10 + pop %rbx + .cfi_adjust_cfa_offset -8 + pop %r12 + .cfi_adjust_cfa_offset -8 + pop %r13 + .cfi_adjust_cfa_offset -8 + pop %r14 + .cfi_adjust_cfa_offset -8 + pop %r15 + .cfi_adjust_cfa_offset -8 + ret + .cfi_endproc diff --git a/tools/testing/selftests/sgx/defines.h b/tools/testing/selftests/sgx/defines.h new file mode 100644 index 000000000000..592c1ccf4576 --- /dev/null +++ b/tools/testing/selftests/sgx/defines.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright(c) 2016-20 Intel Corporation. + */ + +#ifndef DEFINES_H +#define DEFINES_H + +#include + +#define PAGE_SIZE 4096 +#define PAGE_MASK (~(PAGE_SIZE - 1)) + +#define __aligned(x) __attribute__((__aligned__(x))) +#define __packed __attribute__((packed)) + +#include "../../../../arch/x86/kernel/cpu/sgx/arch.h" +#include "../../../../arch/x86/include/asm/enclu.h" +#include "../../../../arch/x86/include/uapi/asm/sgx.h" + +#endif /* DEFINES_H */ diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c new file mode 100644 index 000000000000..9d43b75aaa55 --- /dev/null +++ b/tools/testing/selftests/sgx/load.c @@ -0,0 +1,277 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2016-20 Intel Corporation. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "defines.h" +#include "main.h" + +void encl_delete(struct encl *encl) +{ + if (encl->encl_base) + munmap((void *)encl->encl_base, encl->encl_size); + + if (encl->bin) + munmap(encl->bin, encl->bin_size); + + if (encl->fd) + close(encl->fd); + + if (encl->segment_tbl) + free(encl->segment_tbl); + + memset(encl, 0, sizeof(*encl)); +} + +static bool encl_map_bin(const char *path, struct encl *encl) +{ + struct stat sb; + void *bin; + int ret; + int fd; + + fd = open(path, O_RDONLY); + if (fd == -1) { + perror("open()"); + return false; + } + + ret = stat(path, &sb); + if (ret) { + perror("stat()"); + goto err; + } + + bin = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (bin == MAP_FAILED) { + perror("mmap()"); + goto err; + } + + encl->bin = bin; + encl->bin_size = sb.st_size; + + close(fd); + return true; + +err: + close(fd); + return false; +} + +static bool encl_ioc_create(struct encl *encl) +{ + struct sgx_secs *secs = &encl->secs; + struct sgx_enclave_create ioc; + int rc; + + assert(encl->encl_base != 0); + + memset(secs, 0, sizeof(*secs)); + secs->ssa_frame_size = 1; + secs->attributes = SGX_ATTR_MODE64BIT; + secs->xfrm = 3; + secs->base = encl->encl_base; + secs->size = encl->encl_size; + + ioc.src = (unsigned long)secs; + rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_CREATE, &ioc); + if (rc) { + fprintf(stderr, "SGX_IOC_ENCLAVE_CREATE failed: errno=%d\n", + errno); + munmap((void *)secs->base, encl->encl_size); + return false; + } + + return true; +} + +static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg) +{ + struct sgx_enclave_add_pages ioc; + struct sgx_secinfo secinfo; + int rc; + + memset(&secinfo, 0, sizeof(secinfo)); + secinfo.flags = seg->flags; + + ioc.src = (uint64_t)encl->src + seg->offset; + ioc.offset = seg->offset; + ioc.length = seg->size; + ioc.secinfo = (unsigned long)&secinfo; + ioc.flags = SGX_PAGE_MEASURE; + + rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_ADD_PAGES, &ioc); + if (rc < 0) { + fprintf(stderr, "SGX_IOC_ENCLAVE_ADD_PAGES failed: errno=%d.\n", + errno); + return false; + } + + return true; +} + +bool encl_load(const char *path, struct encl *encl) +{ + Elf64_Phdr *phdr_tbl; + off_t src_offset; + Elf64_Ehdr *ehdr; + int i, j; + int ret; + + memset(encl, 0, sizeof(*encl)); + + ret = open("/dev/sgx_enclave", O_RDWR); + if (ret < 0) { + fprintf(stderr, "Unable to open /dev/sgx_enclave\n"); + goto err; + } + + encl->fd = ret; + + if (!encl_map_bin(path, encl)) + goto err; + + ehdr = encl->bin; + phdr_tbl = encl->bin + ehdr->e_phoff; + + for (i = 0; i < ehdr->e_phnum; i++) { + Elf64_Phdr *phdr = &phdr_tbl[i]; + + if (phdr->p_type == PT_LOAD) + encl->nr_segments++; + } + + encl->segment_tbl = calloc(encl->nr_segments, + sizeof(struct encl_segment)); + if (!encl->segment_tbl) + goto err; + + for (i = 0, j = 0; i < ehdr->e_phnum; i++) { + Elf64_Phdr *phdr = &phdr_tbl[i]; + unsigned int flags = phdr->p_flags; + struct encl_segment *seg; + + if (phdr->p_type != PT_LOAD) + continue; + + seg = &encl->segment_tbl[j]; + + if (!!(flags & ~(PF_R | PF_W | PF_X))) { + fprintf(stderr, + "%d has invalid segment flags 0x%02x.\n", i, + phdr->p_flags); + goto err; + } + + if (j == 0 && flags != (PF_R | PF_W)) { + fprintf(stderr, + "TCS has invalid segment flags 0x%02x.\n", + phdr->p_flags); + goto err; + } + + if (j == 0) { + src_offset = phdr->p_offset & PAGE_MASK; + + seg->prot = PROT_READ | PROT_WRITE; + seg->flags = SGX_PAGE_TYPE_TCS << 8; + } else { + seg->prot = (phdr->p_flags & PF_R) ? PROT_READ : 0; + seg->prot |= (phdr->p_flags & PF_W) ? PROT_WRITE : 0; + seg->prot |= (phdr->p_flags & PF_X) ? PROT_EXEC : 0; + seg->flags = (SGX_PAGE_TYPE_REG << 8) | seg->prot; + } + + seg->offset = (phdr->p_offset & PAGE_MASK) - src_offset; + seg->size = (phdr->p_filesz + PAGE_SIZE - 1) & PAGE_MASK; + + printf("0x%016lx 0x%016lx 0x%02x\n", seg->offset, seg->size, + seg->prot); + + j++; + } + + assert(j == encl->nr_segments); + + encl->src = encl->bin + src_offset; + encl->src_size = encl->segment_tbl[j - 1].offset + + encl->segment_tbl[j - 1].size; + + for (encl->encl_size = 4096; encl->encl_size < encl->src_size; ) + encl->encl_size <<= 1; + + return true; + +err: + encl_delete(encl); + return false; +} + +static bool encl_map_area(struct encl *encl) +{ + size_t encl_size = encl->encl_size; + void *area; + + area = mmap(NULL, encl_size * 2, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (area == MAP_FAILED) { + perror("mmap"); + return false; + } + + encl->encl_base = ((uint64_t)area + encl_size - 1) & ~(encl_size - 1); + + munmap(area, encl->encl_base - (uint64_t)area); + munmap((void *)(encl->encl_base + encl_size), + (uint64_t)area + encl_size - encl->encl_base); + + return true; +} + +bool encl_build(struct encl *encl) +{ + struct sgx_enclave_init ioc; + int ret; + int i; + + if (!encl_map_area(encl)) + return false; + + if (!encl_ioc_create(encl)) + return false; + + /* + * Pages must be added before mapping VMAs because their permissions + * cap the VMA permissions. + */ + for (i = 0; i < encl->nr_segments; i++) { + struct encl_segment *seg = &encl->segment_tbl[i]; + + if (!encl_ioc_add_pages(encl, seg)) + return false; + } + + ioc.sigstruct = (uint64_t)&encl->sigstruct; + ret = ioctl(encl->fd, SGX_IOC_ENCLAVE_INIT, &ioc); + if (ret) { + fprintf(stderr, "SGX_IOC_ENCLAVE_INIT failed: errno=%d\n", + errno); + return false; + } + + return true; +} diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c new file mode 100644 index 000000000000..724cec700926 --- /dev/null +++ b/tools/testing/selftests/sgx/main.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2016-20 Intel Corporation. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "defines.h" +#include "main.h" +#include "../kselftest.h" + +static const uint64_t MAGIC = 0x1122334455667788ULL; +vdso_sgx_enter_enclave_t eenter; + +struct vdso_symtab { + Elf64_Sym *elf_symtab; + const char *elf_symstrtab; + Elf64_Word *elf_hashtab; +}; + +static void *vdso_get_base_addr(char *envp[]) +{ + Elf64_auxv_t *auxv; + int i; + + for (i = 0; envp[i]; i++) + ; + + auxv = (Elf64_auxv_t *)&envp[i + 1]; + + for (i = 0; auxv[i].a_type != AT_NULL; i++) { + if (auxv[i].a_type == AT_SYSINFO_EHDR) + return (void *)auxv[i].a_un.a_val; + } + + return NULL; +} + +static Elf64_Dyn *vdso_get_dyntab(void *addr) +{ + Elf64_Ehdr *ehdr = addr; + Elf64_Phdr *phdrtab = addr + ehdr->e_phoff; + int i; + + for (i = 0; i < ehdr->e_phnum; i++) + if (phdrtab[i].p_type == PT_DYNAMIC) + return addr + phdrtab[i].p_offset; + + return NULL; +} + +static void *vdso_get_dyn(void *addr, Elf64_Dyn *dyntab, Elf64_Sxword tag) +{ + int i; + + for (i = 0; dyntab[i].d_tag != DT_NULL; i++) + if (dyntab[i].d_tag == tag) + return addr + dyntab[i].d_un.d_ptr; + + return NULL; +} + +static bool vdso_get_symtab(void *addr, struct vdso_symtab *symtab) +{ + Elf64_Dyn *dyntab = vdso_get_dyntab(addr); + + symtab->elf_symtab = vdso_get_dyn(addr, dyntab, DT_SYMTAB); + if (!symtab->elf_symtab) + return false; + + symtab->elf_symstrtab = vdso_get_dyn(addr, dyntab, DT_STRTAB); + if (!symtab->elf_symstrtab) + return false; + + symtab->elf_hashtab = vdso_get_dyn(addr, dyntab, DT_HASH); + if (!symtab->elf_hashtab) + return false; + + return true; +} + +static unsigned long elf_sym_hash(const char *name) +{ + unsigned long h = 0, high; + + while (*name) { + h = (h << 4) + *name++; + high = h & 0xf0000000; + + if (high) + h ^= high >> 24; + + h &= ~high; + } + + return h; +} + +static Elf64_Sym *vdso_symtab_get(struct vdso_symtab *symtab, const char *name) +{ + Elf64_Word bucketnum = symtab->elf_hashtab[0]; + Elf64_Word *buckettab = &symtab->elf_hashtab[2]; + Elf64_Word *chaintab = &symtab->elf_hashtab[2 + bucketnum]; + Elf64_Sym *sym; + Elf64_Word i; + + for (i = buckettab[elf_sym_hash(name) % bucketnum]; i != STN_UNDEF; + i = chaintab[i]) { + sym = &symtab->elf_symtab[i]; + if (!strcmp(name, &symtab->elf_symstrtab[sym->st_name])) + return sym; + } + + return NULL; +} + +bool report_results(struct sgx_enclave_run *run, int ret, uint64_t result, + const char *test) +{ + bool valid = true; + + if (ret) { + printf("FAIL: %s() returned: %d\n", test, ret); + valid = false; + } + + if (run->function != EEXIT) { + printf("FAIL: %s() function, expected: %u, got: %u\n", test, EEXIT, + run->function); + valid = false; + } + + if (result != MAGIC) { + printf("FAIL: %s(), expected: 0x%lx, got: 0x%lx\n", test, MAGIC, + result); + valid = false; + } + + if (run->user_data) { + printf("FAIL: %s() user data, expected: 0x0, got: 0x%llx\n", + test, run->user_data); + valid = false; + } + + return valid; +} + +static int user_handler(long rdi, long rsi, long rdx, long ursp, long r8, long r9, + struct sgx_enclave_run *run) +{ + run->user_data = 0; + return 0; +} + +int main(int argc, char *argv[], char *envp[]) +{ + struct sgx_enclave_run run; + struct vdso_symtab symtab; + Elf64_Sym *eenter_sym; + uint64_t result = 0; + struct encl encl; + unsigned int i; + void *addr; + int ret; + + memset(&run, 0, sizeof(run)); + + if (!encl_load("test_encl.elf", &encl)) { + encl_delete(&encl); + ksft_exit_skip("cannot load enclaves\n"); + } + + if (!encl_measure(&encl)) + goto err; + + if (!encl_build(&encl)) + goto err; + + /* + * An enclave consumer only must do this. + */ + for (i = 0; i < encl.nr_segments; i++) { + struct encl_segment *seg = &encl.segment_tbl[i]; + + addr = mmap((void *)encl.encl_base + seg->offset, seg->size, + seg->prot, MAP_SHARED | MAP_FIXED, encl.fd, 0); + if (addr == MAP_FAILED) { + fprintf(stderr, "mmap() failed, errno=%d.\n", errno); + exit(KSFT_FAIL); + } + } + + memset(&run, 0, sizeof(run)); + run.tcs = encl.encl_base; + + addr = vdso_get_base_addr(envp); + if (!addr) + goto err; + + if (!vdso_get_symtab(addr, &symtab)) + goto err; + + eenter_sym = vdso_symtab_get(&symtab, "__vdso_sgx_enter_enclave"); + if (!eenter_sym) + goto err; + + eenter = addr + eenter_sym->st_value; + + ret = sgx_call_vdso((void *)&MAGIC, &result, 0, EENTER, NULL, NULL, &run); + if (!report_results(&run, ret, result, "sgx_call_vdso")) + goto err; + + + /* Invoke the vDSO directly. */ + result = 0; + ret = eenter((unsigned long)&MAGIC, (unsigned long)&result, 0, EENTER, + 0, 0, &run); + if (!report_results(&run, ret, result, "eenter")) + goto err; + + /* And with an exit handler. */ + run.user_handler = (__u64)user_handler; + run.user_data = 0xdeadbeef; + ret = eenter((unsigned long)&MAGIC, (unsigned long)&result, 0, EENTER, + 0, 0, &run); + if (!report_results(&run, ret, result, "user_handler")) + goto err; + + printf("SUCCESS\n"); + encl_delete(&encl); + exit(KSFT_PASS); + +err: + encl_delete(&encl); + exit(KSFT_FAIL); +} diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h new file mode 100644 index 000000000000..45e6ab65442a --- /dev/null +++ b/tools/testing/selftests/sgx/main.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright(c) 2016-20 Intel Corporation. + */ + +#ifndef MAIN_H +#define MAIN_H + +struct encl_segment { + off_t offset; + size_t size; + unsigned int prot; + unsigned int flags; +}; + +struct encl { + int fd; + void *bin; + off_t bin_size; + void *src; + size_t src_size; + size_t encl_size; + off_t encl_base; + unsigned int nr_segments; + struct encl_segment *segment_tbl; + struct sgx_secs secs; + struct sgx_sigstruct sigstruct; +}; + +void encl_delete(struct encl *ctx); +bool encl_load(const char *path, struct encl *encl); +bool encl_measure(struct encl *encl); +bool encl_build(struct encl *encl); + +int sgx_call_vdso(void *rdi, void *rsi, long rdx, u32 function, void *r8, void *r9, + struct sgx_enclave_run *run); + +#endif /* MAIN_H */ diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c new file mode 100644 index 000000000000..cc06f108bae7 --- /dev/null +++ b/tools/testing/selftests/sgx/sigstruct.c @@ -0,0 +1,391 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2016-20 Intel Corporation. */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "defines.h" +#include "main.h" + +struct q1q2_ctx { + BN_CTX *bn_ctx; + BIGNUM *m; + BIGNUM *s; + BIGNUM *q1; + BIGNUM *qr; + BIGNUM *q2; +}; + +static void free_q1q2_ctx(struct q1q2_ctx *ctx) +{ + BN_CTX_free(ctx->bn_ctx); + BN_free(ctx->m); + BN_free(ctx->s); + BN_free(ctx->q1); + BN_free(ctx->qr); + BN_free(ctx->q2); +} + +static bool alloc_q1q2_ctx(const uint8_t *s, const uint8_t *m, + struct q1q2_ctx *ctx) +{ + ctx->bn_ctx = BN_CTX_new(); + ctx->s = BN_bin2bn(s, SGX_MODULUS_SIZE, NULL); + ctx->m = BN_bin2bn(m, SGX_MODULUS_SIZE, NULL); + ctx->q1 = BN_new(); + ctx->qr = BN_new(); + ctx->q2 = BN_new(); + + if (!ctx->bn_ctx || !ctx->s || !ctx->m || !ctx->q1 || !ctx->qr || + !ctx->q2) { + free_q1q2_ctx(ctx); + return false; + } + + return true; +} + +static bool calc_q1q2(const uint8_t *s, const uint8_t *m, uint8_t *q1, + uint8_t *q2) +{ + struct q1q2_ctx ctx; + + if (!alloc_q1q2_ctx(s, m, &ctx)) { + fprintf(stderr, "Not enough memory for Q1Q2 calculation\n"); + return false; + } + + if (!BN_mul(ctx.q1, ctx.s, ctx.s, ctx.bn_ctx)) + goto out; + + if (!BN_div(ctx.q1, ctx.qr, ctx.q1, ctx.m, ctx.bn_ctx)) + goto out; + + if (BN_num_bytes(ctx.q1) > SGX_MODULUS_SIZE) { + fprintf(stderr, "Too large Q1 %d bytes\n", + BN_num_bytes(ctx.q1)); + goto out; + } + + if (!BN_mul(ctx.q2, ctx.s, ctx.qr, ctx.bn_ctx)) + goto out; + + if (!BN_div(ctx.q2, NULL, ctx.q2, ctx.m, ctx.bn_ctx)) + goto out; + + if (BN_num_bytes(ctx.q2) > SGX_MODULUS_SIZE) { + fprintf(stderr, "Too large Q2 %d bytes\n", + BN_num_bytes(ctx.q2)); + goto out; + } + + BN_bn2bin(ctx.q1, q1); + BN_bn2bin(ctx.q2, q2); + + free_q1q2_ctx(&ctx); + return true; +out: + free_q1q2_ctx(&ctx); + return false; +} + +struct sgx_sigstruct_payload { + struct sgx_sigstruct_header header; + struct sgx_sigstruct_body body; +}; + +static bool check_crypto_errors(void) +{ + int err; + bool had_errors = false; + const char *filename; + int line; + char str[256]; + + for ( ; ; ) { + if (ERR_peek_error() == 0) + break; + + had_errors = true; + err = ERR_get_error_line(&filename, &line); + ERR_error_string_n(err, str, sizeof(str)); + fprintf(stderr, "crypto: %s: %s:%d\n", str, filename, line); + } + + return had_errors; +} + +static inline const BIGNUM *get_modulus(RSA *key) +{ + const BIGNUM *n; + + RSA_get0_key(key, &n, NULL, NULL); + return n; +} + +static RSA *gen_sign_key(void) +{ + BIGNUM *e; + RSA *key; + int ret; + + e = BN_new(); + key = RSA_new(); + + if (!e || !key) + goto err; + + ret = BN_set_word(e, RSA_3); + if (ret != 1) + goto err; + + ret = RSA_generate_key_ex(key, 3072, e, NULL); + if (ret != 1) + goto err; + + BN_free(e); + + return key; + +err: + RSA_free(key); + BN_free(e); + + return NULL; +} + +static void reverse_bytes(void *data, int length) +{ + int i = 0; + int j = length - 1; + uint8_t temp; + uint8_t *ptr = data; + + while (i < j) { + temp = ptr[i]; + ptr[i] = ptr[j]; + ptr[j] = temp; + i++; + j--; + } +} + +enum mrtags { + MRECREATE = 0x0045544145524345, + MREADD = 0x0000000044444145, + MREEXTEND = 0x00444E4554584545, +}; + +static bool mrenclave_update(EVP_MD_CTX *ctx, const void *data) +{ + if (!EVP_DigestUpdate(ctx, data, 64)) { + fprintf(stderr, "digest update failed\n"); + return false; + } + + return true; +} + +static bool mrenclave_commit(EVP_MD_CTX *ctx, uint8_t *mrenclave) +{ + unsigned int size; + + if (!EVP_DigestFinal_ex(ctx, (unsigned char *)mrenclave, &size)) { + fprintf(stderr, "digest commit failed\n"); + return false; + } + + if (size != 32) { + fprintf(stderr, "invalid digest size = %u\n", size); + return false; + } + + return true; +} + +struct mrecreate { + uint64_t tag; + uint32_t ssaframesize; + uint64_t size; + uint8_t reserved[44]; +} __attribute__((__packed__)); + + +static bool mrenclave_ecreate(EVP_MD_CTX *ctx, uint64_t blob_size) +{ + struct mrecreate mrecreate; + uint64_t encl_size; + + for (encl_size = 0x1000; encl_size < blob_size; ) + encl_size <<= 1; + + memset(&mrecreate, 0, sizeof(mrecreate)); + mrecreate.tag = MRECREATE; + mrecreate.ssaframesize = 1; + mrecreate.size = encl_size; + + if (!EVP_DigestInit_ex(ctx, EVP_sha256(), NULL)) + return false; + + return mrenclave_update(ctx, &mrecreate); +} + +struct mreadd { + uint64_t tag; + uint64_t offset; + uint64_t flags; /* SECINFO flags */ + uint8_t reserved[40]; +} __attribute__((__packed__)); + +static bool mrenclave_eadd(EVP_MD_CTX *ctx, uint64_t offset, uint64_t flags) +{ + struct mreadd mreadd; + + memset(&mreadd, 0, sizeof(mreadd)); + mreadd.tag = MREADD; + mreadd.offset = offset; + mreadd.flags = flags; + + return mrenclave_update(ctx, &mreadd); +} + +struct mreextend { + uint64_t tag; + uint64_t offset; + uint8_t reserved[48]; +} __attribute__((__packed__)); + +static bool mrenclave_eextend(EVP_MD_CTX *ctx, uint64_t offset, + const uint8_t *data) +{ + struct mreextend mreextend; + int i; + + for (i = 0; i < 0x1000; i += 0x100) { + memset(&mreextend, 0, sizeof(mreextend)); + mreextend.tag = MREEXTEND; + mreextend.offset = offset + i; + + if (!mrenclave_update(ctx, &mreextend)) + return false; + + if (!mrenclave_update(ctx, &data[i + 0x00])) + return false; + + if (!mrenclave_update(ctx, &data[i + 0x40])) + return false; + + if (!mrenclave_update(ctx, &data[i + 0x80])) + return false; + + if (!mrenclave_update(ctx, &data[i + 0xC0])) + return false; + } + + return true; +} + +static bool mrenclave_segment(EVP_MD_CTX *ctx, struct encl *encl, + struct encl_segment *seg) +{ + uint64_t end = seg->offset + seg->size; + uint64_t offset; + + for (offset = seg->offset; offset < end; offset += PAGE_SIZE) { + if (!mrenclave_eadd(ctx, offset, seg->flags)) + return false; + + if (!mrenclave_eextend(ctx, offset, encl->src + offset)) + return false; + } + + return true; +} + +bool encl_measure(struct encl *encl) +{ + uint64_t header1[2] = {0x000000E100000006, 0x0000000000010000}; + uint64_t header2[2] = {0x0000006000000101, 0x0000000100000060}; + struct sgx_sigstruct *sigstruct = &encl->sigstruct; + struct sgx_sigstruct_payload payload; + uint8_t digest[SHA256_DIGEST_LENGTH]; + unsigned int siglen; + RSA *key = NULL; + EVP_MD_CTX *ctx; + int i; + + memset(sigstruct, 0, sizeof(*sigstruct)); + + sigstruct->header.header1[0] = header1[0]; + sigstruct->header.header1[1] = header1[1]; + sigstruct->header.header2[0] = header2[0]; + sigstruct->header.header2[1] = header2[1]; + sigstruct->exponent = 3; + sigstruct->body.attributes = SGX_ATTR_MODE64BIT; + sigstruct->body.xfrm = 3; + + /* sanity check */ + if (check_crypto_errors()) + goto err; + + key = gen_sign_key(); + if (!key) + goto err; + + BN_bn2bin(get_modulus(key), sigstruct->modulus); + + ctx = EVP_MD_CTX_create(); + if (!ctx) + goto err; + + if (!mrenclave_ecreate(ctx, encl->src_size)) + goto err; + + for (i = 0; i < encl->nr_segments; i++) { + struct encl_segment *seg = &encl->segment_tbl[i]; + + if (!mrenclave_segment(ctx, encl, seg)) + goto err; + } + + if (!mrenclave_commit(ctx, sigstruct->body.mrenclave)) + goto err; + + memcpy(&payload.header, &sigstruct->header, sizeof(sigstruct->header)); + memcpy(&payload.body, &sigstruct->body, sizeof(sigstruct->body)); + + SHA256((unsigned char *)&payload, sizeof(payload), digest); + + if (!RSA_sign(NID_sha256, digest, SHA256_DIGEST_LENGTH, + sigstruct->signature, &siglen, key)) + goto err; + + if (!calc_q1q2(sigstruct->signature, sigstruct->modulus, sigstruct->q1, + sigstruct->q2)) + goto err; + + /* BE -> LE */ + reverse_bytes(sigstruct->signature, SGX_MODULUS_SIZE); + reverse_bytes(sigstruct->modulus, SGX_MODULUS_SIZE); + reverse_bytes(sigstruct->q1, SGX_MODULUS_SIZE); + reverse_bytes(sigstruct->q2, SGX_MODULUS_SIZE); + + EVP_MD_CTX_destroy(ctx); + RSA_free(key); + return true; + +err: + EVP_MD_CTX_destroy(ctx); + RSA_free(key); + return false; +} diff --git a/tools/testing/selftests/sgx/test_encl.c b/tools/testing/selftests/sgx/test_encl.c new file mode 100644 index 000000000000..cf25b5dc1e03 --- /dev/null +++ b/tools/testing/selftests/sgx/test_encl.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2016-20 Intel Corporation. */ + +#include +#include "defines.h" + +static void *memcpy(void *dest, const void *src, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) + ((char *)dest)[i] = ((char *)src)[i]; + + return dest; +} + +void encl_body(void *rdi, void *rsi) +{ + memcpy(rsi, rdi, 8); +} diff --git a/tools/testing/selftests/sgx/test_encl.lds b/tools/testing/selftests/sgx/test_encl.lds new file mode 100644 index 000000000000..0fbbda7e665e --- /dev/null +++ b/tools/testing/selftests/sgx/test_encl.lds @@ -0,0 +1,40 @@ +OUTPUT_FORMAT(elf64-x86-64) + +PHDRS +{ + tcs PT_LOAD; + text PT_LOAD; + data PT_LOAD; +} + +SECTIONS +{ + . = 0; + .tcs : { + *(.tcs*) + } : tcs + + . = ALIGN(4096); + .text : { + *(.text*) + *(.rodata*) + } : text + + . = ALIGN(4096); + .data : { + *(.data*) + } : data + + /DISCARD/ : { + *(.comment*) + *(.note*) + *(.debug*) + *(.eh_frame*) + } +} + +ASSERT(!DEFINED(.altinstructions), "ALTERNATIVES are not supported in enclaves") +ASSERT(!DEFINED(.altinstr_replacement), "ALTERNATIVES are not supported in enclaves") +ASSERT(!DEFINED(.discard.retpoline_safe), "RETPOLINE ALTERNATIVES are not supported in enclaves") +ASSERT(!DEFINED(.discard.nospec), "RETPOLINE ALTERNATIVES are not supported in enclaves") +ASSERT(!DEFINED(.got.plt), "Libcalls are not supported in enclaves") diff --git a/tools/testing/selftests/sgx/test_encl_bootstrap.S b/tools/testing/selftests/sgx/test_encl_bootstrap.S new file mode 100644 index 000000000000..5d5680d4ea39 --- /dev/null +++ b/tools/testing/selftests/sgx/test_encl_bootstrap.S @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright(c) 2016-20 Intel Corporation. + */ + + .macro ENCLU + .byte 0x0f, 0x01, 0xd7 + .endm + + .section ".tcs", "aw" + .balign 4096 + + .fill 1, 8, 0 # STATE (set by CPU) + .fill 1, 8, 0 # FLAGS + .quad encl_ssa # OSSA + .fill 1, 4, 0 # CSSA (set by CPU) + .fill 1, 4, 1 # NSSA + .quad encl_entry # OENTRY + .fill 1, 8, 0 # AEP (set by EENTER and ERESUME) + .fill 1, 8, 0 # OFSBASE + .fill 1, 8, 0 # OGSBASE + .fill 1, 4, 0xFFFFFFFF # FSLIMIT + .fill 1, 4, 0xFFFFFFFF # GSLIMIT + .fill 4024, 1, 0 # Reserved + + # Identical to the previous TCS. + .fill 1, 8, 0 # STATE (set by CPU) + .fill 1, 8, 0 # FLAGS + .quad encl_ssa # OSSA + .fill 1, 4, 0 # CSSA (set by CPU) + .fill 1, 4, 1 # NSSA + .quad encl_entry # OENTRY + .fill 1, 8, 0 # AEP (set by EENTER and ERESUME) + .fill 1, 8, 0 # OFSBASE + .fill 1, 8, 0 # OGSBASE + .fill 1, 4, 0xFFFFFFFF # FSLIMIT + .fill 1, 4, 0xFFFFFFFF # GSLIMIT + .fill 4024, 1, 0 # Reserved + + .text + +encl_entry: + # RBX contains the base address for TCS, which is also the first address + # inside the enclave. By adding the value of le_stack_end to it, we get + # the absolute address for the stack. + lea (encl_stack)(%rbx), %rax + xchg %rsp, %rax + push %rax + + push %rcx # push the address after EENTER + push %rbx # push the enclave base address + + call encl_body + + pop %rbx # pop the enclave base address + + /* Clear volatile GPRs, except RAX (EEXIT function). */ + xor %rcx, %rcx + xor %rdx, %rdx + xor %rdi, %rdi + xor %rsi, %rsi + xor %r8, %r8 + xor %r9, %r9 + xor %r10, %r10 + xor %r11, %r11 + + # Reset status flags. + add %rdx, %rdx # OF = SF = AF = CF = 0; ZF = PF = 1 + + # Prepare EEXIT target by popping the address of the instruction after + # EENTER to RBX. + pop %rbx + + # Restore the caller stack. + pop %rax + mov %rax, %rsp + + # EEXIT + mov $4, %rax + enclu + + .section ".data", "aw" + +encl_ssa: + .space 4096 + + .balign 4096 + .space 8192 +encl_stack: -- cgit v1.3.1 From 0eaa8d153a1d573e53b8283c90db44057d1376f6 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Wed, 18 Nov 2020 19:06:40 +0200 Subject: selftests/sgx: Use a statically generated 3072-bit RSA key Use a statically generated key for signing the enclave, because generating keys on the fly can eat the kernel entropy pool. Another good reason for doing this is predictable builds. The RSA has been arbitrarily selected. It's contents do not matter. This also makes the selftest execute a lot quicker instead of the delay that it had before (because of slow key generation). [ bp: Disambiguate "static key" which means something else in the kernel, fix typos. ] Signed-off-by: Jarkko Sakkinen Signed-off-by: Borislav Petkov Cc: linux-kselftest@vger.kernel.org Link: https://lkml.kernel.org/r/20201118170640.39629-1-jarkko@kernel.org --- tools/testing/selftests/sgx/Makefile | 6 ++++- tools/testing/selftests/sgx/main.h | 3 +++ tools/testing/selftests/sgx/sign_key.S | 12 ++++++++++ tools/testing/selftests/sgx/sign_key.pem | 39 ++++++++++++++++++++++++++++++++ tools/testing/selftests/sgx/sigstruct.c | 34 ++++++++++------------------ 5 files changed, 71 insertions(+), 23 deletions(-) create mode 100644 tools/testing/selftests/sgx/sign_key.S create mode 100644 tools/testing/selftests/sgx/sign_key.pem (limited to 'tools') diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile index d51c90663943..7f12d55b97f8 100644 --- a/tools/testing/selftests/sgx/Makefile +++ b/tools/testing/selftests/sgx/Makefile @@ -25,7 +25,8 @@ endif $(OUTPUT)/test_sgx: $(OUTPUT)/main.o \ $(OUTPUT)/load.o \ $(OUTPUT)/sigstruct.o \ - $(OUTPUT)/call.o + $(OUTPUT)/call.o \ + $(OUTPUT)/sign_key.o $(CC) $(HOST_CFLAGS) -o $@ $^ -lcrypto $(OUTPUT)/main.o: main.c @@ -40,6 +41,9 @@ $(OUTPUT)/sigstruct.o: sigstruct.c $(OUTPUT)/call.o: call.S $(CC) $(HOST_CFLAGS) -c $< -o $@ +$(OUTPUT)/sign_key.o: sign_key.S + $(CC) $(HOST_CFLAGS) -c $< -o $@ + $(OUTPUT)/test_encl.elf: test_encl.lds test_encl.c test_encl_bootstrap.S $(CC) $(ENCL_CFLAGS) -T $^ -o $@ diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h index 45e6ab65442a..67211a708f04 100644 --- a/tools/testing/selftests/sgx/main.h +++ b/tools/testing/selftests/sgx/main.h @@ -27,6 +27,9 @@ struct encl { struct sgx_sigstruct sigstruct; }; +extern unsigned char sign_key[]; +extern unsigned char sign_key_end[]; + void encl_delete(struct encl *ctx); bool encl_load(const char *path, struct encl *encl); bool encl_measure(struct encl *encl); diff --git a/tools/testing/selftests/sgx/sign_key.S b/tools/testing/selftests/sgx/sign_key.S new file mode 100644 index 000000000000..e4fbe948444a --- /dev/null +++ b/tools/testing/selftests/sgx/sign_key.S @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/** +* Copyright(c) 2016-20 Intel Corporation. +*/ + + .section ".rodata", "a" + +sign_key: + .globl sign_key + .incbin "sign_key.pem" +sign_key_end: + .globl sign_key_end diff --git a/tools/testing/selftests/sgx/sign_key.pem b/tools/testing/selftests/sgx/sign_key.pem new file mode 100644 index 000000000000..d76f21f19187 --- /dev/null +++ b/tools/testing/selftests/sgx/sign_key.pem @@ -0,0 +1,39 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIG4wIBAAKCAYEApalGbq7Q+usM91CPtksu3D+b0Prc8gAFL6grM3mg85A5Bx8V +cfMXPgtrw8EYFwQxDAvzZWwl+9VfOX0ECrFRBkOHcOiG0SnADN8+FLj1UiNUQwbp +S6OzhNWuRcSbGraSOyUlVlV0yMQSvewyzGklOaXBe30AJqzIBc8QfdSxKuP8rs0Z +ga6k/Bl73osrYKByILJTUUeZqjLERsE6GebsdzbWgKn8qVqng4ZS4yMNg6LeRlH3 ++9CIPgg4jwpSLHcp7dq2qTIB9a0tGe9ayp+5FbucpB6U7ePold0EeRN6RlJGDF9k +L93v8P5ykz5G5gYZ2g0K1X2sHIWV4huxPgv5PXgdyQYbK+6olqj0d5rjYuwX57Ul +k6SroPS1U6UbdCjG5txM+BNGU0VpD0ZhrIRw0leQdnNcCO9sTJuInZrgYacSVJ7u +mtB+uCt+uzUesc+l+xPRYA+9e14lLkZp7AAmo9FvL816XDI09deehJ3i/LmHKCRN +tuqC5TprRjFwUr6dAgEDAoIBgG5w2Z8fNfycs0+LCnmHdJLVEotR6KFVWMpwHMz7 +wKJgJgS/Y6FMuilc8oKAuroCy11dTO5IGVKOP3uorVx2NgQtBPXwWeDGgAiU1A3Q +o4wXjYIEm4fCd63jyYPYZ2ckYXzDbjmOTdstYdPyzIhGGNEZK6eoqsRzMAPfYFPj +IMdCqHSIu6vJw1K7p+myHOsVoWshjODaZnF3LYSA0WaZ8vokjwBxUxuRxQJZjJds +s60XPtmL+qfgWtQFewoG4XL6GuD8FcXccynRRtzrLtFNPIl9BQfWfjBBhTC1/Te1 +0Z6XbZvpdUTD9OfLB7SbR2OUFNpKQgriO0iYVdbW3cr7uu38Zwp4W1TX73DPjoi6 +KNooP6SGWd4mRJW2+dUmSYS4QNG8eVVZswKcploEIXlAKRsOe4kzJJ1iETugIe85 +uX8nd1WYEp65xwoRUg8hqng0MeyveVbXqNKuJG6tzNDt9kgFYo+hmC/oouAW2Dtc +T9jdRAwKJXqA2Eg6OkgXCEv+kwKBwQDYaQiFMlFhsmLlqI+EzCUh7c941/cL7m6U +7j98+8ngl0HgCEcrc10iJVCKakQW3YbPzAx3XkKTaGjWazvvrFarXIGlOud64B8a +iWyQ7VdlnmZnNEdk+C83tI91OQeaTKqRLDGzKh29Ry/jL8Pcbazt+kDgxa0H7qJp +roADUanLQuNkYubpbhFBh3xpa2EExaVq6rF7nIVsD8W9TrbmPKA4LgH7z0iy544D +kVCNYsTjYDdUWP+WiSor8kCnnpjnN9sCgcEAw/eNezUD1UDf6OYFC9+5JZJFn4Tg +mZMyN93JKIb199ffwnjtHUSjcyiWeesXucpzwtGbTcwQnDisSW4oneYKLSEBlBaq +scqiUugyGZZOthFSCbdXYXMViK2vHrKlkse7GxVlROKcEhM/pRBrmjaGO8eWR+D4 +FO2wCXzVs3KgV6j779frw0vC54oHOxc9+Lu1rSHp4i+600koyvL/zF6U/5tZXIvN +YW2yoiQJnjCmVA1pwbwV6KAUTPDTMnBK+YjnAoHBAJBGBa4hi5Z27JkbCliIGMFJ +NPs6pLKe9GNJf6in2+sPgUAFhMeiPhbDiwbxgrnpBIqICE+ULGJFmzmc0p/IOceT +ARjR76dAFLxbnbXzj5kURETNhO36yiUjCk4mBRGIcbYddndxaSjaH+zKgpLzyJ6m +1esuc1qfFvEfAAI2cTIsl5hB70ZJYNZaUvDyQK3ZGPHxy6e9rkgKg9OJz0QoatAe +q/002yHvtAJg4F5B2JeVejg7VQ8GHB1MKxppu0TP5wKBwQCCpQj8zgKOKz/wmViy +lSYZDC5qWJW7t3bP6TDFr06lOpUsUJ4TgxeiGw778g/RMaKB4RIz3WBoJcgw9BsT +7rFza1ZiucchMcGMmswRDt8kC4wGejpA92Owc8oUdxkMhSdnY5jYlxK2t3/DYEe8 +JFl9L7mFQKVjSSAGUzkiTGrlG1Kf5UfXh9dFBq98uilQfSPIwUaWynyM23CHTKqI +Pw3/vOY9sojrnncWwrEUIG7is5vWfWPwargzSzd29YdRBe8CgcEAuRVewK/YeNOX +B7ZG6gKKsfsvrGtY7FPETzLZAHjoVXYNea4LVZ2kn4hBXXlvw/4HD+YqcTt4wmif +5JQlDvjNobUiKJZpzy7hklVhF7wZFl4pCF7Yh43q9iQ7gKTaeUG7MiaK+G8Zz8aY +HW9rsiihbdZkccMvnPfO9334XMxl3HtBRzLstjUlbLB7Sdh+7tZ3JQidCOFNs5pE +XyWwnASPu4tKfDahH1UUTp1uJcq/6716CSWg080avYxFcn75qqsb +-----END RSA PRIVATE KEY----- diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c index cc06f108bae7..dee7a3d6c5a5 100644 --- a/tools/testing/selftests/sgx/sigstruct.c +++ b/tools/testing/selftests/sgx/sigstruct.c @@ -135,33 +135,21 @@ static inline const BIGNUM *get_modulus(RSA *key) static RSA *gen_sign_key(void) { - BIGNUM *e; + unsigned long sign_key_length; + BIO *bio; RSA *key; - int ret; - e = BN_new(); - key = RSA_new(); + sign_key_length = (unsigned long)&sign_key_end - + (unsigned long)&sign_key; - if (!e || !key) - goto err; - - ret = BN_set_word(e, RSA_3); - if (ret != 1) - goto err; - - ret = RSA_generate_key_ex(key, 3072, e, NULL); - if (ret != 1) - goto err; + bio = BIO_new_mem_buf(&sign_key, sign_key_length); + if (!bio) + return NULL; - BN_free(e); + key = PEM_read_bio_RSAPrivateKey(bio, NULL, NULL, NULL); + BIO_free(bio); return key; - -err: - RSA_free(key); - BN_free(e); - - return NULL; } static void reverse_bytes(void *data, int length) @@ -339,8 +327,10 @@ bool encl_measure(struct encl *encl) goto err; key = gen_sign_key(); - if (!key) + if (!key) { + ERR_print_errors_fp(stdout); goto err; + } BN_bn2bin(get_modulus(key), sigstruct->modulus); -- cgit v1.3.1 From d055126180564a57fe533728a4e93d0cb53d49b3 Mon Sep 17 00:00:00 2001 From: Dmitrii Banshchikov Date: Tue, 17 Nov 2020 18:45:49 +0000 Subject: bpf: Add bpf_ktime_get_coarse_ns helper The helper uses CLOCK_MONOTONIC_COARSE source of time that is less accurate but more performant. We have a BPF CGROUP_SKB firewall that supports event logging through bpf_perf_event_output(). Each event has a timestamp and currently we use bpf_ktime_get_ns() for it. Use of bpf_ktime_get_coarse_ns() saves ~15-20 ns in time required for event logging. bpf_ktime_get_ns(): EgressLogByRemoteEndpoint 113.82ns 8.79M bpf_ktime_get_coarse_ns(): EgressLogByRemoteEndpoint 95.40ns 10.48M Signed-off-by: Dmitrii Banshchikov Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201117184549.257280-1-me@ubique.spb.ru --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 11 +++++++++++ kernel/bpf/core.c | 1 + kernel/bpf/helpers.c | 13 +++++++++++++ kernel/trace/bpf_trace.c | 2 ++ tools/include/uapi/linux/bpf.h | 11 +++++++++++ 6 files changed, 39 insertions(+) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 581b2a2e78eb..e1bcb6d7345c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1842,6 +1842,7 @@ extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; extern const struct bpf_func_proto bpf_per_cpu_ptr_proto; extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; +extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a52299b80b9d..3ca6146f001a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3797,6 +3797,16 @@ union bpf_attr { * is cleared if the flag is not specified. * Return * **-EINVAL** if invalid *flags* are passed, zero otherwise. + * + * u64 bpf_ktime_get_coarse_ns(void) + * Description + * Return a coarse-grained version of the time elapsed since + * system boot, in nanoseconds. Does not include time the system + * was suspended. + * + * See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**) + * Return + * Current *ktime*. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3959,6 +3969,7 @@ union bpf_attr { FN(task_storage_delete), \ FN(get_current_task_btf), \ FN(bprm_opts_set), \ + FN(ktime_get_coarse_ns), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 55454d2278b1..ff55cbcfbab4 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2211,6 +2211,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; const struct bpf_func_proto bpf_get_numa_node_id_proto __weak; const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak; +const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto __weak; const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 25520f5eeaf6..2c395deae279 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -167,6 +167,17 @@ const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = { .ret_type = RET_INTEGER, }; +BPF_CALL_0(bpf_ktime_get_coarse_ns) +{ + return ktime_get_coarse_ns(); +} + +const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = { + .func = bpf_ktime_get_coarse_ns, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + BPF_CALL_0(bpf_get_current_pid_tgid) { struct task_struct *task = current; @@ -685,6 +696,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_ktime_get_ns_proto; case BPF_FUNC_ktime_get_boot_ns: return &bpf_ktime_get_boot_ns_proto; + case BPF_FUNC_ktime_get_coarse_ns: + return &bpf_ktime_get_coarse_ns_proto; case BPF_FUNC_ringbuf_output: return &bpf_ringbuf_output_proto; case BPF_FUNC_ringbuf_reserve: diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 02986c7b90eb..d255bc9b2bfa 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1280,6 +1280,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_ktime_get_ns_proto; case BPF_FUNC_ktime_get_boot_ns: return &bpf_ktime_get_boot_ns_proto; + case BPF_FUNC_ktime_get_coarse_ns: + return &bpf_ktime_get_coarse_ns_proto; case BPF_FUNC_tail_call: return &bpf_tail_call_proto; case BPF_FUNC_get_current_pid_tgid: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a52299b80b9d..3ca6146f001a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3797,6 +3797,16 @@ union bpf_attr { * is cleared if the flag is not specified. * Return * **-EINVAL** if invalid *flags* are passed, zero otherwise. + * + * u64 bpf_ktime_get_coarse_ns(void) + * Description + * Return a coarse-grained version of the time elapsed since + * system boot, in nanoseconds. Does not include time the system + * was suspended. + * + * See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**) + * Return + * Current *ktime*. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3959,6 +3969,7 @@ union bpf_attr { FN(task_storage_delete), \ FN(get_current_task_btf), \ FN(bprm_opts_set), \ + FN(ktime_get_coarse_ns), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.3.1 From 6016df8fe874e1cf36f6357d71438b384198ce06 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Wed, 18 Nov 2020 08:16:38 +0100 Subject: selftests/bpf: Fix broken riscv build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The selftests/bpf Makefile includes system include directories from the host, when building BPF programs. On RISC-V glibc requires that __riscv_xlen is defined. This is not the case for "clang -target bpf", which messes up __WORDSIZE (errno.h -> ... -> wordsize.h) and breaks the build. By explicitly defining __risc_xlen correctly for riscv, we can workaround this. Fixes: 167381f3eac0 ("selftests/bpf: Makefile fix "missing" headers on build with -idirafter") Signed-off-by: Björn Töpel Signed-off-by: Andrii Nakryiko Acked-by: Luke Nelson Link: https://lore.kernel.org/bpf/20201118071640.83773-2-bjorn.topel@gmail.com --- tools/testing/selftests/bpf/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index c1708ffa6b1c..3d5940cd110d 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -219,7 +219,8 @@ $(RESOLVE_BTFIDS): $(BPFOBJ) | $(BUILD_DIR)/resolve_btfids \ # build would have failed anyways. define get_sys_includes $(shell $(1) -v -E - &1 \ - | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ +$(shell $(1) -dM -E - Date: Wed, 18 Nov 2020 08:16:39 +0100 Subject: selftests/bpf: Avoid running unprivileged tests with alignment requirements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some architectures have strict alignment requirements. In that case, the BPF verifier detects if a program has unaligned accesses and rejects them. A user can pass BPF_F_ANY_ALIGNMENT to a program to override this check. That, however, will only work when a privileged user loads a program. An unprivileged user loading a program with this flag will be rejected prior entering the verifier. Hence, it does not make sense to load unprivileged programs without strict alignment when testing the verifier. This patch avoids exactly that. Signed-off-by: Björn Töpel Signed-off-by: Andrii Nakryiko Acked-by: Luke Nelson Link: https://lore.kernel.org/bpf/20201118071640.83773-3-bjorn.topel@gmail.com --- tools/testing/selftests/bpf/test_verifier.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 9be395d9dc64..4bfe3aa2cfc4 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1152,6 +1152,19 @@ static void get_unpriv_disabled() static bool test_as_unpriv(struct bpf_test *test) { +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + /* Some architectures have strict alignment requirements. In + * that case, the BPF verifier detects if a program has + * unaligned accesses and rejects them. A user can pass + * BPF_F_ANY_ALIGNMENT to a program to override this + * check. That, however, will only work when a privileged user + * loads a program. An unprivileged user loading a program + * with this flag will be rejected prior entering the + * verifier. + */ + if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) + return false; +#endif return !test->prog_type || test->prog_type == BPF_PROG_TYPE_SOCKET_FILTER || test->prog_type == BPF_PROG_TYPE_CGROUP_SKB; -- cgit v1.3.1 From 6007b23cc7555df882be870433dc589841d4eb06 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Wed, 18 Nov 2020 08:16:40 +0100 Subject: selftests/bpf: Mark tests that require unaligned memory access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A lot of tests require unaligned memory access to work. Mark the tests as such, so that they can be avoided on unsupported architectures such as RISC-V. Signed-off-by: Björn Töpel Signed-off-by: Andrii Nakryiko Acked-by: Luke Nelson Link: https://lore.kernel.org/bpf/20201118071640.83773-4-bjorn.topel@gmail.com --- .../testing/selftests/bpf/verifier/ctx_sk_lookup.c | 7 ++++ .../selftests/bpf/verifier/direct_value_access.c | 3 ++ tools/testing/selftests/bpf/verifier/map_ptr.c | 1 + .../selftests/bpf/verifier/raw_tp_writable.c | 1 + .../testing/selftests/bpf/verifier/ref_tracking.c | 4 ++ tools/testing/selftests/bpf/verifier/regalloc.c | 8 ++++ tools/testing/selftests/bpf/verifier/wide_access.c | 46 +++++++++++++--------- 7 files changed, 52 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c index 2ad5f974451c..fb13ca2d5606 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c +++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c @@ -266,6 +266,7 @@ .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_LOOKUP, .expected_attach_type = BPF_SK_LOOKUP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid 8-byte read from bpf_sk_lookup remote_ip4 field", @@ -292,6 +293,7 @@ .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_LOOKUP, .expected_attach_type = BPF_SK_LOOKUP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid 8-byte read from bpf_sk_lookup remote_port field", @@ -305,6 +307,7 @@ .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_LOOKUP, .expected_attach_type = BPF_SK_LOOKUP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid 8-byte read from bpf_sk_lookup local_ip4 field", @@ -331,6 +334,7 @@ .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_LOOKUP, .expected_attach_type = BPF_SK_LOOKUP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid 8-byte read from bpf_sk_lookup local_port field", @@ -344,6 +348,7 @@ .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_LOOKUP, .expected_attach_type = BPF_SK_LOOKUP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, /* invalid 1,2,4-byte reads from 8-byte fields in bpf_sk_lookup */ { @@ -410,6 +415,7 @@ .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_LOOKUP, .expected_attach_type = BPF_SK_LOOKUP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid 4-byte unaligned read from bpf_sk_lookup at even offset", @@ -422,6 +428,7 @@ .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_LOOKUP, .expected_attach_type = BPF_SK_LOOKUP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, /* in-bound and out-of-bound writes to bpf_sk_lookup */ { diff --git a/tools/testing/selftests/bpf/verifier/direct_value_access.c b/tools/testing/selftests/bpf/verifier/direct_value_access.c index 988f46a1a4c7..c0648dc009b5 100644 --- a/tools/testing/selftests/bpf/verifier/direct_value_access.c +++ b/tools/testing/selftests/bpf/verifier/direct_value_access.c @@ -69,6 +69,7 @@ .fixup_map_array_48b = { 1 }, .result = REJECT, .errstr = "R1 min value is outside of the allowed memory range", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct map access, write test 7", @@ -195,6 +196,7 @@ .fixup_map_array_48b = { 1, 3 }, .result = REJECT, .errstr = "invalid access to map value, value_size=48 off=47 size=2", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct map access, write test 17", @@ -209,6 +211,7 @@ .fixup_map_array_48b = { 1, 3 }, .result = REJECT, .errstr = "invalid access to map value, value_size=48 off=47 size=2", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct map access, write test 18", diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c index 637f9293bda8..b117bdd3806d 100644 --- a/tools/testing/selftests/bpf/verifier/map_ptr.c +++ b/tools/testing/selftests/bpf/verifier/map_ptr.c @@ -44,6 +44,7 @@ .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", .result = REJECT, .errstr = "cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "bpf_map_ptr: read ops field accepted", diff --git a/tools/testing/selftests/bpf/verifier/raw_tp_writable.c b/tools/testing/selftests/bpf/verifier/raw_tp_writable.c index 95b5d70a1dc1..2978fb5a769d 100644 --- a/tools/testing/selftests/bpf/verifier/raw_tp_writable.c +++ b/tools/testing/selftests/bpf/verifier/raw_tp_writable.c @@ -31,4 +31,5 @@ .fixup_map_hash_8b = { 1, }, .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, .errstr = "R6 invalid variable buffer offset: off=0, var_off=(0x0; 0xffffffff)", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index 006b5bd99c08..3b6ee009c00b 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -675,6 +675,7 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .errstr = "invalid mem access", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "reference tracking: use ptr from bpf_sk_fullsock() after release", @@ -698,6 +699,7 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .errstr = "invalid mem access", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "reference tracking: use ptr from bpf_sk_fullsock(tp) after release", @@ -725,6 +727,7 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .errstr = "invalid mem access", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "reference tracking: use sk after bpf_sk_release(tp)", @@ -747,6 +750,7 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .errstr = "invalid mem access", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "reference tracking: use ptr from bpf_get_listener_sock() after bpf_sk_release(sk)", diff --git a/tools/testing/selftests/bpf/verifier/regalloc.c b/tools/testing/selftests/bpf/verifier/regalloc.c index 4ad7e05de706..bb0dd89dd212 100644 --- a/tools/testing/selftests/bpf/verifier/regalloc.c +++ b/tools/testing/selftests/bpf/verifier/regalloc.c @@ -21,6 +21,7 @@ .fixup_map_hash_48b = { 4 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "regalloc negative", @@ -71,6 +72,7 @@ .fixup_map_hash_48b = { 4 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "regalloc src_reg negative", @@ -97,6 +99,7 @@ .result = REJECT, .errstr = "invalid access to map value, value_size=48 off=44 size=8", .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "regalloc and spill", @@ -126,6 +129,7 @@ .fixup_map_hash_48b = { 4 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "regalloc and spill negative", @@ -156,6 +160,7 @@ .result = REJECT, .errstr = "invalid access to map value, value_size=48 off=48 size=8", .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "regalloc three regs", @@ -182,6 +187,7 @@ .fixup_map_hash_48b = { 4 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "regalloc after call", @@ -210,6 +216,7 @@ .fixup_map_hash_48b = { 4 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "regalloc in callee", @@ -240,6 +247,7 @@ .fixup_map_hash_48b = { 4 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "regalloc, spill, JEQ", diff --git a/tools/testing/selftests/bpf/verifier/wide_access.c b/tools/testing/selftests/bpf/verifier/wide_access.c index ccade9312d21..55af248efa93 100644 --- a/tools/testing/selftests/bpf/verifier/wide_access.c +++ b/tools/testing/selftests/bpf/verifier/wide_access.c @@ -1,4 +1,4 @@ -#define BPF_SOCK_ADDR_STORE(field, off, res, err) \ +#define BPF_SOCK_ADDR_STORE(field, off, res, err, flgs) \ { \ "wide store to bpf_sock_addr." #field "[" #off "]", \ .insns = { \ @@ -11,31 +11,36 @@ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, \ .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG, \ .errstr = err, \ + .flags = flgs, \ } /* user_ip6[0] is u64 aligned */ BPF_SOCK_ADDR_STORE(user_ip6, 0, ACCEPT, - NULL), + NULL, 0), BPF_SOCK_ADDR_STORE(user_ip6, 1, REJECT, - "invalid bpf_context access off=12 size=8"), + "invalid bpf_context access off=12 size=8", + F_NEEDS_EFFICIENT_UNALIGNED_ACCESS), BPF_SOCK_ADDR_STORE(user_ip6, 2, ACCEPT, - NULL), + NULL, 0), BPF_SOCK_ADDR_STORE(user_ip6, 3, REJECT, - "invalid bpf_context access off=20 size=8"), + "invalid bpf_context access off=20 size=8", + F_NEEDS_EFFICIENT_UNALIGNED_ACCESS), /* msg_src_ip6[0] is _not_ u64 aligned */ BPF_SOCK_ADDR_STORE(msg_src_ip6, 0, REJECT, - "invalid bpf_context access off=44 size=8"), + "invalid bpf_context access off=44 size=8", + F_NEEDS_EFFICIENT_UNALIGNED_ACCESS), BPF_SOCK_ADDR_STORE(msg_src_ip6, 1, ACCEPT, - NULL), + NULL, 0), BPF_SOCK_ADDR_STORE(msg_src_ip6, 2, REJECT, - "invalid bpf_context access off=52 size=8"), + "invalid bpf_context access off=52 size=8", + F_NEEDS_EFFICIENT_UNALIGNED_ACCESS), BPF_SOCK_ADDR_STORE(msg_src_ip6, 3, REJECT, - "invalid bpf_context access off=56 size=8"), + "invalid bpf_context access off=56 size=8", 0), #undef BPF_SOCK_ADDR_STORE -#define BPF_SOCK_ADDR_LOAD(field, off, res, err) \ +#define BPF_SOCK_ADDR_LOAD(field, off, res, err, flgs) \ { \ "wide load from bpf_sock_addr." #field "[" #off "]", \ .insns = { \ @@ -48,26 +53,31 @@ BPF_SOCK_ADDR_STORE(msg_src_ip6, 3, REJECT, .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, \ .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG, \ .errstr = err, \ + .flags = flgs, \ } /* user_ip6[0] is u64 aligned */ BPF_SOCK_ADDR_LOAD(user_ip6, 0, ACCEPT, - NULL), + NULL, 0), BPF_SOCK_ADDR_LOAD(user_ip6, 1, REJECT, - "invalid bpf_context access off=12 size=8"), + "invalid bpf_context access off=12 size=8", + F_NEEDS_EFFICIENT_UNALIGNED_ACCESS), BPF_SOCK_ADDR_LOAD(user_ip6, 2, ACCEPT, - NULL), + NULL, 0), BPF_SOCK_ADDR_LOAD(user_ip6, 3, REJECT, - "invalid bpf_context access off=20 size=8"), + "invalid bpf_context access off=20 size=8", + F_NEEDS_EFFICIENT_UNALIGNED_ACCESS), /* msg_src_ip6[0] is _not_ u64 aligned */ BPF_SOCK_ADDR_LOAD(msg_src_ip6, 0, REJECT, - "invalid bpf_context access off=44 size=8"), + "invalid bpf_context access off=44 size=8", + F_NEEDS_EFFICIENT_UNALIGNED_ACCESS), BPF_SOCK_ADDR_LOAD(msg_src_ip6, 1, ACCEPT, - NULL), + NULL, 0), BPF_SOCK_ADDR_LOAD(msg_src_ip6, 2, REJECT, - "invalid bpf_context access off=52 size=8"), + "invalid bpf_context access off=52 size=8", + F_NEEDS_EFFICIENT_UNALIGNED_ACCESS), BPF_SOCK_ADDR_LOAD(msg_src_ip6, 3, REJECT, - "invalid bpf_context access off=56 size=8"), + "invalid bpf_context access off=56 size=8", 0), #undef BPF_SOCK_ADDR_LOAD -- cgit v1.3.1 From f5eca0b279117f25020112a2f65ec9c3ea25f3ac Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Fri, 23 Oct 2020 10:45:39 +0800 Subject: selftests/powerpc/eeh: disable kselftest timeout setting for eeh-basic The eeh-basic test got its own 60 seconds timeout (defined in commit 414f50434aa2 "selftests/eeh: Bump EEH wait time to 60s") per breakable device. And we have discovered that the number of breakable devices varies on different hardware. The device recovery time ranges from 0 to 35 seconds. In our test pool it will take about 30 seconds to run on a Power8 system that with 5 breakable devices, 60 seconds to run on a Power9 system that with 4 breakable devices. Extend the timeout setting in the kselftest framework to 5 minutes to give it a chance to finish. Signed-off-by: Po-Hsu Lin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201023024539.9512-1-po-hsu.lin@canonical.com --- tools/testing/selftests/powerpc/eeh/Makefile | 2 +- tools/testing/selftests/powerpc/eeh/settings | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/eeh/settings (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/eeh/Makefile b/tools/testing/selftests/powerpc/eeh/Makefile index b397babd569b..ae963eb2dc5b 100644 --- a/tools/testing/selftests/powerpc/eeh/Makefile +++ b/tools/testing/selftests/powerpc/eeh/Makefile @@ -3,7 +3,7 @@ noarg: $(MAKE) -C ../ TEST_PROGS := eeh-basic.sh -TEST_FILES := eeh-functions.sh +TEST_FILES := eeh-functions.sh settings top_srcdir = ../../../../.. include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/eeh/settings b/tools/testing/selftests/powerpc/eeh/settings new file mode 100644 index 000000000000..694d70710ff0 --- /dev/null +++ b/tools/testing/selftests/powerpc/eeh/settings @@ -0,0 +1 @@ +timeout=300 -- cgit v1.3.1 From fcb48454c23c5679d1a2e252f127642e91b05cbe Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Tue, 17 Nov 2020 16:59:11 +1100 Subject: selftests/powerpc: rfi_flush: disable entry flush if present We are about to add an entry flush. The rfi (exit) flush test measures the number of L1D flushes over a syscall with the RFI flush enabled and disabled. But if the entry flush is also enabled, the effect of enabling and disabling the RFI flush is masked. If there is a debugfs entry for the entry flush, disable it during the RFI flush and restore it later. Reported-by: Spoorthy S Signed-off-by: Russell Currey Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- .../testing/selftests/powerpc/security/rfi_flush.c | 35 ++++++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c index 93a65bd1f231..4e7b2776c367 100644 --- a/tools/testing/selftests/powerpc/security/rfi_flush.c +++ b/tools/testing/selftests/powerpc/security/rfi_flush.c @@ -85,19 +85,33 @@ int rfi_flush_test(void) __u64 l1d_misses_total = 0; unsigned long iterations = 100000, zero_size = 24 * 1024; unsigned long l1d_misses_expected; - int rfi_flush_org, rfi_flush; + int rfi_flush_orig, rfi_flush; + int have_entry_flush, entry_flush_orig; SKIP_IF(geteuid() != 0); // The PMU event we use only works on Power7 or later SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); - if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) { + if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) { perror("Unable to read powerpc/rfi_flush debugfs file"); SKIP_IF(1); } - rfi_flush = rfi_flush_org; + if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) { + have_entry_flush = 0; + } else { + have_entry_flush = 1; + + if (entry_flush_orig != 0) { + if (write_debugfs_file("powerpc/entry_flush", 0) < 0) { + perror("error writing to powerpc/entry_flush debugfs file"); + return 1; + } + } + } + + rfi_flush = rfi_flush_orig; fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); FAIL_IF(fd < 0); @@ -106,6 +120,7 @@ int rfi_flush_test(void) FAIL_IF(perf_event_enable(fd)); + // disable L1 prefetching set_dscr(1); iter = repetitions; @@ -147,8 +162,8 @@ again: repetitions * l1d_misses_expected / 2, passes, repetitions); - if (rfi_flush == rfi_flush_org) { - rfi_flush = !rfi_flush_org; + if (rfi_flush == rfi_flush_orig) { + rfi_flush = !rfi_flush_orig; if (write_debugfs_file("powerpc/rfi_flush", rfi_flush) < 0) { perror("error writing to powerpc/rfi_flush debugfs file"); return 1; @@ -164,11 +179,19 @@ again: set_dscr(0); - if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_org) < 0) { + if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) { perror("unable to restore original value of powerpc/rfi_flush debugfs file"); return 1; } + if (have_entry_flush) { + if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) { + perror("unable to restore original value of powerpc/entry_flush " + "debugfs file"); + return 1; + } + } + return rc; } -- cgit v1.3.1 From 89a83a0c69c81a25ce91002b90ca27ed86132a0a Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 17 Nov 2020 16:59:14 +1100 Subject: selftests/powerpc: entry flush test Add a test modelled on the RFI flush test which counts the number of L1D misses doing a simple syscall with the entry flush on and off. For simplicity of backporting, this test duplicates a lot of code from rfi_flush. We clean that up in the next patch. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- .../testing/selftests/powerpc/security/.gitignore | 1 + tools/testing/selftests/powerpc/security/Makefile | 2 +- .../selftests/powerpc/security/entry_flush.c | 198 +++++++++++++++++++++ 3 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/security/entry_flush.c (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore index f795e06f5ae3..4257a1f156bb 100644 --- a/tools/testing/selftests/powerpc/security/.gitignore +++ b/tools/testing/selftests/powerpc/security/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only rfi_flush +entry_flush diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index eadbbff50be6..921152caf1dc 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0+ -TEST_GEN_PROGS := rfi_flush spectre_v2 +TEST_GEN_PROGS := rfi_flush entry_flush spectre_v2 top_srcdir = ../../../../.. CFLAGS += -I../../../../../usr/include diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c new file mode 100644 index 000000000000..7ae7e37204c5 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/entry_flush.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright 2018 IBM Corporation. + */ + +#define __SANE_USERSPACE_TYPES__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "utils.h" + +#define CACHELINE_SIZE 128 + +struct perf_event_read { + __u64 nr; + __u64 l1d_misses; +}; + +static inline __u64 load(void *addr) +{ + __u64 tmp; + + asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr)); + + return tmp; +} + +static void syscall_loop(char *p, unsigned long iterations, + unsigned long zero_size) +{ + for (unsigned long i = 0; i < iterations; i++) { + for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE) + load(p + j); + getppid(); + } +} + +static void sigill_handler(int signr, siginfo_t *info, void *unused) +{ + static int warned; + ucontext_t *ctx = (ucontext_t *)unused; + unsigned long *pc = &UCONTEXT_NIA(ctx); + + /* mtspr 3,RS to check for move to DSCR below */ + if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { + if (!warned++) + printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n"); + *pc += 4; + } else { + printf("SIGILL at %p\n", pc); + abort(); + } +} + +static void set_dscr(unsigned long val) +{ + static int init; + struct sigaction sa; + + if (!init) { + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = sigill_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGILL, &sa, NULL)) + perror("sigill_handler"); + init = 1; + } + + asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); +} + +int entry_flush_test(void) +{ + char *p; + int repetitions = 10; + int fd, passes = 0, iter, rc = 0; + struct perf_event_read v; + __u64 l1d_misses_total = 0; + unsigned long iterations = 100000, zero_size = 24 * 1024; + unsigned long l1d_misses_expected; + int rfi_flush_orig; + int entry_flush, entry_flush_orig; + + SKIP_IF(geteuid() != 0); + + // The PMU event we use only works on Power7 or later + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); + + if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) { + perror("Unable to read powerpc/rfi_flush debugfs file"); + SKIP_IF(1); + } + + if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) { + perror("Unable to read powerpc/entry_flush debugfs file"); + SKIP_IF(1); + } + + if (rfi_flush_orig != 0) { + if (write_debugfs_file("powerpc/rfi_flush", 0) < 0) { + perror("error writing to powerpc/rfi_flush debugfs file"); + FAIL_IF(1); + } + } + + entry_flush = entry_flush_orig; + + fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); + FAIL_IF(fd < 0); + + p = (char *)memalign(zero_size, CACHELINE_SIZE); + + FAIL_IF(perf_event_enable(fd)); + + // disable L1 prefetching + set_dscr(1); + + iter = repetitions; + + /* + * We expect to see l1d miss for each cacheline access when entry_flush + * is set. Allow a small variation on this. + */ + l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2); + +again: + FAIL_IF(perf_event_reset(fd)); + + syscall_loop(p, iterations, zero_size); + + FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v)); + + if (entry_flush && v.l1d_misses >= l1d_misses_expected) + passes++; + else if (!entry_flush && v.l1d_misses < (l1d_misses_expected / 2)) + passes++; + + l1d_misses_total += v.l1d_misses; + + while (--iter) + goto again; + + if (passes < repetitions) { + printf("FAIL (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d failures]\n", + entry_flush, l1d_misses_total, entry_flush ? '<' : '>', + entry_flush ? repetitions * l1d_misses_expected : + repetitions * l1d_misses_expected / 2, + repetitions - passes, repetitions); + rc = 1; + } else { + printf("PASS (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d pass]\n", + entry_flush, l1d_misses_total, entry_flush ? '>' : '<', + entry_flush ? repetitions * l1d_misses_expected : + repetitions * l1d_misses_expected / 2, + passes, repetitions); + } + + if (entry_flush == entry_flush_orig) { + entry_flush = !entry_flush_orig; + if (write_debugfs_file("powerpc/entry_flush", entry_flush) < 0) { + perror("error writing to powerpc/entry_flush debugfs file"); + return 1; + } + iter = repetitions; + l1d_misses_total = 0; + passes = 0; + goto again; + } + + perf_event_disable(fd); + close(fd); + + set_dscr(0); + + if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) { + perror("unable to restore original value of powerpc/rfi_flush debugfs file"); + return 1; + } + + if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) { + perror("unable to restore original value of powerpc/entry_flush debugfs file"); + return 1; + } + + return rc; +} + +int main(int argc, char *argv[]) +{ + return test_harness(entry_flush_test, "entry_flush_test"); +} -- cgit v1.3.1 From 0d239f3b03efc78fb5b290aff6c747fecd3b98cb Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 17 Nov 2020 16:59:15 +1100 Subject: selftests/powerpc: refactor entry and rfi_flush tests For simplicity in backporting, the original entry_flush test contained a lot of duplicated code from the rfi_flush test. De-duplicate that code. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/include/utils.h | 5 ++ tools/testing/selftests/powerpc/security/Makefile | 2 + .../selftests/powerpc/security/entry_flush.c | 61 +------------------ .../selftests/powerpc/security/flush_utils.c | 70 ++++++++++++++++++++++ .../selftests/powerpc/security/flush_utils.h | 17 ++++++ .../testing/selftests/powerpc/security/rfi_flush.c | 61 +------------------ 6 files changed, 96 insertions(+), 120 deletions(-) create mode 100644 tools/testing/selftests/powerpc/security/flush_utils.c create mode 100644 tools/testing/selftests/powerpc/security/flush_utils.h (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index 052b5a775dc2..b7d188fc87c7 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -42,6 +42,11 @@ int perf_event_enable(int fd); int perf_event_disable(int fd); int perf_event_reset(int fd); +struct perf_event_read { + __u64 nr; + __u64 l1d_misses; +}; + #if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 30) #include #include diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index 921152caf1dc..f25e854fe370 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -11,3 +11,5 @@ $(TEST_GEN_PROGS): ../harness.c ../utils.c $(OUTPUT)/spectre_v2: CFLAGS += -m64 $(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S +$(OUTPUT)/rfi_flush: flush_utils.c +$(OUTPUT)/entry_flush: flush_utils.c diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c index 7ae7e37204c5..78cf914fa321 100644 --- a/tools/testing/selftests/powerpc/security/entry_flush.c +++ b/tools/testing/selftests/powerpc/security/entry_flush.c @@ -15,66 +15,7 @@ #include #include #include "utils.h" - -#define CACHELINE_SIZE 128 - -struct perf_event_read { - __u64 nr; - __u64 l1d_misses; -}; - -static inline __u64 load(void *addr) -{ - __u64 tmp; - - asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr)); - - return tmp; -} - -static void syscall_loop(char *p, unsigned long iterations, - unsigned long zero_size) -{ - for (unsigned long i = 0; i < iterations; i++) { - for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE) - load(p + j); - getppid(); - } -} - -static void sigill_handler(int signr, siginfo_t *info, void *unused) -{ - static int warned; - ucontext_t *ctx = (ucontext_t *)unused; - unsigned long *pc = &UCONTEXT_NIA(ctx); - - /* mtspr 3,RS to check for move to DSCR below */ - if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { - if (!warned++) - printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n"); - *pc += 4; - } else { - printf("SIGILL at %p\n", pc); - abort(); - } -} - -static void set_dscr(unsigned long val) -{ - static int init; - struct sigaction sa; - - if (!init) { - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = sigill_handler; - sa.sa_flags = SA_SIGINFO; - if (sigaction(SIGILL, &sa, NULL)) - perror("sigill_handler"); - init = 1; - } - - asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); -} +#include "flush_utils.h" int entry_flush_test(void) { diff --git a/tools/testing/selftests/powerpc/security/flush_utils.c b/tools/testing/selftests/powerpc/security/flush_utils.c new file mode 100644 index 000000000000..0c3c4c40c7fb --- /dev/null +++ b/tools/testing/selftests/powerpc/security/flush_utils.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright 2018 IBM Corporation. + */ + +#define __SANE_USERSPACE_TYPES__ + +#include +#include +#include +#include +#include +#include +#include +#include "utils.h" +#include "flush_utils.h" + +static inline __u64 load(void *addr) +{ + __u64 tmp; + + asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr)); + + return tmp; +} + +void syscall_loop(char *p, unsigned long iterations, + unsigned long zero_size) +{ + for (unsigned long i = 0; i < iterations; i++) { + for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE) + load(p + j); + getppid(); + } +} + +static void sigill_handler(int signr, siginfo_t *info, void *unused) +{ + static int warned; + ucontext_t *ctx = (ucontext_t *)unused; + unsigned long *pc = &UCONTEXT_NIA(ctx); + + /* mtspr 3,RS to check for move to DSCR below */ + if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { + if (!warned++) + printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n"); + *pc += 4; + } else { + printf("SIGILL at %p\n", pc); + abort(); + } +} + +void set_dscr(unsigned long val) +{ + static int init; + struct sigaction sa; + + if (!init) { + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = sigill_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGILL, &sa, NULL)) + perror("sigill_handler"); + init = 1; + } + + asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); +} diff --git a/tools/testing/selftests/powerpc/security/flush_utils.h b/tools/testing/selftests/powerpc/security/flush_utils.h new file mode 100644 index 000000000000..07a5eb301466 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/flush_utils.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +/* + * Copyright 2018 IBM Corporation. + */ + +#ifndef _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H +#define _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H + +#define CACHELINE_SIZE 128 + +void syscall_loop(char *p, unsigned long iterations, + unsigned long zero_size); + +void set_dscr(unsigned long val); + +#endif /* _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H */ diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c index 4e7b2776c367..7565fd786640 100644 --- a/tools/testing/selftests/powerpc/security/rfi_flush.c +++ b/tools/testing/selftests/powerpc/security/rfi_flush.c @@ -10,71 +10,12 @@ #include #include #include -#include #include #include #include #include "utils.h" +#include "flush_utils.h" -#define CACHELINE_SIZE 128 - -struct perf_event_read { - __u64 nr; - __u64 l1d_misses; -}; - -static inline __u64 load(void *addr) -{ - __u64 tmp; - - asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr)); - - return tmp; -} - -static void syscall_loop(char *p, unsigned long iterations, - unsigned long zero_size) -{ - for (unsigned long i = 0; i < iterations; i++) { - for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE) - load(p + j); - getppid(); - } -} - -static void sigill_handler(int signr, siginfo_t *info, void *unused) -{ - static int warned = 0; - ucontext_t *ctx = (ucontext_t *)unused; - unsigned long *pc = &UCONTEXT_NIA(ctx); - - /* mtspr 3,RS to check for move to DSCR below */ - if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { - if (!warned++) - printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n"); - *pc += 4; - } else { - printf("SIGILL at %p\n", pc); - abort(); - } -} - -static void set_dscr(unsigned long val) -{ - static int init = 0; - struct sigaction sa; - - if (!init) { - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = sigill_handler; - sa.sa_flags = SA_SIGINFO; - if (sigaction(SIGILL, &sa, NULL)) - perror("sigill_handler"); - init = 1; - } - - asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); -} int rfi_flush_test(void) { -- cgit v1.3.1 From a61ea6379ae9dbb63fbf022d1456733520db6be7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 19 Nov 2020 14:53:22 +0900 Subject: tools/bootconfig: Fix errno reference after printf() Fix not to refer the errno variable as the result of previous libc functions after printf() because printf() can change the errno. Link: https://lkml.kernel.org/r/160576520243.320071.51093664672431249.stgit@devnote2 Fixes: 85c46b78da58 ("bootconfig: Add bootconfig magic word for indicating bootconfig explicitly") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/main.c | 52 +++++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index eb92027817a7..52eb2bbe8966 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -147,6 +147,12 @@ static int load_xbc_file(const char *path, char **buf) return ret; } +static int pr_errno(const char *msg, int err) +{ + pr_err("%s: %d\n", msg, err); + return err; +} + static int load_xbc_from_initrd(int fd, char **buf) { struct stat stat; @@ -162,26 +168,24 @@ static int load_xbc_from_initrd(int fd, char **buf) if (stat.st_size < 8 + BOOTCONFIG_MAGIC_LEN) return 0; - if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) { - pr_err("Failed to lseek: %d\n", -errno); - return -errno; - } + if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) + return pr_errno("Failed to lseek for magic", -errno); + if (read(fd, magic, BOOTCONFIG_MAGIC_LEN) < 0) - return -errno; + return pr_errno("Failed to read", -errno); + /* Check the bootconfig magic bytes */ if (memcmp(magic, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN) != 0) return 0; - if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0) { - pr_err("Failed to lseek: %d\n", -errno); - return -errno; - } + if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0) + return pr_errno("Failed to lseek for size", -errno); if (read(fd, &size, sizeof(u32)) < 0) - return -errno; + return pr_errno("Failed to read size", -errno); if (read(fd, &csum, sizeof(u32)) < 0) - return -errno; + return pr_errno("Failed to read checksum", -errno); /* Wrong size error */ if (stat.st_size < size + 8 + BOOTCONFIG_MAGIC_LEN) { @@ -190,10 +194,8 @@ static int load_xbc_from_initrd(int fd, char **buf) } if (lseek(fd, stat.st_size - (size + 8 + BOOTCONFIG_MAGIC_LEN), - SEEK_SET) < 0) { - pr_err("Failed to lseek: %d\n", -errno); - return -errno; - } + SEEK_SET) < 0) + return pr_errno("Failed to lseek", -errno); ret = load_xbc_fd(fd, buf, size); if (ret < 0) @@ -262,14 +264,16 @@ static int show_xbc(const char *path, bool list) ret = stat(path, &st); if (ret < 0) { - pr_err("Failed to stat %s: %d\n", path, -errno); - return -errno; + ret = -errno; + pr_err("Failed to stat %s: %d\n", path, ret); + return ret; } fd = open(path, O_RDONLY); if (fd < 0) { - pr_err("Failed to open initrd %s: %d\n", path, fd); - return -errno; + ret = -errno; + pr_err("Failed to open initrd %s: %d\n", path, ret); + return ret; } ret = load_xbc_from_initrd(fd, &buf); @@ -307,8 +311,9 @@ static int delete_xbc(const char *path) fd = open(path, O_RDWR); if (fd < 0) { - pr_err("Failed to open initrd %s: %d\n", path, fd); - return -errno; + ret = -errno; + pr_err("Failed to open initrd %s: %d\n", path, ret); + return ret; } size = load_xbc_from_initrd(fd, &buf); @@ -383,9 +388,10 @@ static int apply_xbc(const char *path, const char *xbc_path) /* Apply new one */ fd = open(path, O_RDWR | O_APPEND); if (fd < 0) { - pr_err("Failed to open %s: %d\n", path, fd); + ret = -errno; + pr_err("Failed to open %s: %d\n", path, ret); free(data); - return fd; + return ret; } /* TODO: Ensure the @path is initramfs/initrd image */ ret = write(fd, data, size + 8); -- cgit v1.3.1 From a995e6bc0524450adfd6181dfdcd9d0520cfaba5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 19 Nov 2020 14:53:31 +0900 Subject: tools/bootconfig: Fix to check the write failure correctly Fix to check the write(2) failure including partial write correctly and try to rollback the partial write, because if there is no BOOTCONFIG_MAGIC string, we can not remove it. Link: https://lkml.kernel.org/r/160576521135.320071.3883101436675969998.stgit@devnote2 Fixes: 85c46b78da58 ("bootconfig: Add bootconfig magic word for indicating bootconfig explicitly") Suggested-by: Linus Torvalds Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/main.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 52eb2bbe8966..a0733cbb3c49 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -337,6 +337,7 @@ static int delete_xbc(const char *path) static int apply_xbc(const char *path, const char *xbc_path) { + struct stat stat; u32 size, csum; char *buf, *data; int ret, fd; @@ -394,16 +395,26 @@ static int apply_xbc(const char *path, const char *xbc_path) return ret; } /* TODO: Ensure the @path is initramfs/initrd image */ + if (fstat(fd, &stat) < 0) { + pr_err("Failed to get the size of %s\n", path); + goto out; + } ret = write(fd, data, size + 8); - if (ret < 0) { + if (ret < size + 8) { + if (ret < 0) + ret = -errno; pr_err("Failed to apply a boot config: %d\n", ret); - goto out; + if (ret < 0) + goto out; + goto out_rollback; } /* Write a magic word of the bootconfig */ ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); - if (ret < 0) { + if (ret < BOOTCONFIG_MAGIC_LEN) { + if (ret < 0) + ret = -errno; pr_err("Failed to apply a boot config magic: %d\n", ret); - goto out; + goto out_rollback; } ret = 0; out: @@ -411,6 +422,17 @@ out: free(data); return ret; + +out_rollback: + /* Map the partial write to -ENOSPC */ + if (ret >= 0) + ret = -ENOSPC; + if (ftruncate(fd, stat.st_size) < 0) { + ret = -errno; + pr_err("Failed to rollback the write error: %d\n", ret); + pr_err("The initrd %s may be corrupted. Recommend to rebuild.\n", path); + } + goto out; } static int usage(void) -- cgit v1.3.1 From e1cef2d4c379b2aab43a7dc9601f645048209090 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 19 Nov 2020 14:53:40 +0900 Subject: tools/bootconfig: Align the bootconfig applied initrd image size to 4 Align the bootconfig applied initrd image size to 4. To fill the gap, the bootconfig command uses null characters in between the bootconfig data and the footer. This will expands the footer size but don't change the checksum. Thus the block image of the initrd file with bootconfig is as follows. [initrd][bootconfig][(pad)][size][csum]["#BOOTCONFIG\n"] Link: https://lkml.kernel.org/r/160576522046.320071.8550680670010950634.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/bootconfig.h | 3 ++ tools/bootconfig/main.c | 57 ++++++++++++++++++++++--------------- tools/bootconfig/test-bootconfig.sh | 6 +++- 3 files changed, 42 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index 9903088891fa..2696eb0fc149 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -12,6 +12,9 @@ #define BOOTCONFIG_MAGIC "#BOOTCONFIG\n" #define BOOTCONFIG_MAGIC_LEN 12 +#define BOOTCONFIG_ALIGN_SHIFT 2 +#define BOOTCONFIG_ALIGN (1 << BOOTCONFIG_ALIGN_SHIFT) +#define BOOTCONFIG_ALIGN_MASK (BOOTCONFIG_ALIGN - 1) /* XBC tree node */ struct xbc_node { diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index a0733cbb3c49..4a445b6304bb 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -337,12 +337,13 @@ static int delete_xbc(const char *path) static int apply_xbc(const char *path, const char *xbc_path) { + char *buf, *data, *p; + size_t total_size; struct stat stat; + const char *msg; u32 size, csum; - char *buf, *data; + int pos, pad; int ret, fd; - const char *msg; - int pos; ret = load_xbc_file(xbc_path, &buf); if (ret < 0) { @@ -352,13 +353,12 @@ static int apply_xbc(const char *path, const char *xbc_path) size = strlen(buf) + 1; csum = checksum((unsigned char *)buf, size); - /* Prepare xbc_path data */ - data = malloc(size + 8); + /* Backup the bootconfig data */ + data = calloc(size + BOOTCONFIG_ALIGN + + sizeof(u32) + sizeof(u32) + BOOTCONFIG_MAGIC_LEN, 1); if (!data) return -ENOMEM; - strcpy(data, buf); - *(u32 *)(data + size) = size; - *(u32 *)(data + size + 4) = csum; + memcpy(data, buf, size); /* Check the data format */ ret = xbc_init(buf, &msg, &pos); @@ -399,24 +399,35 @@ static int apply_xbc(const char *path, const char *xbc_path) pr_err("Failed to get the size of %s\n", path); goto out; } - ret = write(fd, data, size + 8); - if (ret < size + 8) { + + /* To align up the total size to BOOTCONFIG_ALIGN, get padding size */ + total_size = stat.st_size + size + sizeof(u32) * 2 + BOOTCONFIG_MAGIC_LEN; + pad = ((total_size + BOOTCONFIG_ALIGN - 1) & (~BOOTCONFIG_ALIGN_MASK)) - total_size; + size += pad; + + /* Add a footer */ + p = data + size; + *(u32 *)p = size; + p += sizeof(u32); + + *(u32 *)p = csum; + p += sizeof(u32); + + memcpy(p, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); + p += BOOTCONFIG_MAGIC_LEN; + + total_size = p - data; + + ret = write(fd, data, total_size); + if (ret < total_size) { if (ret < 0) ret = -errno; pr_err("Failed to apply a boot config: %d\n", ret); - if (ret < 0) - goto out; - goto out_rollback; - } - /* Write a magic word of the bootconfig */ - ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); - if (ret < BOOTCONFIG_MAGIC_LEN) { - if (ret < 0) - ret = -errno; - pr_err("Failed to apply a boot config magic: %d\n", ret); - goto out_rollback; - } - ret = 0; + if (ret >= 0) + goto out_rollback; + } else + ret = 0; + out: close(fd); free(data); diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh index d295e406a756..baed891d0ba4 100755 --- a/tools/bootconfig/test-bootconfig.sh +++ b/tools/bootconfig/test-bootconfig.sh @@ -9,6 +9,7 @@ else TESTDIR=. fi BOOTCONF=${TESTDIR}/bootconfig +ALIGN=4 INITRD=`mktemp ${TESTDIR}/initrd-XXXX` TEMPCONF=`mktemp ${TESTDIR}/temp-XXXX.bconf` @@ -59,7 +60,10 @@ echo "Show command test" xpass $BOOTCONF $INITRD echo "File size check" -xpass test $new_size -eq $(expr $bconf_size + $initrd_size + 9 + 12) +total_size=$(expr $bconf_size + $initrd_size + 9 + 12 + $ALIGN - 1 ) +total_size=$(expr $total_size / $ALIGN) +total_size=$(expr $total_size \* $ALIGN) +xpass test $new_size -eq $total_size echo "Apply command repeat test" xpass $BOOTCONF -a $TEMPCONF $INITRD -- cgit v1.3.1 From 1fd6cee127e2ddff36d648573d7566aafb0d0b77 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 18 Nov 2020 22:13:50 +0100 Subject: libbpf: Fix VERSIONED_SYM_COUNT number parsing We remove "other info" from "readelf -s --wide" output when parsing GLOBAL_SYM_COUNT variable, which was added in [1]. But we don't do that for VERSIONED_SYM_COUNT and it's failing the check_abi target on powerpc Fedora 33. The extra "other info" wasn't problem for VERSIONED_SYM_COUNT parsing until commit [2] added awk in the pipe, which assumes that the last column is symbol, but it can be "other info". Adding "other info" removal for VERSIONED_SYM_COUNT the same way as we did for GLOBAL_SYM_COUNT parsing. [1] aa915931ac3e ("libbpf: Fix readelf output parsing for Fedora") [2] 746f534a4809 ("tools/libbpf: Avoid counting local symbols in ABI check") Fixes: 746f534a4809 ("tools/libbpf: Avoid counting local symbols in ABI check") Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201118211350.1493421-1-jolsa@kernel.org --- tools/lib/bpf/Makefile | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 5f9abed3e226..55bd78b3496f 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -146,6 +146,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ sort -u | wc -l) VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ + sed 's/\[.*\]//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) @@ -214,6 +215,7 @@ check_abi: $(OUTPUT)libbpf.so awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ sort -u > $(OUTPUT)libbpf_global_syms.tmp; \ readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ + sed 's/\[.*\]//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \ sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \ -- cgit v1.3.1 From c8a36aedf3e24768e94d87fdcdd37684bd241c44 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Tue, 17 Nov 2020 12:05:46 -0800 Subject: selftest/bpf: Test bpf_probe_read_user_str() strips trailing bytes after NUL Previously, bpf_probe_read_user_str() could potentially overcopy the trailing bytes after the NUL due to how do_strncpy_from_user() does the copy in long-sized strides. The issue has been fixed in the previous commit. This commit adds a selftest that ensures we don't regress bpf_probe_read_user_str() again. Signed-off-by: Daniel Xu Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/4d977508fab4ec5b7b574b85bdf8b398868b6ee9.1605642949.git.dxu@dxuuu.xyz --- .../selftests/bpf/prog_tests/probe_read_user_str.c | 71 ++++++++++++++++++++++ .../selftests/bpf/progs/test_probe_read_user_str.c | 25 ++++++++ 2 files changed, 96 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c create mode 100644 tools/testing/selftests/bpf/progs/test_probe_read_user_str.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c b/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c new file mode 100644 index 000000000000..e419298132b5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "test_probe_read_user_str.skel.h" + +static const char str1[] = "mestring"; +static const char str2[] = "mestringalittlebigger"; +static const char str3[] = "mestringblubblubblubblubblub"; + +static int test_one_str(struct test_probe_read_user_str *skel, const char *str, + size_t len) +{ + int err, duration = 0; + char buf[256]; + + /* Ensure bytes after string are ones */ + memset(buf, 1, sizeof(buf)); + memcpy(buf, str, len); + + /* Give prog our userspace pointer */ + skel->bss->user_ptr = buf; + + /* Trigger tracepoint */ + usleep(1); + + /* Did helper fail? */ + if (CHECK(skel->bss->ret < 0, "prog_ret", "prog returned: %ld\n", + skel->bss->ret)) + return 1; + + /* Check that string was copied correctly */ + err = memcmp(skel->bss->buf, str, len); + if (CHECK(err, "memcmp", "prog copied wrong string")) + return 1; + + /* Now check that no extra trailing bytes were copied */ + memset(buf, 0, sizeof(buf)); + err = memcmp(skel->bss->buf + len, buf, sizeof(buf) - len); + if (CHECK(err, "memcmp", "trailing bytes were not stripped")) + return 1; + + return 0; +} + +void test_probe_read_user_str(void) +{ + struct test_probe_read_user_str *skel; + int err, duration = 0; + + skel = test_probe_read_user_str__open_and_load(); + if (CHECK(!skel, "test_probe_read_user_str__open_and_load", + "skeleton open and load failed\n")) + return; + + /* Give pid to bpf prog so it doesn't read from anyone else */ + skel->bss->pid = getpid(); + + err = test_probe_read_user_str__attach(skel); + if (CHECK(err, "test_probe_read_user_str__attach", + "skeleton attach failed: %d\n", err)) + goto out; + + if (test_one_str(skel, str1, sizeof(str1))) + goto out; + if (test_one_str(skel, str2, sizeof(str2))) + goto out; + if (test_one_str(skel, str3, sizeof(str3))) + goto out; + +out: + test_probe_read_user_str__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c b/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c new file mode 100644 index 000000000000..3ae398b75dcd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + +#include + +pid_t pid = 0; +long ret = 0; +void *user_ptr = 0; +char buf[256] = {}; + +SEC("tracepoint/syscalls/sys_enter_nanosleep") +int on_write(void *ctx) +{ + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + ret = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr); + + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.3.1 From 450d060e8f752a6ce052a2bffd3f01633472e330 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 18 Nov 2020 23:30:39 -0800 Subject: bpftool: Add {i,d}tlb_misses support for bpftool profile Commit 47c09d6a9f67("bpftool: Introduce "prog profile" command") introduced "bpftool prog profile" command which can be used to profile bpf program with metrics like # of instructions, This patch added support for itlb_misses and dtlb_misses. During an internal bpf program performance evaluation, I found these two metrics are also very useful. The following is an example output: $ bpftool prog profile id 324 duration 3 cycles itlb_misses 1885029 run_cnt 5134686073 cycles 306893 itlb_misses $ bpftool prog profile id 324 duration 3 cycles dtlb_misses 1827382 run_cnt 4943593648 cycles 5975636 dtlb_misses $ bpftool prog profile id 324 duration 3 cycles llc_misses 1836527 run_cnt 5019612972 cycles 4161041 llc_misses From the above, we can see quite some dtlb misses, 3 dtlb misses perf prog run. This might be something worth further investigation. Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201119073039.4060095-1-yhs@fb.com --- tools/bpf/bpftool/prog.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index acdb2c245f0a..1fe3ba255bad 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1717,6 +1717,34 @@ struct profile_metric { .ratio_desc = "LLC misses per million insns", .ratio_mul = 1e6, }, + { + .name = "itlb_misses", + .attr = { + .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_ITLB | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), + .exclude_user = 1 + }, + .ratio_metric = 2, + .ratio_desc = "itlb misses per million insns", + .ratio_mul = 1e6, + }, + { + .name = "dtlb_misses", + .attr = { + .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_DTLB | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), + .exclude_user = 1 + }, + .ratio_metric = 2, + .ratio_desc = "dtlb misses per million insns", + .ratio_mul = 1e6, + }, }; static __u64 profile_total_count; @@ -2109,7 +2137,7 @@ static int do_help(int argc, char **argv) " struct_ops | fentry | fexit | freplace | sk_lookup }\n" " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n" " flow_dissector }\n" - " METRIC := { cycles | instructions | l1d_loads | llc_misses }\n" + " METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2]); -- cgit v1.3.1 From f5098e34dd4c774c3040e417960f1637e5daade8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 17 Nov 2020 11:33:02 -0800 Subject: selftests/seccomp: powerpc: Fix typo in macro variable name A typo sneaked into the powerpc selftest. Fix the name so it builds again. Fixes: 46138329faea ("selftests/seccomp: powerpc: Fix seccomp return value testing") Acked-by: Michael Ellerman Link: https://lore.kernel.org/lkml/87y2ix2895.fsf@mpe.ellerman.id.au Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 4a180439ee9e..7f7ecfcd66db 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1758,10 +1758,10 @@ TEST_F(TRACE_poke, getpid_runs_normally) * and the code is stored as a positive value. \ */ \ if (_result < 0) { \ - SYSCALL_RET(_regs) = -result; \ + SYSCALL_RET(_regs) = -_result; \ (_regs).ccr |= 0x10000000; \ } else { \ - SYSCALL_RET(_regs) = result; \ + SYSCALL_RET(_regs) = _result; \ (_regs).ccr &= ~0x10000000; \ } \ } while (0) -- cgit v1.3.1 From 4c222f31fb1db4d590503a181a6268ced9252379 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 17 Nov 2020 11:54:43 -0800 Subject: selftests/seccomp: sh: Fix register names It looks like the seccomp selftests was never actually built for sh. This fixes it, though I don't have an environment to do a runtime test of it yet. Fixes: 0bb605c2c7f2b4b3 ("sh: Add SECCOMP_FILTER") Tested-by: John Paul Adrian Glaubitz Link: https://lore.kernel.org/lkml/a36d7b48-6598-1642-e403-0c77a86f416d@physik.fu-berlin.de Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 7f7ecfcd66db..26c72f2b61b1 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1804,8 +1804,8 @@ TEST_F(TRACE_poke, getpid_runs_normally) #define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2] #elif defined(__sh__) # define ARCH_REGS struct pt_regs -# define SYSCALL_NUM(_regs) (_regs).gpr[3] -# define SYSCALL_RET(_regs) (_regs).gpr[0] +# define SYSCALL_NUM(_regs) (_regs).regs[3] +# define SYSCALL_RET(_regs) (_regs).regs[0] #else # error "Do not know how to find your architecture's registers and syscalls" #endif -- cgit v1.3.1 From 192cf32243ce39af65bd095625aec374b38c03df Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 11 Oct 2020 10:47:45 -0500 Subject: selftests/seccomp: Compare bitmap vs filter overhead MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As part of the seccomp benchmarking, include the expectations with regard to the timing behavior of the constant action bitmaps, and report inconsistencies better. Example output with constant action bitmaps on x86: $ sudo ./seccomp_benchmark 100000000 Current BPF sysctl settings: net.core.bpf_jit_enable = 1 net.core.bpf_jit_harden = 0 Benchmarking 200000000 syscalls... 129.359381409 - 0.008724424 = 129350656985 (129.4s) getpid native: 646 ns 264.385890006 - 129.360453229 = 135025436777 (135.0s) getpid RET_ALLOW 1 filter (bitmap): 675 ns 399.400511893 - 264.387045901 = 135013465992 (135.0s) getpid RET_ALLOW 2 filters (bitmap): 675 ns 545.872866260 - 399.401718327 = 146471147933 (146.5s) getpid RET_ALLOW 3 filters (full): 732 ns 696.337101319 - 545.874097681 = 150463003638 (150.5s) getpid RET_ALLOW 4 filters (full): 752 ns Estimated total seccomp overhead for 1 bitmapped filter: 29 ns Estimated total seccomp overhead for 2 bitmapped filters: 29 ns Estimated total seccomp overhead for 3 full filters: 86 ns Estimated total seccomp overhead for 4 full filters: 106 ns Estimated seccomp entry overhead: 29 ns Estimated seccomp per-filter overhead (last 2 diff): 20 ns Estimated seccomp per-filter overhead (filters / 4): 19 ns Expectations: native ≤ 1 bitmap (646 ≤ 675): ✔️ native ≤ 1 filter (646 ≤ 732): ✔️ per-filter (last 2 diff) ≈ per-filter (filters / 4) (20 ≈ 19): ✔️ 1 bitmapped ≈ 2 bitmapped (29 ≈ 29): ✔️ entry ≈ 1 bitmapped (29 ≈ 29): ✔️ entry ≈ 2 bitmapped (29 ≈ 29): ✔️ native + entry + (per filter * 4) ≈ 4 filters total (755 ≈ 752): ✔️ [YiFei: Changed commit message to show stats for this patch series] Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/1b61df3db85c5f7f1b9202722c45e7b39df73ef2.1602431034.git.yifeifz2@illinois.edu --- .../testing/selftests/seccomp/seccomp_benchmark.c | 151 ++++++++++++++++++--- tools/testing/selftests/seccomp/settings | 2 +- 2 files changed, 130 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index 91f5a89cadac..fcc806585266 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -4,12 +4,16 @@ */ #define _GNU_SOURCE #include +#include +#include +#include #include #include #include #include #include #include +#include #include #include #include @@ -70,18 +74,74 @@ unsigned long long calibrate(void) return samples * seconds; } +bool approx(int i_one, int i_two) +{ + double one = i_one, one_bump = one * 0.01; + double two = i_two, two_bump = two * 0.01; + + one_bump = one + MAX(one_bump, 2.0); + two_bump = two + MAX(two_bump, 2.0); + + /* Equal to, or within 1% or 2 digits */ + if (one == two || + (one > two && one <= two_bump) || + (two > one && two <= one_bump)) + return true; + return false; +} + +bool le(int i_one, int i_two) +{ + if (i_one <= i_two) + return true; + return false; +} + +long compare(const char *name_one, const char *name_eval, const char *name_two, + unsigned long long one, bool (*eval)(int, int), unsigned long long two) +{ + bool good; + + printf("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two, + (long long)one, name_eval, (long long)two); + if (one > INT_MAX) { + printf("Miscalculation! Measurement went negative: %lld\n", (long long)one); + return 1; + } + if (two > INT_MAX) { + printf("Miscalculation! Measurement went negative: %lld\n", (long long)two); + return 1; + } + + good = eval(one, two); + printf("%s\n", good ? "✔️" : "❌"); + + return good ? 0 : 1; +} + int main(int argc, char *argv[]) { + struct sock_filter bitmap_filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog bitmap_prog = { + .len = (unsigned short)ARRAY_SIZE(bitmap_filter), + .filter = bitmap_filter, + }; struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, args[0])), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; struct sock_fprog prog = { .len = (unsigned short)ARRAY_SIZE(filter), .filter = filter, }; - long ret; - unsigned long long samples; - unsigned long long native, filter1, filter2; + + long ret, bits; + unsigned long long samples, calc; + unsigned long long native, filter1, filter2, bitmap1, bitmap2; + unsigned long long entry, per_filter1, per_filter2; printf("Current BPF sysctl settings:\n"); system("sysctl net.core.bpf_jit_enable"); @@ -101,35 +161,82 @@ int main(int argc, char *argv[]) ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); assert(ret == 0); - /* One filter */ - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + /* One filter resulting in a bitmap */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog); assert(ret == 0); - filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 1 filter: %llu ns\n", filter1); + bitmap1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; + printf("getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1); + + /* Second filter resulting in a bitmap */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog); + assert(ret == 0); - if (filter1 == native) - printf("No overhead measured!? Try running again with more samples.\n"); + bitmap2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; + printf("getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2); - /* Two filters */ + /* Third filter, can no longer be converted to bitmap */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); assert(ret == 0); - filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 2 filters: %llu ns\n", filter2); - - /* Calculations */ - printf("Estimated total seccomp overhead for 1 filter: %llu ns\n", - filter1 - native); + filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; + printf("getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1); - printf("Estimated total seccomp overhead for 2 filters: %llu ns\n", - filter2 - native); + /* Fourth filter, can not be converted to bitmap because of filter 3 */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog); + assert(ret == 0); - printf("Estimated seccomp per-filter overhead: %llu ns\n", - filter2 - filter1); + filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; + printf("getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2); + + /* Estimations */ +#define ESTIMATE(fmt, var, what) do { \ + var = (what); \ + printf("Estimated " fmt ": %llu ns\n", var); \ + if (var > INT_MAX) \ + goto more_samples; \ + } while (0) + + ESTIMATE("total seccomp overhead for 1 bitmapped filter", calc, + bitmap1 - native); + ESTIMATE("total seccomp overhead for 2 bitmapped filters", calc, + bitmap2 - native); + ESTIMATE("total seccomp overhead for 3 full filters", calc, + filter1 - native); + ESTIMATE("total seccomp overhead for 4 full filters", calc, + filter2 - native); + ESTIMATE("seccomp entry overhead", entry, + bitmap1 - native - (bitmap2 - bitmap1)); + ESTIMATE("seccomp per-filter overhead (last 2 diff)", per_filter1, + filter2 - filter1); + ESTIMATE("seccomp per-filter overhead (filters / 4)", per_filter2, + (filter2 - native - entry) / 4); + + printf("Expectations:\n"); + ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1); + bits = compare("native", "≤", "1 filter", native, le, filter1); + if (bits) + goto more_samples; + + ret |= compare("per-filter (last 2 diff)", "≈", "per-filter (filters / 4)", + per_filter1, approx, per_filter2); + + bits = compare("1 bitmapped", "≈", "2 bitmapped", + bitmap1 - native, approx, bitmap2 - native); + if (bits) { + printf("Skipping constant action bitmap expectations: they appear unsupported.\n"); + goto out; + } - printf("Estimated seccomp entry overhead: %llu ns\n", - filter1 - native - (filter2 - filter1)); + ret |= compare("entry", "≈", "1 bitmapped", entry, approx, bitmap1 - native); + ret |= compare("entry", "≈", "2 bitmapped", entry, approx, bitmap2 - native); + ret |= compare("native + entry + (per filter * 4)", "≈", "4 filters total", + entry + (per_filter1 * 4) + native, approx, filter2); + if (ret == 0) + goto out; +more_samples: + printf("Saw unexpected benchmark result. Try running again with more samples?\n"); +out: return 0; } diff --git a/tools/testing/selftests/seccomp/settings b/tools/testing/selftests/seccomp/settings index ba4d85f74cd6..6091b45d226b 100644 --- a/tools/testing/selftests/seccomp/settings +++ b/tools/testing/selftests/seccomp/settings @@ -1 +1 @@ -timeout=90 +timeout=120 -- cgit v1.3.1 From fbb8531e58bd989868db3c2513d06870c46bd87f Mon Sep 17 00:00:00 2001 From: Antonio Cardace Date: Wed, 18 Nov 2020 21:45:20 +0100 Subject: selftests: extract common functions in ethtool-common.sh Factor out some useful functions so that they can be reused by other ethtool-netdevsim scripts. Signed-off-by: Antonio Cardace Signed-off-by: Jakub Kicinski --- .../drivers/net/netdevsim/ethtool-common.sh | 69 ++++++++++++++++++++++ .../drivers/net/netdevsim/ethtool-pause.sh | 63 +------------------- 2 files changed, 71 insertions(+), 61 deletions(-) create mode 100644 tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh new file mode 100644 index 000000000000..fa44cf6e732c --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh @@ -0,0 +1,69 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +NSIM_ID=$((RANDOM % 1024)) +NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID +NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID/ports/0 +NSIM_NETDEV= +num_passes=0 +num_errors=0 + +function cleanup_nsim { + if [ -e $NSIM_DEV_SYS ]; then + echo $NSIM_ID > /sys/bus/netdevsim/del_device + fi +} + +function cleanup { + cleanup_nsim +} + +trap cleanup EXIT + +function get_netdev_name { + local -n old=$1 + + new=$(ls /sys/class/net) + + for netdev in $new; do + for check in $old; do + [ $netdev == $check ] && break + done + + if [ $netdev != $check ]; then + echo $netdev + break + fi + done +} + +function check { + local code=$1 + local str=$2 + local exp_str=$3 + + if [ $code -ne 0 ]; then + ((num_errors++)) + return + fi + + if [ "$str" != "$exp_str" ]; then + echo -e "Expected: '$exp_str', got '$str'" + ((num_errors++)) + return + fi + + ((num_passes++)) +} + +function make_netdev { + # Make a netdevsim + old_netdevs=$(ls /sys/class/net) + + if ! $(lsmod | grep -q netdevsim); then + modprobe netdevsim + fi + + echo $NSIM_ID > /sys/bus/netdevsim/new_device + echo `get_netdev_name old_netdevs` +} diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh index 25c896b9e2eb..b4a7abfe5454 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh @@ -1,60 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0-only -NSIM_ID=$((RANDOM % 1024)) -NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID -NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID/ports/0 -NSIM_NETDEV= -num_passes=0 -num_errors=0 - -function cleanup_nsim { - if [ -e $NSIM_DEV_SYS ]; then - echo $NSIM_ID > /sys/bus/netdevsim/del_device - fi -} - -function cleanup { - cleanup_nsim -} - -trap cleanup EXIT - -function get_netdev_name { - local -n old=$1 - - new=$(ls /sys/class/net) - - for netdev in $new; do - for check in $old; do - [ $netdev == $check ] && break - done - - if [ $netdev != $check ]; then - echo $netdev - break - fi - done -} - -function check { - local code=$1 - local str=$2 - local exp_str=$3 - - if [ $code -ne 0 ]; then - ((num_errors++)) - return - fi - - if [ "$str" != "$exp_str" ]; then - echo -e "Expected: '$exp_str', got '$str'" - ((num_errors++)) - return - fi - - ((num_passes++)) -} +source ethtool-common.sh # Bail if ethtool is too old if ! ethtool -h | grep include-stat 2>&1 >/dev/null; then @@ -62,13 +9,7 @@ if ! ethtool -h | grep include-stat 2>&1 >/dev/null; then exit 4 fi -# Make a netdevsim -old_netdevs=$(ls /sys/class/net) - -modprobe netdevsim -echo $NSIM_ID > /sys/bus/netdevsim/new_device - -NSIM_NETDEV=`get_netdev_name old_netdevs` +NSIM_NETDEV=$(make_netdev) set -o pipefail -- cgit v1.3.1 From 9e48ee80ac4e04c9985379d58248dd2a96a170ef Mon Sep 17 00:00:00 2001 From: Antonio Cardace Date: Wed, 18 Nov 2020 21:45:21 +0100 Subject: selftests: refactor get_netdev_name function As pointed out by Michal Kubecek, getting the name with the previous approach was racy, it's better and easier to get the name of the device with this patch's approach. Essentialy the function doesn't need to exist anymore as it's a simple 'ls' command. Signed-off-by: Antonio Cardace Signed-off-by: Jakub Kicinski --- .../drivers/net/netdevsim/ethtool-common.sh | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh index fa44cf6e732c..9f64d5c7107b 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh @@ -20,23 +20,6 @@ function cleanup { trap cleanup EXIT -function get_netdev_name { - local -n old=$1 - - new=$(ls /sys/class/net) - - for netdev in $new; do - for check in $old; do - [ $netdev == $check ] && break - done - - if [ $netdev != $check ]; then - echo $netdev - break - fi - done -} - function check { local code=$1 local str=$2 @@ -65,5 +48,6 @@ function make_netdev { fi echo $NSIM_ID > /sys/bus/netdevsim/new_device - echo `get_netdev_name old_netdevs` + # get new device name + ls /sys/bus/netdevsim/devices/netdevsim${NSIM_ID}/net/ } -- cgit v1.3.1 From fbb7a1f8137df4a693ea2b44096ad8ec518e3db1 Mon Sep 17 00:00:00 2001 From: Antonio Cardace Date: Wed, 18 Nov 2020 21:45:22 +0100 Subject: selftests: add ring and coalesce selftests Add scripts to test ring and coalesce settings of netdevsim. Signed-off-by: Antonio Cardace Signed-off-by: Jakub Kicinski --- .../drivers/net/netdevsim/ethtool-coalesce.sh | 132 +++++++++++++++++++++ .../drivers/net/netdevsim/ethtool-ring.sh | 85 +++++++++++++ 2 files changed, 217 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh create mode 100755 tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh new file mode 100755 index 000000000000..9adfba8f87e6 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh @@ -0,0 +1,132 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +source ethtool-common.sh + +function get_value { + local query="${SETTINGS_MAP[$1]}" + + echo $(ethtool -c $NSIM_NETDEV | \ + awk -F':' -v pattern="$query:" '$0 ~ pattern {gsub(/[ \t]/, "", $2); print $2}') +} + +function update_current_settings { + for key in ${!SETTINGS_MAP[@]}; do + CURRENT_SETTINGS[$key]=$(get_value $key) + done + echo ${CURRENT_SETTINGS[@]} +} + +if ! ethtool -h | grep -q coalesce; then + echo "SKIP: No --coalesce support in ethtool" + exit 4 +fi + +NSIM_NETDEV=$(make_netdev) + +set -o pipefail + +declare -A SETTINGS_MAP=( + ["rx-frames-low"]="rx-frame-low" + ["tx-frames-low"]="tx-frame-low" + ["rx-frames-high"]="rx-frame-high" + ["tx-frames-high"]="tx-frame-high" + ["rx-usecs"]="rx-usecs" + ["rx-frames"]="rx-frames" + ["rx-usecs-irq"]="rx-usecs-irq" + ["rx-frames-irq"]="rx-frames-irq" + ["tx-usecs"]="tx-usecs" + ["tx-frames"]="tx-frames" + ["tx-usecs-irq"]="tx-usecs-irq" + ["tx-frames-irq"]="tx-frames-irq" + ["stats-block-usecs"]="stats-block-usecs" + ["pkt-rate-low"]="pkt-rate-low" + ["rx-usecs-low"]="rx-usecs-low" + ["tx-usecs-low"]="tx-usecs-low" + ["pkt-rate-high"]="pkt-rate-high" + ["rx-usecs-high"]="rx-usecs-high" + ["tx-usecs-high"]="tx-usecs-high" + ["sample-interval"]="sample-interval" +) + +declare -A CURRENT_SETTINGS=( + ["rx-frames-low"]="" + ["tx-frames-low"]="" + ["rx-frames-high"]="" + ["tx-frames-high"]="" + ["rx-usecs"]="" + ["rx-frames"]="" + ["rx-usecs-irq"]="" + ["rx-frames-irq"]="" + ["tx-usecs"]="" + ["tx-frames"]="" + ["tx-usecs-irq"]="" + ["tx-frames-irq"]="" + ["stats-block-usecs"]="" + ["pkt-rate-low"]="" + ["rx-usecs-low"]="" + ["tx-usecs-low"]="" + ["pkt-rate-high"]="" + ["rx-usecs-high"]="" + ["tx-usecs-high"]="" + ["sample-interval"]="" +) + +declare -A EXPECTED_SETTINGS=( + ["rx-frames-low"]="" + ["tx-frames-low"]="" + ["rx-frames-high"]="" + ["tx-frames-high"]="" + ["rx-usecs"]="" + ["rx-frames"]="" + ["rx-usecs-irq"]="" + ["rx-frames-irq"]="" + ["tx-usecs"]="" + ["tx-frames"]="" + ["tx-usecs-irq"]="" + ["tx-frames-irq"]="" + ["stats-block-usecs"]="" + ["pkt-rate-low"]="" + ["rx-usecs-low"]="" + ["tx-usecs-low"]="" + ["pkt-rate-high"]="" + ["rx-usecs-high"]="" + ["tx-usecs-high"]="" + ["sample-interval"]="" +) + +# populate the expected settings map +for key in ${!SETTINGS_MAP[@]}; do + EXPECTED_SETTINGS[$key]=$(get_value $key) +done + +# test +for key in ${!SETTINGS_MAP[@]}; do + value=$((RANDOM % $((2**32-1)))) + + ethtool -C $NSIM_NETDEV "$key" "$value" + + EXPECTED_SETTINGS[$key]="$value" + expected=${EXPECTED_SETTINGS[@]} + current=$(update_current_settings) + + check $? "$current" "$expected" + set +x +done + +# bool settings which ethtool displays on the same line +ethtool -C $NSIM_NETDEV adaptive-rx on +s=$(ethtool -c $NSIM_NETDEV | grep -q "Adaptive RX: on TX: off") +check $? "$s" "" + +ethtool -C $NSIM_NETDEV adaptive-tx on +s=$(ethtool -c $NSIM_NETDEV | grep -q "Adaptive RX: on TX: on") +check $? "$s" "" + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $((num_passes)) checks" + exit 0 +else + echo "FAILED $num_errors/$((num_errors+num_passes)) checks" + exit 1 +fi diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh new file mode 100755 index 000000000000..c969559ffa7a --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh @@ -0,0 +1,85 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +source ethtool-common.sh + +function get_value { + local query="${SETTINGS_MAP[$1]}" + + echo $(ethtool -g $NSIM_NETDEV | \ + tail -n +$CURR_SETT_LINE | \ + awk -F':' -v pattern="$query:" '$0 ~ pattern {gsub(/[\t ]/, "", $2); print $2}') +} + +function update_current_settings { + for key in ${!SETTINGS_MAP[@]}; do + CURRENT_SETTINGS[$key]=$(get_value $key) + done + echo ${CURRENT_SETTINGS[@]} +} + +if ! ethtool -h | grep -q set-ring >/dev/null; then + echo "SKIP: No --set-ring support in ethtool" + exit 4 +fi + +NSIM_NETDEV=$(make_netdev) + +set -o pipefail + +declare -A SETTINGS_MAP=( + ["rx"]="RX" + ["rx-mini"]="RX Mini" + ["rx-jumbo"]="RX Jumbo" + ["tx"]="TX" +) + +declare -A EXPECTED_SETTINGS=( + ["rx"]="" + ["rx-mini"]="" + ["rx-jumbo"]="" + ["tx"]="" +) + +declare -A CURRENT_SETTINGS=( + ["rx"]="" + ["rx-mini"]="" + ["rx-jumbo"]="" + ["tx"]="" +) + +MAX_VALUE=$((RANDOM % $((2**32-1)))) +RING_MAX_LIST=$(ls $NSIM_DEV_DFS/ethtool/ring/) + +for ring_max_entry in $RING_MAX_LIST; do + echo $MAX_VALUE > $NSIM_DEV_DFS/ethtool/ring/$ring_max_entry +done + +CURR_SETT_LINE=$(ethtool -g $NSIM_NETDEV | grep -i -m1 -n 'Current hardware settings' | cut -f1 -d:) + +# populate the expected settings map +for key in ${!SETTINGS_MAP[@]}; do + EXPECTED_SETTINGS[$key]=$(get_value $key) +done + +# test +for key in ${!SETTINGS_MAP[@]}; do + value=$((RANDOM % $MAX_VALUE)) + + ethtool -G $NSIM_NETDEV "$key" "$value" + + EXPECTED_SETTINGS[$key]="$value" + expected=${EXPECTED_SETTINGS[@]} + current=$(update_current_settings) + + check $? "$current" "$expected" + set +x +done + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $((num_passes)) checks" + exit 0 +else + echo "FAILED $num_errors/$((num_errors+num_passes)) checks" + exit 1 +fi -- cgit v1.3.1 From 20ac8f8690535161d9357f5b4af4dfdf88c56578 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 19 Nov 2020 15:08:44 +0200 Subject: selftests: mlxsw: Add nexthop objects configuration tests Test that unsupported nexthop objects are rejected and that offload indication is correctly set on: nexthop objects, nexthop group objects and routes associated these objects. Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/mlxsw/rtnetlink.sh | 189 +++++++++++++++++++++ 1 file changed, 189 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index f4031002d5e9..5de47d72f8c9 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -29,6 +29,10 @@ ALL_TESTS=" bridge_extern_learn_test neigh_offload_test nexthop_offload_test + nexthop_obj_invalid_test + nexthop_obj_offload_test + nexthop_obj_group_offload_test + nexthop_obj_route_offload_test devlink_reload_test " NUM_NETIFS=2 @@ -674,6 +678,191 @@ nexthop_offload_test() sysctl_restore net.ipv6.conf.$swp2.keep_addr_on_down } +nexthop_obj_invalid_test() +{ + # Test that invalid nexthop object configurations are rejected + RET=0 + + simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64 + simple_if_init $swp2 192.0.2.2/24 2001:db8:1::2/64 + setup_wait + + ip nexthop add id 1 via 192.0.2.3 fdb + check_fail $? "managed to configure an FDB nexthop when should not" + + ip nexthop add id 1 encap mpls 200/300 via 192.0.2.3 dev $swp1 + check_fail $? "managed to configure a nexthop with MPLS encap when should not" + + ip nexthop add id 1 blackhole + check_fail $? "managed to configure a blackhole nexthop when should not" + + ip nexthop add id 1 dev $swp1 + ip nexthop add id 2 dev $swp1 + ip nexthop add id 10 group 1/2 + check_fail $? "managed to configure a nexthop group with device-only nexthops when should not" + + log_test "nexthop objects - invalid configurations" + + ip nexthop del id 2 + ip nexthop del id 1 + + simple_if_fini $swp2 192.0.2.2/24 2001:db8:1::2/64 + simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 +} + +nexthop_obj_offload_test() +{ + # Test offload indication of nexthop objects + RET=0 + + simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64 + simple_if_init $swp2 + setup_wait + + ip nexthop add id 1 via 192.0.2.2 dev $swp1 + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 1 + check_err $? "nexthop not marked as offloaded when should" + + ip neigh replace 192.0.2.2 nud failed dev $swp1 + busywait "$TIMEOUT" not wait_for_offload \ + ip nexthop show id 1 + check_err $? "nexthop marked as offloaded after setting neigh to failed state" + + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 1 + check_err $? "nexthop not marked as offloaded after neigh replace" + + ip nexthop replace id 1 via 192.0.2.3 dev $swp1 + busywait "$TIMEOUT" not wait_for_offload \ + ip nexthop show id 1 + check_err $? "nexthop marked as offloaded after replacing to use an invalid address" + + ip nexthop replace id 1 via 192.0.2.2 dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 1 + check_err $? "nexthop not marked as offloaded after replacing to use a valid address" + + log_test "nexthop objects offload indication" + + ip neigh del 192.0.2.2 dev $swp1 + ip nexthop del id 1 + + simple_if_fini $swp2 + simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 +} + +nexthop_obj_group_offload_test() +{ + # Test offload indication of nexthop group objects + RET=0 + + simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64 + simple_if_init $swp2 + setup_wait + + ip nexthop add id 1 via 192.0.2.2 dev $swp1 + ip nexthop add id 2 via 2001:db8:1::2 dev $swp1 + ip nexthop add id 10 group 1/2 + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 1 + check_err $? "IPv4 nexthop not marked as offloaded when should" + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 2 + check_err $? "IPv6 nexthop not marked as offloaded when should" + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 10 + check_err $? "nexthop group not marked as offloaded when should" + + # Invalidate nexthop id 1 + ip neigh replace 192.0.2.2 nud failed dev $swp1 + busywait "$TIMEOUT" not wait_for_offload \ + ip nexthop show id 10 + check_fail $? "nexthop group not marked as offloaded with one valid nexthop" + + # Invalidate nexthop id 2 + ip neigh replace 2001:db8:1::2 nud failed dev $swp1 + busywait "$TIMEOUT" not wait_for_offload \ + ip nexthop show id 10 + check_err $? "nexthop group marked as offloaded when should not" + + # Revalidate nexthop id 1 + ip nexthop replace id 1 via 192.0.2.3 dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 10 + check_err $? "nexthop group not marked as offloaded after revalidating nexthop" + + log_test "nexthop group objects offload indication" + + ip neigh del 2001:db8:1::2 dev $swp1 + ip neigh del 192.0.2.3 dev $swp1 + ip neigh del 192.0.2.2 dev $swp1 + ip nexthop del id 10 + ip nexthop del id 2 + ip nexthop del id 1 + + simple_if_fini $swp2 + simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 +} + +nexthop_obj_route_offload_test() +{ + # Test offload indication of routes using nexthop objects + RET=0 + + simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64 + simple_if_init $swp2 + setup_wait + + ip nexthop add id 1 via 192.0.2.2 dev $swp1 + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + + ip route replace 198.51.100.0/24 nhid 1 + busywait "$TIMEOUT" wait_for_offload \ + ip route show 198.51.100.0/24 + check_err $? "route not marked as offloaded when using valid nexthop" + + ip nexthop replace id 1 via 192.0.2.3 dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip route show 198.51.100.0/24 + check_err $? "route not marked as offloaded after replacing valid nexthop with a valid one" + + ip nexthop replace id 1 via 192.0.2.4 dev $swp1 + busywait "$TIMEOUT" not wait_for_offload \ + ip route show 198.51.100.0/24 + check_err $? "route marked as offloaded after replacing valid nexthop with an invalid one" + + ip nexthop replace id 1 via 192.0.2.2 dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip route show 198.51.100.0/24 + check_err $? "route not marked as offloaded after replacing invalid nexthop with a valid one" + + log_test "routes using nexthop objects offload indication" + + ip route del 198.51.100.0/24 + ip neigh del 192.0.2.3 dev $swp1 + ip neigh del 192.0.2.2 dev $swp1 + ip nexthop del id 1 + + simple_if_fini $swp2 + simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 +} + devlink_reload_test() { # Test that after executing all the above configuration tests, a -- cgit v1.3.1 From ffb721515bf3352f38457fd2ab19f575e75e190e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 19 Nov 2020 15:08:45 +0200 Subject: selftests: forwarding: Do not configure nexthop objects twice routing_nh_obj() is used to configure the nexthop objects employed by the test, but it is called twice resulting in "RTNETLINK answers: File exists" messages. Remove the first call, so that the function is only called after setup_wait(), when all the interfaces are up and ready. Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/router_mpath_nh.sh | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh index cf3d26c233e8..6067477ff326 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh @@ -312,7 +312,6 @@ setup_prepare() router1_create router2_create - routing_nh_obj forwarding_enable } -- cgit v1.3.1 From 3600f29ad1399a1335af2030e8106ac8bbe9261a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 19 Nov 2020 15:08:46 +0200 Subject: selftests: forwarding: Test IPv4 routes with IPv6 link-local nexthops In addition to IPv4 multipath tests with IPv4 nexthops, also test IPv4 multipath with nexthops that use IPv6 link-local addresses. Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/router_mpath_nh.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh index 6067477ff326..e8c2573d5232 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh @@ -280,6 +280,17 @@ multipath_test() multipath4_test "Weighted MP 2:1" 2 1 multipath4_test "Weighted MP 11:45" 11 45 + log_info "Running IPv4 multipath tests with IPv6 link-local nexthops" + ip nexthop replace id 101 via fe80:2::22 dev $rp12 + ip nexthop replace id 102 via fe80:3::23 dev $rp13 + + multipath4_test "ECMP" 1 1 + multipath4_test "Weighted MP 2:1" 2 1 + multipath4_test "Weighted MP 11:45" 11 45 + + ip nexthop replace id 102 via 169.254.3.23 dev $rp13 + ip nexthop replace id 101 via 169.254.2.22 dev $rp12 + log_info "Running IPv6 multipath tests" multipath6_test "ECMP" 1 1 multipath6_test "Weighted MP 2:1" 2 1 -- cgit v1.3.1 From e96fa54bbd90e487a8c230155db4231d9326ebcc Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 19 Nov 2020 15:08:47 +0200 Subject: selftests: forwarding: Add device-only nexthop test In a similar fashion to router_multipath.sh and its nexthop objects version router_mpath_nh.sh, create a nexthop objects version of router.sh. It reuses the same topology, but uses device-only nexthop objects instead of legacy ones. Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/forwarding/router_nh.sh | 160 +++++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/router_nh.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/router_nh.sh b/tools/testing/selftests/net/forwarding/router_nh.sh new file mode 100755 index 000000000000..f3a53738bdcc --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_nh.sh @@ -0,0 +1,160 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 +" + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +h1_create() +{ + vrf_create "vrf-h1" + ip link set dev $h1 master vrf-h1 + + ip link set dev vrf-h1 up + ip link set dev $h1 up + + ip address add 192.0.2.2/24 dev $h1 + ip address add 2001:db8:1::2/64 dev $h1 + + ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1 + ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1 +} + +h1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-h1 + ip route del 198.51.100.0/24 vrf vrf-h1 + + ip address del 2001:db8:1::2/64 dev $h1 + ip address del 192.0.2.2/24 dev $h1 + + ip link set dev $h1 down + vrf_destroy "vrf-h1" +} + +h2_create() +{ + vrf_create "vrf-h2" + ip link set dev $h2 master vrf-h2 + + ip link set dev vrf-h2 up + ip link set dev $h2 up + + ip address add 198.51.100.2/24 dev $h2 + ip address add 2001:db8:2::2/64 dev $h2 + + ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1 + ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-h2 + ip route del 192.0.2.0/24 vrf vrf-h2 + + ip address del 2001:db8:2::2/64 dev $h2 + ip address del 198.51.100.2/24 dev $h2 + + ip link set dev $h2 down + vrf_destroy "vrf-h2" +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + + tc qdisc add dev $rp2 clsact + + ip address add 192.0.2.1/24 dev $rp1 + ip address add 2001:db8:1::1/64 dev $rp1 + + ip address add 198.51.100.1/24 dev $rp2 + ip address add 2001:db8:2::1/64 dev $rp2 +} + +router_destroy() +{ + ip address del 2001:db8:2::1/64 dev $rp2 + ip address del 198.51.100.1/24 dev $rp2 + + ip address del 2001:db8:1::1/64 dev $rp1 + ip address del 192.0.2.1/24 dev $rp1 + + tc qdisc del dev $rp2 clsact + + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +routing_nh_obj() +{ + # Create the nexthops as AF_INET6, so that IPv4 and IPv6 routes could + # use them. + ip -6 nexthop add id 101 dev $rp1 + ip -6 nexthop add id 102 dev $rp2 + + ip route replace 192.0.2.0/24 nhid 101 + ip route replace 2001:db8:1::/64 nhid 101 + ip route replace 198.51.100.0/24 nhid 102 + ip route replace 2001:db8:2::/64 nhid 102 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1mac=$(mac_get $rp1) + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 198.51.100.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +routing_nh_obj + +tests_run + +exit $EXIT_STATUS -- cgit v1.3.1 From e035146d65603165078629671afa9409f659a358 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 19 Nov 2020 15:08:48 +0200 Subject: selftests: forwarding: Add multipath tunneling nexthop test Add a nexthop objects version of gre_multipath.sh. Unlike the original test, it also tests IPv6 overlay which is not possible with the legacy nexthop implementation. See commit 9a2ad3623868 ("selftests: forwarding: gre_multipath: Drop IPv6 tests") for more info. Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/gre_multipath_nh.sh | 356 +++++++++++++++++++++ 1 file changed, 356 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/gre_multipath_nh.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh new file mode 100755 index 000000000000..d03aa2cab9fd --- /dev/null +++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh @@ -0,0 +1,356 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test traffic distribution when a wECMP route forwards traffic to two GRE +# tunnels. +# +# +-------------------------+ +# | H1 | +# | $h1 + | +# | 192.0.2.1/28 | | +# | 2001:db8:1::1/64 | | +# +-------------------|-----+ +# | +# +-------------------|------------------------+ +# | SW1 | | +# | $ol1 + | +# | 192.0.2.2/28 | +# | 2001:db8:1::2/64 | +# | | +# | + g1a (gre) + g1b (gre) | +# | loc=192.0.2.65 loc=192.0.2.81 | +# | rem=192.0.2.66 --. rem=192.0.2.82 --. | +# | tos=inherit | tos=inherit | | +# | .------------------' | | +# | | .------------------' | +# | v v | +# | + $ul1.111 (vlan) + $ul1.222 (vlan) | +# | | 192.0.2.129/28 | 192.0.2.145/28 | +# | \ / | +# | \________________/ | +# | | | +# | + $ul1 | +# +------------|-------------------------------+ +# | +# +------------|-------------------------------+ +# | SW2 + $ul2 | +# | _______|________ | +# | / \ | +# | / \ | +# | + $ul2.111 (vlan) + $ul2.222 (vlan) | +# | ^ 192.0.2.130/28 ^ 192.0.2.146/28 | +# | | | | +# | | '------------------. | +# | '------------------. | | +# | + g2a (gre) | + g2b (gre) | | +# | loc=192.0.2.66 | loc=192.0.2.82 | | +# | rem=192.0.2.65 --' rem=192.0.2.81 --' | +# | tos=inherit tos=inherit | +# | | +# | $ol2 + | +# | 192.0.2.17/28 | | +# | 2001:db8:2::1/64 | | +# +-------------------|------------------------+ +# | +# +-------------------|-----+ +# | H2 | | +# | $h2 + | +# | 192.0.2.18/28 | +# | 2001:db8:2::2/64 | +# +-------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + multipath_ipv4 + multipath_ipv6 + multipath_ipv6_l4 +" + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 + ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2 + ip route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 +} + +h1_destroy() +{ + ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 + ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2 + simple_if_fini $h1 192.0.2.1/28 +} + +sw1_create() +{ + simple_if_init $ol1 192.0.2.2/28 2001:db8:1::2/64 + __simple_if_init $ul1 v$ol1 + vlan_create $ul1 111 v$ol1 192.0.2.129/28 + vlan_create $ul1 222 v$ol1 192.0.2.145/28 + + tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1 + __simple_if_init g1a v$ol1 192.0.2.65/32 + ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130 + + tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1 + __simple_if_init g1b v$ol1 192.0.2.81/32 + ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146 + + ip -6 nexthop add id 101 dev g1a + ip -6 nexthop add id 102 dev g1b + ip nexthop add id 103 group 101/102 + + ip route add vrf v$ol1 192.0.2.16/28 nhid 103 + ip route add vrf v$ol1 2001:db8:2::/64 nhid 103 +} + +sw1_destroy() +{ + ip route del vrf v$ol1 2001:db8:2::/64 + ip route del vrf v$ol1 192.0.2.16/28 + + ip nexthop del id 103 + ip -6 nexthop del id 102 + ip -6 nexthop del id 101 + + ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146 + __simple_if_fini g1b 192.0.2.81/32 + tunnel_destroy g1b + + ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130 + __simple_if_fini g1a 192.0.2.65/32 + tunnel_destroy g1a + + vlan_destroy $ul1 222 + vlan_destroy $ul1 111 + __simple_if_fini $ul1 + simple_if_fini $ol1 192.0.2.2/28 2001:db8:1::2/64 +} + +sw2_create() +{ + simple_if_init $ol2 192.0.2.17/28 2001:db8:2::1/64 + __simple_if_init $ul2 v$ol2 + vlan_create $ul2 111 v$ol2 192.0.2.130/28 + vlan_create $ul2 222 v$ol2 192.0.2.146/28 + + tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2 + __simple_if_init g2a v$ol2 192.0.2.66/32 + ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129 + + tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2 + __simple_if_init g2b v$ol2 192.0.2.82/32 + ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145 + + ip -6 nexthop add id 201 dev g2a + ip -6 nexthop add id 202 dev g2b + ip nexthop add id 203 group 201/202 + + ip route add vrf v$ol2 192.0.2.0/28 nhid 203 + ip route add vrf v$ol2 2001:db8:1::/64 nhid 203 + + tc qdisc add dev $ul2 clsact + tc filter add dev $ul2 ingress pref 111 prot 802.1Q \ + flower vlan_id 111 action pass + tc filter add dev $ul2 ingress pref 222 prot 802.1Q \ + flower vlan_id 222 action pass +} + +sw2_destroy() +{ + tc qdisc del dev $ul2 clsact + + ip route del vrf v$ol2 2001:db8:1::/64 + ip route del vrf v$ol2 192.0.2.0/28 + + ip nexthop del id 203 + ip -6 nexthop del id 202 + ip -6 nexthop del id 201 + + ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145 + __simple_if_fini g2b 192.0.2.82/32 + tunnel_destroy g2b + + ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129 + __simple_if_fini g2a 192.0.2.66/32 + tunnel_destroy g2a + + vlan_destroy $ul2 222 + vlan_destroy $ul2 111 + __simple_if_fini $ul2 + simple_if_fini $ol2 192.0.2.17/28 2001:db8:2::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.18/28 2001:db8:2::2/64 + ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17 + ip route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1 + ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17 + simple_if_fini $h2 192.0.2.18/28 2001:db8:2::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + vrf_prepare + h1_create + sw1_create + sw2_create + h2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + h2_destroy + sw2_destroy + sw1_destroy + h1_destroy + vrf_cleanup +} + +multipath4_test() +{ + local what=$1; shift + local weight1=$1; shift + local weight2=$1; shift + + sysctl_set net.ipv4.fib_multipath_hash_policy 1 + ip nexthop replace id 103 group 101,$weight1/102,$weight2 + + local t0_111=$(tc_rule_stats_get $ul2 111 ingress) + local t0_222=$(tc_rule_stats_get $ul2 222 ingress) + + ip vrf exec v$h1 \ + $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \ + -d 1msec -t udp "sp=1024,dp=0-32768" + + local t1_111=$(tc_rule_stats_get $ul2 111 ingress) + local t1_222=$(tc_rule_stats_get $ul2 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + multipath_eval "$what" $weight1 $weight2 $d111 $d222 + + ip nexthop replace id 103 group 101/102 + sysctl_restore net.ipv4.fib_multipath_hash_policy +} + +multipath6_test() +{ + local what=$1; shift + local weight1=$1; shift + local weight2=$1; shift + + sysctl_set net.ipv6.fib_multipath_hash_policy 0 + ip nexthop replace id 103 group 101,$weight1/102,$weight2 + + local t0_111=$(tc_rule_stats_get $ul2 111 ingress) + local t0_222=$(tc_rule_stats_get $ul2 222 ingress) + + # Generate 16384 echo requests, each with a random flow label. + for ((i=0; i < 16384; ++i)); do + ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null + done + + local t1_111=$(tc_rule_stats_get $ul2 111 ingress) + local t1_222=$(tc_rule_stats_get $ul2 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + multipath_eval "$what" $weight1 $weight2 $d111 $d222 + + ip nexthop replace id 103 group 101/102 + sysctl_restore net.ipv6.fib_multipath_hash_policy +} + +multipath6_l4_test() +{ + local what=$1; shift + local weight1=$1; shift + local weight2=$1; shift + + sysctl_set net.ipv6.fib_multipath_hash_policy 1 + ip nexthop replace id 103 group 101,$weight1/102,$weight2 + + local t0_111=$(tc_rule_stats_get $ul2 111 ingress) + local t0_222=$(tc_rule_stats_get $ul2 222 ingress) + + ip vrf exec v$h1 \ + $MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \ + -d 1msec -t udp "sp=1024,dp=0-32768" + + local t1_111=$(tc_rule_stats_get $ul2 111 ingress) + local t1_222=$(tc_rule_stats_get $ul2 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + multipath_eval "$what" $weight1 $weight2 $d111 $d222 + + ip nexthop replace id 103 group 101/102 + sysctl_restore net.ipv6.fib_multipath_hash_policy +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.18 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +multipath_ipv4() +{ + log_info "Running IPv4 multipath tests" + multipath4_test "ECMP" 1 1 + multipath4_test "Weighted MP 2:1" 2 1 + multipath4_test "Weighted MP 11:45" 11 45 +} + +multipath_ipv6() +{ + log_info "Running IPv6 multipath tests" + multipath6_test "ECMP" 1 1 + multipath6_test "Weighted MP 2:1" 2 1 + multipath6_test "Weighted MP 11:45" 11 45 +} + +multipath_ipv6_l4() +{ + log_info "Running IPv6 L4 hash multipath tests" + multipath6_l4_test "ECMP" 1 1 + multipath6_l4_test "Weighted MP 2:1" 2 1 + multipath6_l4_test "Weighted MP 11:45" 11 45 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS -- cgit v1.3.1 From 8b819a84d4b12c4a91cc9f91ad69ca09c3e0606d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 19 Nov 2020 11:45:57 -0800 Subject: selftests: mptcp: add link failure test case Add a test case where a link fails with multiple subflows. The expectation is that MPTCP will transmit any data that could not be delivered via the failed link on another subflow. Co-developed-by: Geliang Tang Signed-off-by: Geliang Tang Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 104 +++++++++++++++++++----- 1 file changed, 82 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 0d93b243695f..f841ed8186c1 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -5,6 +5,7 @@ ret=0 sin="" sout="" cin="" +cinsent="" cout="" ksft_skip=4 timeout=30 @@ -81,7 +82,7 @@ cleanup_partial() cleanup() { rm -f "$cin" "$cout" - rm -f "$sin" "$sout" + rm -f "$sin" "$sout" "$cinsent" cleanup_partial } @@ -144,6 +145,13 @@ if [ $? -ne 0 ];then exit $ksft_skip fi +print_file_err() +{ + ls -l "$1" 1>&2 + echo "Trailing bytes are: " + tail -c 27 "$1" +} + check_transfer() { in=$1 @@ -155,6 +163,7 @@ check_transfer() echo "[ FAIL ] $what does not match (in, out):" print_file_err "$in" print_file_err "$out" + ret=1 return 1 fi @@ -175,6 +184,17 @@ do_ping() fi } +link_failure() +{ + ns="$1" + + l=$((RANDOM%4)) + l=$((l+1)) + + veth="ns1eth$l" + ip -net "$ns" link set "$veth" down +} + do_transfer() { listener_ns="$1" @@ -182,9 +202,10 @@ do_transfer() cl_proto="$3" srv_proto="$4" connect_addr="$5" - rm_nr_ns1="$6" - rm_nr_ns2="$7" - speed="$8" + test_link_fail="$6" + rm_nr_ns1="$7" + rm_nr_ns2="$8" + speed="$9" port=$((10000+$TEST_COUNT)) TEST_COUNT=$((TEST_COUNT+1)) @@ -220,7 +241,12 @@ do_transfer() sleep 1 - ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" & + if [ "$test_link_fail" -eq 0 ];then + ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" & + else + ( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | tee "$cinsent" | \ + ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr > "$cout" & + fi cpid=$! if [ $rm_nr_ns1 -gt 0 ]; then @@ -265,12 +291,17 @@ do_transfer() ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port" cat "$capout" + ret=1 return 1 fi check_transfer $sin $cout "file received by client" retc=$? - check_transfer $cin $sout "file received by server" + if [ "$test_link_fail" -eq 0 ];then + check_transfer $cin $sout "file received by server" + else + check_transfer $cinsent $sout "file received by server" + fi rets=$? if [ $retc -eq 0 ] && [ $rets -eq 0 ];then @@ -286,13 +317,12 @@ make_file() { name=$1 who=$2 + size=$3 - SIZE=1 - - dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null + dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" - echo "Created $name (size $SIZE KB) containing data sent by $who" + echo "Created $name (size $size KB) containing data sent by $who" } run_tests() @@ -300,14 +330,32 @@ run_tests() listener_ns="$1" connector_ns="$2" connect_addr="$3" - rm_nr_ns1="${4:-0}" - rm_nr_ns2="${5:-0}" - speed="${6:-fast}" + test_linkfail="${4:-0}" + rm_nr_ns1="${5:-0}" + rm_nr_ns2="${6:-0}" + speed="${7:-fast}" lret=0 + oldin="" + + if [ "$test_linkfail" -eq 1 ];then + size=$((RANDOM%1024)) + size=$((size+1)) + size=$((size*128)) + + oldin=$(mktemp) + cp "$cin" "$oldin" + make_file "$cin" "client" $size + fi do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \ - ${rm_nr_ns1} ${rm_nr_ns2} ${speed} + ${test_linkfail} ${rm_nr_ns1} ${rm_nr_ns2} ${speed} lret=$? + + if [ "$test_linkfail" -eq 1 ];then + cp "$oldin" "$cin" + rm -f "$oldin" + fi + if [ $lret -ne 0 ]; then ret=$lret return @@ -440,10 +488,11 @@ chk_rm_nr() sin=$(mktemp) sout=$(mktemp) cin=$(mktemp) +cinsent=$(mktemp) cout=$(mktemp) init -make_file "$cin" "client" -make_file "$sin" "server" +make_file "$cin" "client" 1 +make_file "$sin" "server" 1 trap cleanup EXIT run_tests $ns1 $ns2 10.0.1.1 @@ -528,12 +577,23 @@ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows and signal" 3 3 3 chk_add_nr 1 1 +# accept and use add_addr with additional subflows and link loss +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 3 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 3 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 1 +chk_join_nr "multiple flows, signal, link failure" 3 3 3 +chk_add_nr 1 1 + # add_addr timeout reset_with_add_addr_timeout ip netns exec $ns1 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl limits 1 1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -run_tests $ns1 $ns2 10.0.1.1 0 0 slow +run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1 chk_add_nr 4 0 @@ -542,7 +602,7 @@ reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 1 slow +run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow chk_join_nr "remove single subflow" 1 1 1 chk_rm_nr 1 1 @@ -552,7 +612,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2 ip netns exec $ns2 ./pm_nl_ctl limits 0 2 ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 2 slow +run_tests $ns1 $ns2 10.0.1.1 0 0 2 slow chk_join_nr "remove multiple subflows" 2 2 2 chk_rm_nr 2 2 @@ -561,7 +621,7 @@ reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -run_tests $ns1 $ns2 10.0.1.1 1 0 slow +run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow chk_join_nr "remove single address" 1 1 1 chk_add_nr 1 1 chk_rm_nr 0 0 @@ -572,7 +632,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 2 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 1 1 slow +run_tests $ns1 $ns2 10.0.1.1 0 1 1 slow chk_join_nr "remove subflow and signal" 2 2 2 chk_add_nr 1 1 chk_rm_nr 1 1 @@ -584,7 +644,7 @@ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 3 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 1 2 slow +run_tests $ns1 $ns2 10.0.1.1 0 1 2 slow chk_join_nr "remove subflows and signal" 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 -- cgit v1.3.1 From 523514ed0a998fda389b9b6f00d0f2054ba30d25 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 19 Nov 2020 11:46:01 -0800 Subject: selftests: mptcp: add ADD_ADDR IPv6 test cases This patch added IPv6 support for do_transfer, and the test cases for ADD_ADDR IPv6. Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 70 ++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index f841ed8186c1..0eae628d1ffd 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -195,6 +195,12 @@ link_failure() ip -net "$ns" link set "$veth" down } +# $1: IP address +is_v6() +{ + [ -z "${1##*:*}" ] +} + do_transfer() { listener_ns="$1" @@ -236,7 +242,15 @@ do_transfer() mptcp_connect="./mptcp_connect -r" fi - ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" & + local local_addr + if is_v6 "${connect_addr}"; then + local_addr="::" + else + local_addr="0.0.0.0" + fi + + ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port \ + -s ${srv_proto} ${local_addr} < "$sin" > "$sout" & spid=$! sleep 1 @@ -649,6 +663,60 @@ chk_join_nr "remove subflows and signal" 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 +# subflow IPv6 +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow +run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow +chk_join_nr "single subflow IPv6" 1 1 1 + +# add_address, unused IPv6 +reset +ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal +run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow +chk_join_nr "unused signal address IPv6" 0 0 0 +chk_add_nr 1 1 + +# signal address IPv6 +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow +chk_join_nr "single address IPv6" 1 1 1 +chk_add_nr 1 1 + +# add_addr timeout IPv6 +reset_with_add_addr_timeout 6 +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal +run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow +chk_join_nr "signal address, ADD_ADDR6 timeout" 1 1 1 +chk_add_nr 4 0 + +# single address IPv6, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +run_tests $ns1 $ns2 dead:beef:1::1 0 1 0 slow +chk_join_nr "remove single address IPv6" 1 1 1 +chk_add_nr 1 1 +chk_rm_nr 0 0 + +# subflow and signal IPv6, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 2 +ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow +run_tests $ns1 $ns2 dead:beef:1::1 0 1 1 slow +chk_join_nr "remove subflow and signal IPv6" 2 2 2 +chk_add_nr 1 1 +chk_rm_nr 1 1 + # single subflow, syncookies reset_with_cookies ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -- cgit v1.3.1 From d23e95c09067618eabd6d0e8cff372f0ce517c84 Mon Sep 17 00:00:00 2001 From: Todd Brandt Date: Tue, 10 Nov 2020 18:36:17 -0800 Subject: pm-graph v5.8 - if wakeups occur in s2idle: "freeze time: N (-x ms waking y times) ms" - change FREEZELOOP and FREEZEWAKE to S2LOOP and S2WAKE for brevity - returns all sysfs vals to their initial state after testing - use the dmesg log for debugging until the test is completed, instrument the executeSuspend process to have a full trace, if test completes, formal dmesg log overwrites the debug log - fix CPU_ON and CPU_OFF devices in the timeline, should include [n] Signed-off-by: Todd Brandt Signed-off-by: Rafael J. Wysocki --- tools/power/pm-graph/README | 4 +- tools/power/pm-graph/sleepgraph.py | 387 +++++++++++++++++++++---------------- 2 files changed, 227 insertions(+), 164 deletions(-) (limited to 'tools') diff --git a/tools/power/pm-graph/README b/tools/power/pm-graph/README index 89d0a7dab4bc..da468bd510ca 100644 --- a/tools/power/pm-graph/README +++ b/tools/power/pm-graph/README @@ -6,7 +6,7 @@ |_| |___/ |_| pm-graph: suspend/resume/boot timing analysis tools - Version: 5.7 + Version: 5.8 Author: Todd Brandt Home Page: https://01.org/pm-graph @@ -61,7 +61,7 @@ - runs with python2 or python3, choice is made by /usr/bin/python link - python - python-configparser (for python2 sleepgraph) - - python-requests (for googlesheet.py) + - python-requests (for stresstester.py) - linux-tools-common (for turbostat usage in sleepgraph) Ubuntu: diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 1bc36a1db14f..81f4b8abbdf7 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -81,7 +81,7 @@ def ascii(text): # store system values and test parameters class SystemValues: title = 'SleepGraph' - version = '5.7' + version = '5.8' ansi = False rs = 0 display = '' @@ -92,8 +92,9 @@ class SystemValues: testlog = True dmesglog = True ftracelog = False + acpidebug = True tstat = True - mindevlen = 0.0 + mindevlen = 0.0001 mincglen = 0.0 cgphase = '' cgtest = -1 @@ -115,6 +116,7 @@ class SystemValues: fpdtpath = '/sys/firmware/acpi/tables/FPDT' epath = '/sys/kernel/debug/tracing/events/power/' pmdpath = '/sys/power/pm_debug_messages' + acpipath='/sys/module/acpi/parameters/debug_level' traceevents = [ 'suspend_resume', 'wakeup_source_activate', @@ -162,16 +164,16 @@ class SystemValues: devdump = False mixedphaseheight = True devprops = dict() + cfgdef = dict() platinfo = [] predelay = 0 postdelay = 0 - pmdebug = '' tmstart = 'SUSPEND START %Y%m%d-%H:%M:%S.%f' tmend = 'RESUME COMPLETE %Y%m%d-%H:%M:%S.%f' tracefuncs = { 'sys_sync': {}, 'ksys_sync': {}, - 'pm_notifier_call_chain_robust': {}, + '__pm_notifier_call_chain': {}, 'pm_prepare_console': {}, 'pm_notifier_call_chain': {}, 'freeze_processes': {}, @@ -490,9 +492,9 @@ class SystemValues: call('echo 0 > %s/wakealarm' % self.rtcpath, shell=True) def initdmesg(self): # get the latest time stamp from the dmesg log - fp = Popen('dmesg', stdout=PIPE).stdout + lines = Popen('dmesg', stdout=PIPE).stdout.readlines() ktime = '0' - for line in fp: + for line in reversed(lines): line = ascii(line).replace('\r\n', '') idx = line.find('[') if idx > 1: @@ -500,7 +502,7 @@ class SystemValues: m = re.match('[ \t]*(\[ *)(?P[0-9\.]*)(\]) (?P.*)', line) if(m): ktime = m.group('ktime') - fp.close() + break self.dmesgstart = float(ktime) def getdmesg(self, testdata): op = self.writeDatafileHeader(self.dmesgfile, testdata) @@ -715,8 +717,6 @@ class SystemValues: self.fsetVal('0', 'events/kprobes/enable') self.fsetVal('', 'kprobe_events') self.fsetVal('1024', 'buffer_size_kb') - if self.pmdebug: - self.setVal(self.pmdebug, self.pmdpath) def setupAllKprobes(self): for name in self.tracefuncs: self.defaultKprobe(name, self.tracefuncs[name]) @@ -740,11 +740,7 @@ class SystemValues: # turn trace off self.fsetVal('0', 'tracing_on') self.cleanupFtrace() - # pm debug messages - pv = self.getVal(self.pmdpath) - if pv != '1': - self.setVal('1', self.pmdpath) - self.pmdebug = pv + self.testVal(self.pmdpath, 'basic', '1') # set the trace clock to global self.fsetVal('global', 'trace_clock') self.fsetVal('nop', 'current_tracer') @@ -900,6 +896,14 @@ class SystemValues: if isgz: return gzip.open(filename, mode+'t') return open(filename, mode) + def putlog(self, filename, text): + with self.openlog(filename, 'a') as fp: + fp.write(text) + fp.close() + def dlog(self, text): + self.putlog(self.dmesgfile, '# %s\n' % text) + def flog(self, text): + self.putlog(self.ftracefile, text) def b64unzip(self, data): try: out = codecs.decode(base64.b64decode(data), 'zlib').decode() @@ -992,9 +996,7 @@ class SystemValues: # add a line for each of these commands with their outputs for name, cmdline, info in cmdafter: footer += '# platform-%s: %s | %s\n' % (name, cmdline, self.b64zip(info)) - - with self.openlog(self.ftracefile, 'a') as fp: - fp.write(footer) + self.flog(footer) return True def commonPrefix(self, list): if len(list) < 2: @@ -1034,6 +1036,7 @@ class SystemValues: cmdline, cmdpath = ' '.join(cargs[2:]), self.getExec(cargs[2]) if not cmdpath or (begin and not delta): continue + self.dlog('[%s]' % cmdline) try: fp = Popen([cmdpath]+cargs[3:], stdout=PIPE, stderr=PIPE).stdout info = ascii(fp.read()).strip() @@ -1060,6 +1063,29 @@ class SystemValues: else: out.append((name, cmdline, '\tnothing' if not info else info)) return out + def testVal(self, file, fmt='basic', value=''): + if file == 'restoreall': + for f in self.cfgdef: + if os.path.exists(f): + fp = open(f, 'w') + fp.write(self.cfgdef[f]) + fp.close() + self.cfgdef = dict() + elif value and os.path.exists(file): + fp = open(file, 'r+') + if fmt == 'radio': + m = re.match('.*\[(?P.*)\].*', fp.read()) + if m: + self.cfgdef[file] = m.group('v') + elif fmt == 'acpi': + line = fp.read().strip().split('\n')[-1] + m = re.match('.* (?P[0-9A-Fx]*) .*', line) + if m: + self.cfgdef[file] = m.group('v') + else: + self.cfgdef[file] = fp.read().strip() + fp.write(value) + fp.close() def haveTurbostat(self): if not self.tstat: return False @@ -1201,6 +1227,57 @@ class SystemValues: self.multitest[sz] *= 1440 elif unit == 'h': self.multitest[sz] *= 60 + def displayControl(self, cmd): + xset, ret = 'timeout 10 xset -d :0.0 {0}', 0 + if self.sudouser: + xset = 'sudo -u %s %s' % (self.sudouser, xset) + if cmd == 'init': + ret = call(xset.format('dpms 0 0 0'), shell=True) + if not ret: + ret = call(xset.format('s off'), shell=True) + elif cmd == 'reset': + ret = call(xset.format('s reset'), shell=True) + elif cmd in ['on', 'off', 'standby', 'suspend']: + b4 = self.displayControl('stat') + ret = call(xset.format('dpms force %s' % cmd), shell=True) + if not ret: + curr = self.displayControl('stat') + self.vprint('Display Switched: %s -> %s' % (b4, curr)) + if curr != cmd: + self.vprint('WARNING: Display failed to change to %s' % cmd) + if ret: + self.vprint('WARNING: Display failed to change to %s with xset' % cmd) + return ret + elif cmd == 'stat': + fp = Popen(xset.format('q').split(' '), stdout=PIPE).stdout + ret = 'unknown' + for line in fp: + m = re.match('[\s]*Monitor is (?P.*)', ascii(line)) + if(m and len(m.group('m')) >= 2): + out = m.group('m').lower() + ret = out[3:] if out[0:2] == 'in' else out + break + fp.close() + return ret + def setRuntimeSuspend(self, before=True): + if before: + # runtime suspend disable or enable + if self.rs > 0: + self.rstgt, self.rsval, self.rsdir = 'on', 'auto', 'enabled' + else: + self.rstgt, self.rsval, self.rsdir = 'auto', 'on', 'disabled' + pprint('CONFIGURING RUNTIME SUSPEND...') + self.rslist = deviceInfo(self.rstgt) + for i in self.rslist: + self.setVal(self.rsval, i) + pprint('runtime suspend %s on all devices (%d changed)' % (self.rsdir, len(self.rslist))) + pprint('waiting 5 seconds...') + time.sleep(5) + else: + # runtime suspend re-enable or re-disable + for i in self.rslist: + self.setVal(self.rstgt, i) + pprint('runtime suspend settings restored on %d devices' % len(self.rslist)) sysvals = SystemValues() switchvalues = ['enable', 'disable', 'on', 'off', 'true', 'false', '1', '0'] @@ -1640,15 +1717,20 @@ class Data: if 'resume_machine' in phase and 'suspend_machine' in lp: tS, tR = self.dmesg[lp]['end'], self.dmesg[phase]['start'] tL = tR - tS - if tL > 0: - left = True if tR > tZero else False - self.trimTime(tS, tL, left) - if 'trying' in self.dmesg[lp] and self.dmesg[lp]['trying'] >= 0.001: - tTry = round(self.dmesg[lp]['trying'] * 1000) - text = '%.0f (-%.0f waking)' % (tL * 1000, tTry) + if tL <= 0: + continue + left = True if tR > tZero else False + self.trimTime(tS, tL, left) + if 'waking' in self.dmesg[lp]: + tCnt = self.dmesg[lp]['waking'][0] + if self.dmesg[lp]['waking'][1] >= 0.001: + tTry = '-%.0f' % (round(self.dmesg[lp]['waking'][1] * 1000)) else: - text = '%.0f' % (tL * 1000) - self.tLow.append(text) + tTry = '-%.3f' % (self.dmesg[lp]['waking'][1] * 1000) + text = '%.0f (%s ms waking %d times)' % (tL * 1000, tTry, tCnt) + else: + text = '%.0f' % (tL * 1000) + self.tLow.append(text) lp = phase def getMemTime(self): if not self.hwstart or not self.hwend: @@ -1921,7 +2003,7 @@ class Data: for dev in list: length = (list[dev]['end'] - list[dev]['start']) * 1000 width = widfmt % (((list[dev]['end']-list[dev]['start'])*100)/tTotal) - if width != '0.000000' and length >= mindevlen: + if length >= mindevlen: devlist.append(dev) self.tdevlist[phase] = devlist def addHorizontalDivider(self, devname, devend): @@ -3316,9 +3398,10 @@ def parseTraceLog(live=False): # trim out s2idle loops, track time trying to freeze llp = data.lastPhase(2) if llp.startswith('suspend_machine'): - if 'trying' not in data.dmesg[llp]: - data.dmesg[llp]['trying'] = 0 - data.dmesg[llp]['trying'] += \ + if 'waking' not in data.dmesg[llp]: + data.dmesg[llp]['waking'] = [0, 0.0] + data.dmesg[llp]['waking'][0] += 1 + data.dmesg[llp]['waking'][1] += \ t.time - data.dmesg[lp]['start'] data.currphase = '' del data.dmesg[lp] @@ -4555,7 +4638,7 @@ def createHTML(testruns, testfail): # draw the devices for this phase phaselist = data.dmesg[b]['list'] for d in sorted(data.tdevlist[b]): - dname = d if '[' not in d else d.split('[')[0] + dname = d if ('[' not in d or 'CPU' in d) else d.split('[')[0] name, dev = dname, phaselist[d] drv = xtraclass = xtrainfo = xtrastyle = '' if 'htmlclass' in dev: @@ -5194,156 +5277,146 @@ def addScriptCode(hf, testruns): '\n' hf.write(script_code); -def setRuntimeSuspend(before=True): - global sysvals - sv = sysvals - if sv.rs == 0: - return - if before: - # runtime suspend disable or enable - if sv.rs > 0: - sv.rstgt, sv.rsval, sv.rsdir = 'on', 'auto', 'enabled' - else: - sv.rstgt, sv.rsval, sv.rsdir = 'auto', 'on', 'disabled' - pprint('CONFIGURING RUNTIME SUSPEND...') - sv.rslist = deviceInfo(sv.rstgt) - for i in sv.rslist: - sv.setVal(sv.rsval, i) - pprint('runtime suspend %s on all devices (%d changed)' % (sv.rsdir, len(sv.rslist))) - pprint('waiting 5 seconds...') - time.sleep(5) - else: - # runtime suspend re-enable or re-disable - for i in sv.rslist: - sv.setVal(sv.rstgt, i) - pprint('runtime suspend settings restored on %d devices' % len(sv.rslist)) - # Function: executeSuspend # Description: # Execute system suspend through the sysfs interface, then copy the output # dmesg and ftrace files to the test output directory. def executeSuspend(quiet=False): - pm = ProcessMonitor() - tp = sysvals.tpath - if sysvals.wifi: - wifi = sysvals.checkWifi() + sv, tp, pm = sysvals, sysvals.tpath, ProcessMonitor() + if sv.wifi: + wifi = sv.checkWifi() + sv.dlog('wifi check, connected device is "%s"' % wifi) testdata = [] # run these commands to prepare the system for suspend - if sysvals.display: + if sv.display: if not quiet: - pprint('SET DISPLAY TO %s' % sysvals.display.upper()) - displayControl(sysvals.display) + pprint('SET DISPLAY TO %s' % sv.display.upper()) + ret = sv.displayControl(sv.display) + sv.dlog('xset display %s, ret = %d' % (sv.display, ret)) time.sleep(1) - if sysvals.sync: + if sv.sync: if not quiet: pprint('SYNCING FILESYSTEMS') + sv.dlog('syncing filesystems') call('sync', shell=True) - # mark the start point in the kernel ring buffer just as we start - sysvals.initdmesg() + sv.dlog('read dmesg') + sv.initdmesg() # start ftrace - if(sysvals.usecallgraph or sysvals.usetraceevents): + if(sv.usecallgraph or sv.usetraceevents): if not quiet: pprint('START TRACING') - sysvals.fsetVal('1', 'tracing_on') - if sysvals.useprocmon: + sv.dlog('start ftrace tracing') + sv.fsetVal('1', 'tracing_on') + if sv.useprocmon: + sv.dlog('start the process monitor') pm.start() - sysvals.cmdinfo(True) + sv.dlog('run the cmdinfo list before') + sv.cmdinfo(True) # execute however many s/r runs requested - for count in range(1,sysvals.execcount+1): + for count in range(1,sv.execcount+1): # x2delay in between test runs - if(count > 1 and sysvals.x2delay > 0): - sysvals.fsetVal('WAIT %d' % sysvals.x2delay, 'trace_marker') - time.sleep(sysvals.x2delay/1000.0) - sysvals.fsetVal('WAIT END', 'trace_marker') + if(count > 1 and sv.x2delay > 0): + sv.fsetVal('WAIT %d' % sv.x2delay, 'trace_marker') + time.sleep(sv.x2delay/1000.0) + sv.fsetVal('WAIT END', 'trace_marker') # start message - if sysvals.testcommand != '': + if sv.testcommand != '': pprint('COMMAND START') else: - if(sysvals.rtcwake): + if(sv.rtcwake): pprint('SUSPEND START') else: pprint('SUSPEND START (press a key to resume)') # set rtcwake - if(sysvals.rtcwake): + if(sv.rtcwake): if not quiet: - pprint('will issue an rtcwake in %d seconds' % sysvals.rtcwaketime) - sysvals.rtcWakeAlarmOn() + pprint('will issue an rtcwake in %d seconds' % sv.rtcwaketime) + sv.dlog('enable RTC wake alarm') + sv.rtcWakeAlarmOn() # start of suspend trace marker - if(sysvals.usecallgraph or sysvals.usetraceevents): - sysvals.fsetVal(datetime.now().strftime(sysvals.tmstart), 'trace_marker') + if(sv.usecallgraph or sv.usetraceevents): + sv.fsetVal(datetime.now().strftime(sv.tmstart), 'trace_marker') # predelay delay - if(count == 1 and sysvals.predelay > 0): - sysvals.fsetVal('WAIT %d' % sysvals.predelay, 'trace_marker') - time.sleep(sysvals.predelay/1000.0) - sysvals.fsetVal('WAIT END', 'trace_marker') + if(count == 1 and sv.predelay > 0): + sv.fsetVal('WAIT %d' % sv.predelay, 'trace_marker') + time.sleep(sv.predelay/1000.0) + sv.fsetVal('WAIT END', 'trace_marker') # initiate suspend or command + sv.dlog('system executing a suspend') tdata = {'error': ''} - if sysvals.testcommand != '': - res = call(sysvals.testcommand+' 2>&1', shell=True); + if sv.testcommand != '': + res = call(sv.testcommand+' 2>&1', shell=True); if res != 0: tdata['error'] = 'cmd returned %d' % res else: - mode = sysvals.suspendmode - if sysvals.memmode and os.path.exists(sysvals.mempowerfile): + mode = sv.suspendmode + if sv.memmode and os.path.exists(sv.mempowerfile): mode = 'mem' - pf = open(sysvals.mempowerfile, 'w') - pf.write(sysvals.memmode) - pf.close() - if sysvals.diskmode and os.path.exists(sysvals.diskpowerfile): + sv.testVal(sv.mempowerfile, 'radio', sv.memmode) + if sv.diskmode and os.path.exists(sv.diskpowerfile): mode = 'disk' - pf = open(sysvals.diskpowerfile, 'w') - pf.write(sysvals.diskmode) - pf.close() - if mode == 'freeze' and sysvals.haveTurbostat(): + sv.testVal(sv.diskpowerfile, 'radio', sv.diskmode) + if sv.acpidebug: + sv.testVal(sv.acpipath, 'acpi', '0xe') + if mode == 'freeze' and sv.haveTurbostat(): # execution will pause here - turbo = sysvals.turbostat() + turbo = sv.turbostat() if turbo: tdata['turbo'] = turbo else: - pf = open(sysvals.powerfile, 'w') + pf = open(sv.powerfile, 'w') pf.write(mode) # execution will pause here try: pf.close() except Exception as e: tdata['error'] = str(e) - if(sysvals.rtcwake): - sysvals.rtcWakeAlarmOff() + sv.dlog('system returned from resume') + # reset everything + sv.testVal('restoreall') + if(sv.rtcwake): + sv.dlog('disable RTC wake alarm') + sv.rtcWakeAlarmOff() # postdelay delay - if(count == sysvals.execcount and sysvals.postdelay > 0): - sysvals.fsetVal('WAIT %d' % sysvals.postdelay, 'trace_marker') - time.sleep(sysvals.postdelay/1000.0) - sysvals.fsetVal('WAIT END', 'trace_marker') + if(count == sv.execcount and sv.postdelay > 0): + sv.fsetVal('WAIT %d' % sv.postdelay, 'trace_marker') + time.sleep(sv.postdelay/1000.0) + sv.fsetVal('WAIT END', 'trace_marker') # return from suspend pprint('RESUME COMPLETE') - if(sysvals.usecallgraph or sysvals.usetraceevents): - sysvals.fsetVal(datetime.now().strftime(sysvals.tmend), 'trace_marker') - if sysvals.wifi and wifi: - tdata['wifi'] = sysvals.pollWifi(wifi) - if(sysvals.suspendmode == 'mem' or sysvals.suspendmode == 'command'): + if(sv.usecallgraph or sv.usetraceevents): + sv.fsetVal(datetime.now().strftime(sv.tmend), 'trace_marker') + if sv.wifi and wifi: + tdata['wifi'] = sv.pollWifi(wifi) + sv.dlog('wifi check, %s' % tdata['wifi']) + if(sv.suspendmode == 'mem' or sv.suspendmode == 'command'): + sv.dlog('read the ACPI FPDT') tdata['fw'] = getFPDT(False) testdata.append(tdata) - cmdafter = sysvals.cmdinfo(False) + sv.dlog('run the cmdinfo list after') + cmdafter = sv.cmdinfo(False) # stop ftrace - if(sysvals.usecallgraph or sysvals.usetraceevents): - if sysvals.useprocmon: + if(sv.usecallgraph or sv.usetraceevents): + if sv.useprocmon: + sv.dlog('stop the process monitor') pm.stop() - sysvals.fsetVal('0', 'tracing_on') + sv.fsetVal('0', 'tracing_on') # grab a copy of the dmesg output if not quiet: pprint('CAPTURING DMESG') - sysvals.getdmesg(testdata) + sysvals.dlog('EXECUTION TRACE END') + sv.getdmesg(testdata) # grab a copy of the ftrace output - if(sysvals.usecallgraph or sysvals.usetraceevents): + if(sv.usecallgraph or sv.usetraceevents): if not quiet: pprint('CAPTURING TRACE') - op = sysvals.writeDatafileHeader(sysvals.ftracefile, testdata) + op = sv.writeDatafileHeader(sv.ftracefile, testdata) fp = open(tp+'trace', 'r') for line in fp: op.write(line) op.close() - sysvals.fsetVal('', 'trace') - sysvals.platforminfo(cmdafter) + sv.fsetVal('', 'trace') + sv.platforminfo(cmdafter) def readFile(file): if os.path.islink(file): @@ -5586,39 +5659,6 @@ def dmidecode(mempath, fatal=False): count += 1 return out -def displayControl(cmd): - xset, ret = 'timeout 10 xset -d :0.0 {0}', 0 - if sysvals.sudouser: - xset = 'sudo -u %s %s' % (sysvals.sudouser, xset) - if cmd == 'init': - ret = call(xset.format('dpms 0 0 0'), shell=True) - if not ret: - ret = call(xset.format('s off'), shell=True) - elif cmd == 'reset': - ret = call(xset.format('s reset'), shell=True) - elif cmd in ['on', 'off', 'standby', 'suspend']: - b4 = displayControl('stat') - ret = call(xset.format('dpms force %s' % cmd), shell=True) - if not ret: - curr = displayControl('stat') - sysvals.vprint('Display Switched: %s -> %s' % (b4, curr)) - if curr != cmd: - sysvals.vprint('WARNING: Display failed to change to %s' % cmd) - if ret: - sysvals.vprint('WARNING: Display failed to change to %s with xset' % cmd) - return ret - elif cmd == 'stat': - fp = Popen(xset.format('q').split(' '), stdout=PIPE).stdout - ret = 'unknown' - for line in fp: - m = re.match('[\s]*Monitor is (?P.*)', ascii(line)) - if(m and len(m.group('m')) >= 2): - out = m.group('m').lower() - ret = out[3:] if out[0:2] == 'in' else out - break - fp.close() - return ret - # Function: getFPDT # Description: # Read the acpi bios tables and pull out FPDT, the firmware data @@ -6001,8 +6041,19 @@ def rerunTest(htmlfile=''): # execute a suspend/resume, gather the logs, and generate the output def runTest(n=0, quiet=False): # prepare for the test - sysvals.initFtrace(quiet) sysvals.initTestOutput('suspend') + op = sysvals.writeDatafileHeader(sysvals.dmesgfile, []) + op.write('# EXECUTION TRACE START\n') + op.close() + if n <= 1: + if sysvals.rs != 0: + sysvals.dlog('%sabling runtime suspend' % ('en' if sysvals.rs > 0 else 'dis')) + sysvals.setRuntimeSuspend(True) + if sysvals.display: + ret = sysvals.displayControl('init') + sysvals.dlog('xset display init, ret = %d' % ret) + sysvals.dlog('initialize ftrace') + sysvals.initFtrace(quiet) # execute the test executeSuspend(quiet) @@ -6098,8 +6149,16 @@ def data_from_html(file, outpath, issues, fulldetail=False): if wifi: extra['wifi'] = wifi low = find_in_html(html, 'freeze time: ', ' ms') - if low and 'waking' in low: - issue = 'FREEZEWAKE' + for lowstr in ['waking', '+']: + if not low: + break + if lowstr not in low: + continue + if lowstr == '+': + issue = 'S2LOOPx%d' % len(low.split('+')) + else: + m = re.match('.*waking *(?P[0-9]*) *times.*', low) + issue = 'S2WAKEx%s' % m.group('n') if m else 'S2WAKExNaN' match = [i for i in issues if i['match'] == issue] if len(match) > 0: match[0]['count'] += 1 @@ -6605,6 +6664,11 @@ if __name__ == '__main__': val = next(args) except: doError('-info requires one string argument', True) + elif(arg == '-desc'): + try: + val = next(args) + except: + doError('-desc requires one string argument', True) elif(arg == '-rs'): try: val = next(args) @@ -6814,9 +6878,9 @@ if __name__ == '__main__': runSummary(sysvals.outdir, True, genhtml) elif(cmd in ['xon', 'xoff', 'xstandby', 'xsuspend', 'xinit', 'xreset']): sysvals.verbose = True - ret = displayControl(cmd[1:]) + ret = sysvals.displayControl(cmd[1:]) elif(cmd == 'xstat'): - pprint('Display Status: %s' % displayControl('stat').upper()) + pprint('Display Status: %s' % sysvals.displayControl('stat').upper()) elif(cmd == 'wificheck'): dev = sysvals.checkWifi() if dev: @@ -6854,12 +6918,8 @@ if __name__ == '__main__': if mode.startswith('disk-'): sysvals.diskmode = mode.split('-', 1)[-1] sysvals.suspendmode = 'disk' - sysvals.systemInfo(dmidecode(sysvals.mempath)) - setRuntimeSuspend(True) - if sysvals.display: - displayControl('init') failcnt, ret = 0, 0 if sysvals.multitest['run']: # run multiple tests in a separate subdirectory @@ -6900,7 +6960,10 @@ if __name__ == '__main__': sysvals.testdir = sysvals.outdir # run the test in the current directory ret = runTest() + + # reset to default values after testing if sysvals.display: - displayControl('reset') - setRuntimeSuspend(False) + sysvals.displayControl('reset') + if sysvals.rs != 0: + sysvals.setRuntimeSuspend(False) sys.exit(ret) -- cgit v1.3.1 From 93035242d9e22f2aad6ac0b886f19444713c0089 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 15 Nov 2020 14:06:50 +0200 Subject: tools/testing/scatterlist: Test dynamic __sg_alloc_table_from_pages Add few cases to test the dynamic allocation flow of __sg_alloc_table_from_pages. Link: https://lore.kernel.org/r/20201115120650.139277-1-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- tools/testing/scatterlist/main.c | 64 +++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c index f561aed7c657..2d27e47bbf2f 100644 --- a/tools/testing/scatterlist/main.c +++ b/tools/testing/scatterlist/main.c @@ -9,6 +9,7 @@ struct test { int alloc_ret; unsigned num_pages; unsigned *pfn; + unsigned *pfn_app; unsigned size; unsigned int max_seg; unsigned int expected_segments; @@ -52,31 +53,39 @@ int main(void) { const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT; struct test *test, tests[] = { - { -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 }, - { 0, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 }, - { 0, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 }, - { 0, 1, pfn(0), PAGE_SIZE, sgmax, 1 }, - { 0, 1, pfn(0), 1, sgmax, 1 }, - { 0, 2, pfn(0, 1), 2 * PAGE_SIZE, sgmax, 1 }, - { 0, 2, pfn(1, 0), 2 * PAGE_SIZE, sgmax, 2 }, - { 0, 3, pfn(0, 1, 2), 3 * PAGE_SIZE, sgmax, 1 }, - { 0, 3, pfn(0, 2, 1), 3 * PAGE_SIZE, sgmax, 3 }, - { 0, 3, pfn(0, 1, 3), 3 * PAGE_SIZE, sgmax, 2 }, - { 0, 3, pfn(1, 2, 4), 3 * PAGE_SIZE, sgmax, 2 }, - { 0, 3, pfn(1, 3, 4), 3 * PAGE_SIZE, sgmax, 2 }, - { 0, 4, pfn(0, 1, 3, 4), 4 * PAGE_SIZE, sgmax, 2 }, - { 0, 5, pfn(0, 1, 3, 4, 5), 5 * PAGE_SIZE, sgmax, 2 }, - { 0, 5, pfn(0, 1, 3, 4, 6), 5 * PAGE_SIZE, sgmax, 3 }, - { 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, sgmax, 1 }, - { 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, 2 * PAGE_SIZE, 3 }, - { 0, 6, pfn(0, 1, 2, 3, 4, 5), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 }, - { 0, 6, pfn(0, 2, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 4 }, - { 0, 6, pfn(0, 1, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 }, - { 0, 0, NULL, 0, 0, 0 }, + { -EINVAL, 1, pfn(0), NULL, PAGE_SIZE, 0, 1 }, + { 0, 1, pfn(0), NULL, PAGE_SIZE, PAGE_SIZE + 1, 1 }, + { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax + 1, 1 }, + { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax, 1 }, + { 0, 1, pfn(0), NULL, 1, sgmax, 1 }, + { 0, 2, pfn(0, 1), NULL, 2 * PAGE_SIZE, sgmax, 1 }, + { 0, 2, pfn(1, 0), NULL, 2 * PAGE_SIZE, sgmax, 2 }, + { 0, 3, pfn(0, 1, 2), NULL, 3 * PAGE_SIZE, sgmax, 1 }, + { 0, 3, pfn(0, 1, 2), NULL, 3 * PAGE_SIZE, sgmax, 1 }, + { 0, 3, pfn(0, 1, 2), pfn(3, 4, 5), 3 * PAGE_SIZE, sgmax, 1 }, + { 0, 3, pfn(0, 1, 2), pfn(4, 5, 6), 3 * PAGE_SIZE, sgmax, 2 }, + { 0, 3, pfn(0, 2, 1), NULL, 3 * PAGE_SIZE, sgmax, 3 }, + { 0, 3, pfn(0, 1, 3), NULL, 3 * PAGE_SIZE, sgmax, 2 }, + { 0, 3, pfn(1, 2, 4), NULL, 3 * PAGE_SIZE, sgmax, 2 }, + { 0, 3, pfn(1, 3, 4), NULL, 3 * PAGE_SIZE, sgmax, 2 }, + { 0, 4, pfn(0, 1, 3, 4), NULL, 4 * PAGE_SIZE, sgmax, 2 }, + { 0, 5, pfn(0, 1, 3, 4, 5), NULL, 5 * PAGE_SIZE, sgmax, 2 }, + { 0, 5, pfn(0, 1, 3, 4, 6), NULL, 5 * PAGE_SIZE, sgmax, 3 }, + { 0, 5, pfn(0, 1, 2, 3, 4), NULL, 5 * PAGE_SIZE, sgmax, 1 }, + { 0, 5, pfn(0, 1, 2, 3, 4), NULL, 5 * PAGE_SIZE, 2 * PAGE_SIZE, + 3 }, + { 0, 6, pfn(0, 1, 2, 3, 4, 5), NULL, 6 * PAGE_SIZE, + 2 * PAGE_SIZE, 3 }, + { 0, 6, pfn(0, 2, 3, 4, 5, 6), NULL, 6 * PAGE_SIZE, + 2 * PAGE_SIZE, 4 }, + { 0, 6, pfn(0, 1, 3, 4, 5, 6), pfn(7, 8, 9, 10, 11, 12), + 6 * PAGE_SIZE, 12 * PAGE_SIZE, 2 }, + { 0, 0, NULL, NULL, 0, 0, 0 }, }; unsigned int i; for (i = 0, test = tests; test->expected_segments; test++, i++) { + int left_pages = test->pfn_app ? test->num_pages : 0; struct page *pages[MAX_PAGES]; struct sg_table st; struct scatterlist *sg; @@ -84,14 +93,23 @@ int main(void) set_pages(pages, test->pfn, test->num_pages); sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0, - test->size, test->max_seg, NULL, 0, GFP_KERNEL); + test->size, test->max_seg, NULL, left_pages, GFP_KERNEL); assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); if (test->alloc_ret) continue; + if (test->pfn_app) { + set_pages(pages, test->pfn_app, test->num_pages); + sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0, + test->size, test->max_seg, sg, 0, GFP_KERNEL); + + assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); + } + VALIDATE(st.nents == test->expected_segments, &st, test); - VALIDATE(st.orig_nents == test->expected_segments, &st, test); + if (!test->pfn_app) + VALIDATE(st.orig_nents == test->expected_segments, &st, test); sg_free_table(&st); } -- cgit v1.3.1 From 716572b0003ef67a4889bd7d85baf5099c5a0248 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 2 Nov 2020 11:51:10 -0800 Subject: selftests/x86/fsgsbase: Fix GS == 1, 2, and 3 tests Setting GS to 1, 2, or 3 causes a nonsensical part of the IRET microcode to change GS back to zero on a return from kernel mode to user mode. The result is that these tests fail randomly depending on when interrupts happen. Detect when this happens and let the test pass. Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/7567fd44a1d60a9424f25b19a998f12149993b0d.1604346596.git.luto@kernel.org --- tools/testing/selftests/x86/fsgsbase.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 7161cfc2e60b..8c780cce941d 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -392,8 +392,8 @@ static void set_gs_and_switch_to(unsigned long local, local = read_base(GS); /* - * Signal delivery seems to mess up weird selectors. Put it - * back. + * Signal delivery is quite likely to change a selector + * of 1, 2, or 3 back to 0 due to IRET being defective. */ asm volatile ("mov %0, %%gs" : : "rm" (force_sel)); } else { @@ -411,6 +411,14 @@ static void set_gs_and_switch_to(unsigned long local, if (base == local && sel_pre_sched == sel_post_sched) { printf("[OK]\tGS/BASE remained 0x%hx/0x%lx\n", sel_pre_sched, local); + } else if (base == local && sel_pre_sched >= 1 && sel_pre_sched <= 3 && + sel_post_sched == 0) { + /* + * IRET is misdesigned and will squash selectors 1, 2, or 3 + * to zero. Don't fail the test just because this happened. + */ + printf("[OK]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx because IRET is defective\n", + sel_pre_sched, local, sel_post_sched, base); } else { nerrs++; printf("[FAIL]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx\n", -- cgit v1.3.1 From aeaaf005da1de075929e56562dced4a58238efc4 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 2 Nov 2020 11:51:11 -0800 Subject: selftests/x86: Add missing .note.GNU-stack sections Several of the x86 selftests end up with executable stacks because the asm was missing the annotation that says that they are modern and don't need executable stacks. Add the annotations. Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/6f043c03e9e0e4557e1e975a63b07a4d18965a68.1604346596.git.luto@kernel.org --- tools/testing/selftests/x86/raw_syscall_helper_32.S | 2 ++ tools/testing/selftests/x86/thunks.S | 2 ++ 2 files changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/x86/raw_syscall_helper_32.S b/tools/testing/selftests/x86/raw_syscall_helper_32.S index 94410fa2b5ed..a10d36afdca0 100644 --- a/tools/testing/selftests/x86/raw_syscall_helper_32.S +++ b/tools/testing/selftests/x86/raw_syscall_helper_32.S @@ -45,3 +45,5 @@ int80_and_ret: .type int80_and_ret, @function .size int80_and_ret, .-int80_and_ret + +.section .note.GNU-stack,"",%progbits diff --git a/tools/testing/selftests/x86/thunks.S b/tools/testing/selftests/x86/thunks.S index 1bb5d62c16a4..a2d47d8344d4 100644 --- a/tools/testing/selftests/x86/thunks.S +++ b/tools/testing/selftests/x86/thunks.S @@ -57,3 +57,5 @@ call32_from_64: ret .size call32_from_64, .-call32_from_64 + +.section .note.GNU-stack,"",%progbits -- cgit v1.3.1 From 24eb2a02a68c98d46878214f46f855d934dc73ff Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 23 Nov 2020 09:12:26 +0200 Subject: selftests: mlxsw: Add blackhole nexthop configuration tests Test the mlxsw allows blackhole nexthops to be installed and that the nexthops are marked as offloaded. Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/mlxsw/rtnetlink.sh | 25 +++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index 5de47d72f8c9..a2eff5f58209 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -32,6 +32,7 @@ ALL_TESTS=" nexthop_obj_invalid_test nexthop_obj_offload_test nexthop_obj_group_offload_test + nexthop_obj_blackhole_offload_test nexthop_obj_route_offload_test devlink_reload_test " @@ -693,9 +694,6 @@ nexthop_obj_invalid_test() ip nexthop add id 1 encap mpls 200/300 via 192.0.2.3 dev $swp1 check_fail $? "managed to configure a nexthop with MPLS encap when should not" - ip nexthop add id 1 blackhole - check_fail $? "managed to configure a blackhole nexthop when should not" - ip nexthop add id 1 dev $swp1 ip nexthop add id 2 dev $swp1 ip nexthop add id 10 group 1/2 @@ -817,6 +815,27 @@ nexthop_obj_group_offload_test() simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 } +nexthop_obj_blackhole_offload_test() +{ + # Test offload indication of blackhole nexthop objects + RET=0 + + ip nexthop add id 1 blackhole + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 1 + check_err $? "Blackhole nexthop not marked as offloaded when should" + + ip nexthop add id 10 group 1 + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 10 + check_err $? "Nexthop group not marked as offloaded when should" + + log_test "blackhole nexthop objects offload indication" + + ip nexthop del id 10 + ip nexthop del id 1 +} + nexthop_obj_route_offload_test() { # Test offload indication of routes using nexthop objects -- cgit v1.3.1 From 1beaff779f783268dcc61c0a8fbd16e9c8b0abfa Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 23 Nov 2020 09:12:27 +0200 Subject: selftests: forwarding: Add blackhole nexthops tests Test that IPv4 and IPv6 ping fail when the route is using a blackhole nexthop or a group with a blackhole nexthop. Test that ping passes when the route starts using a valid nexthop. Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/router_mpath_nh.sh | 58 +++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh index e8c2573d5232..388e4492b81b 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh @@ -1,7 +1,13 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test" +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + multipath_test + ping_ipv4_blackhole + ping_ipv6_blackhole +" NUM_NETIFS=8 source lib.sh @@ -302,6 +308,56 @@ multipath_test() multipath6_l4_test "Weighted MP 11:45" 11 45 } +ping_ipv4_blackhole() +{ + RET=0 + + ip nexthop add id 1001 blackhole + ip nexthop add id 1002 group 1001 + + ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 1001 + ping_do $h1 198.51.100.2 + check_fail $? "ping did not fail when using a blackhole nexthop" + + ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 1002 + ping_do $h1 198.51.100.2 + check_fail $? "ping did not fail when using a blackhole nexthop group" + + ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 103 + ping_do $h1 198.51.100.2 + check_err $? "ping failed with a valid nexthop" + + log_test "IPv4 blackhole ping" + + ip nexthop del id 1002 + ip nexthop del id 1001 +} + +ping_ipv6_blackhole() +{ + RET=0 + + ip -6 nexthop add id 1001 blackhole + ip nexthop add id 1002 group 1001 + + ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 1001 + ping6_do $h1 2001:db8:2::2 + check_fail $? "ping did not fail when using a blackhole nexthop" + + ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 1002 + ping6_do $h1 2001:db8:2::2 + check_fail $? "ping did not fail when using a blackhole nexthop group" + + ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 106 + ping6_do $h1 2001:db8:2::2 + check_err $? "ping failed with a valid nexthop" + + log_test "IPv6 blackhole ping" + + ip nexthop del id 1002 + ip -6 nexthop del id 1001 +} + setup_prepare() { h1=${NETIFS[p1]} -- cgit v1.3.1 From 84e8feeadcf048903e65b7d82769c58576c506b0 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 23 Nov 2020 09:12:30 +0200 Subject: selftests: mlxsw: Add blackhole_nexthop trap test Test that packets hitting a blackhole nexthop are trapped to the CPU when the trap is enabled. Test that packets are not reported when the trap is disabled. Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- .../drivers/net/mlxsw/devlink_trap_l3_drops.sh | 36 ++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh index f5abb1ebd392..4029833f7e27 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh @@ -52,6 +52,7 @@ ALL_TESTS=" blackhole_route_test irif_disabled_test erif_disabled_test + blackhole_nexthop_test " NUM_NETIFS=4 @@ -647,6 +648,41 @@ erif_disabled_test() devlink_trap_action_set $trap_name "drop" } +__blackhole_nexthop_test() +{ + local flags=$1; shift + local subnet=$1; shift + local proto=$1; shift + local dip=$1; shift + local trap_name="blackhole_nexthop" + local mz_pid + + RET=0 + + ip -$flags nexthop add id 1 blackhole + ip -$flags route add $subnet nhid 1 + tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \ + flower skip_hw dst_ip $dip ip_proto udp action drop + + # Generate packets to the blackhole nexthop + $MZ $h1 -$flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $rp1mac \ + -B $dip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $rp2 101 + log_test "Blackhole nexthop: IPv$flags" + + devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101 + ip -$flags route del $subnet + ip -$flags nexthop del id 1 +} + +blackhole_nexthop_test() +{ + __blackhole_nexthop_test "4" "198.51.100.0/30" "ip" $h2_ipv4 + __blackhole_nexthop_test "6" "2001:db8:2::/120" "ipv6" $h2_ipv6 +} + trap cleanup EXIT setup_prepare -- cgit v1.3.1 From 05a98d7672731aeb5f9837b35cc7fe70444e70bd Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Sat, 21 Nov 2020 21:22:04 -0500 Subject: selftest/bpf: Fix link in readme The link was bad because of invalid rst; it was pointing to itself and was rendering badly. Signed-off-by: Andrei Matei Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201122022205.57229-1-andreimatei1@gmail.com --- tools/testing/selftests/bpf/README.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index ac9eda830187..3b8d8885892d 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -2,7 +2,10 @@ BPF Selftest Notes ================== General instructions on running selftests can be found in -`Documentation/bpf/bpf_devel_QA.rst`_. +`Documentation/bpf/bpf_devel_QA.rst`__. + +__ /Documentation/bpf/bpf_devel_QA.rst#q-how-to-run-bpf-selftests + Additional information about selftest failures are documented here. -- cgit v1.3.1 From 1c26ac6ab3ce47ee2e6342373681dedbb97e21a3 Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Sat, 21 Nov 2020 21:22:05 -0500 Subject: selftest/bpf: Fix rst formatting in readme A couple of places in the readme had invalid rst formatting causing the rendering to be off. This patch fixes them with minimal edits. Signed-off-by: Andrei Matei Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201122022205.57229-2-andreimatei1@gmail.com --- tools/testing/selftests/bpf/README.rst | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 3b8d8885892d..ca064180d4d0 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -33,11 +33,12 @@ The verifier will reject such code with above error. At insn 18 the r7 is indeed unbounded. The later insn 19 checks the bounds and the insn 20 undoes map_value addition. It is currently impossible for the verifier to understand such speculative pointer arithmetic. -Hence - https://reviews.llvm.org/D85570 -addresses it on the compiler side. It was committed on llvm 12. +Hence `this patch`__ addresses it on the compiler side. It was committed on llvm 12. + +__ https://reviews.llvm.org/D85570 The corresponding C code + .. code-block:: c for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) { @@ -80,10 +81,11 @@ The symptom for ``bpf_iter/netlink`` looks like 17: (7b) *(u64 *)(r7 +0) = r2 only read is supported -This is due to a llvm BPF backend bug. The fix - https://reviews.llvm.org/D78466 +This is due to a llvm BPF backend bug. `The fix`__ has been pushed to llvm 10.x release branch and will be -available in 10.0.1. The fix is available in llvm 11.0.0 trunk. +available in 10.0.1. The patch is available in llvm 11.0.0 trunk. + +__ https://reviews.llvm.org/D78466 BPF CO-RE-based tests and Clang version ======================================= @@ -97,11 +99,11 @@ them to Clang/LLVM. These sub-tests are going to be skipped if Clang is too old to support them, they shouldn't cause build failures or runtime test failures: - - __builtin_btf_type_id() ([0], [1], [2]); - - __builtin_preserve_type_info(), __builtin_preserve_enum_value() ([3], [4]). +- __builtin_btf_type_id() [0_, 1_, 2_]; +- __builtin_preserve_type_info(), __builtin_preserve_enum_value() [3_, 4_]. - [0] https://reviews.llvm.org/D74572 - [1] https://reviews.llvm.org/D74668 - [2] https://reviews.llvm.org/D85174 - [3] https://reviews.llvm.org/D83878 - [4] https://reviews.llvm.org/D83242 +.. _0: https://reviews.llvm.org/D74572 +.. _1: https://reviews.llvm.org/D74668 +.. _2: https://reviews.llvm.org/D85174 +.. _3: https://reviews.llvm.org/D83878 +.. _4: https://reviews.llvm.org/D83242 -- cgit v1.3.1 From 68878a5c5b852d17f5827ce8a0f6fbd8b4cdfada Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 24 Nov 2020 18:41:00 +0800 Subject: bpftool: Fix error return value in build_btf_type_table An appropriate return value should be set on the failed path. Fixes: 4d374ba0bf30 ("tools: bpftool: implement "bpftool btf show|list"") Reported-by: Hulk Robot Signed-off-by: Zhen Lei Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201124104100.491-1-thunder.leizhen@huawei.com --- tools/bpf/bpftool/btf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 8ab142ff5eac..2afb7d5b1aca 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -693,6 +693,7 @@ build_btf_type_table(struct btf_attach_table *tab, enum bpf_obj_type type, obj_node = calloc(1, sizeof(*obj_node)); if (!obj_node) { p_err("failed to allocate memory: %s", strerror(errno)); + err = -ENOMEM; goto err_free; } -- cgit v1.3.1 From db13db9f67fe5049159a05e870daedcee5879f8d Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 24 Nov 2020 15:21:14 +0800 Subject: libbpf: Add support for canceling cached_cons advance Add a new function for returning descriptors the user received after an xsk_ring_cons__peek call. After the application has gotten a number of descriptors from a ring, it might not be able to or want to process them all for various reasons. Therefore, it would be useful to have an interface for returning or cancelling a number of them so that they are returned to the ring. This patch adds a new function called xsk_ring_cons__cancel that performs this operation on nb descriptors counted from the end of the batch of descriptors that was received through the peek call. Signed-off-by: Li RongQing Signed-off-by: Daniel Borkmann [ Magnus Karlsson: rewrote changelog ] Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/1606202474-8119-1-git-send-email-lirongqing@baidu.com --- tools/lib/bpf/xsk.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index 1069c46364ff..1719a327e5f9 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -153,6 +153,12 @@ static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons, return entries; } +static inline void xsk_ring_cons__cancel(struct xsk_ring_cons *cons, + size_t nb) +{ + cons->cached_cons -= nb; +} + static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb) { /* Make sure data has been read before indicating we are done -- cgit v1.3.1 From 27672f0d280a3f286a410a8db2004f46ace72a17 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 24 Nov 2020 15:12:09 +0000 Subject: bpf: Add a BPF helper for getting the IMA hash of an inode Provide a wrapper function to get the IMA hash of an inode. This helper is useful in fingerprinting files (e.g executables on execution) and using these fingerprints in detections like an executable unlinking itself. Since the ima_inode_hash can sleep, it's only allowed for sleepable LSM hooks. Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201124151210.1081188-3-kpsingh@chromium.org --- include/uapi/linux/bpf.h | 11 +++++++++++ kernel/bpf/bpf_lsm.c | 26 ++++++++++++++++++++++++++ scripts/bpf_helpers_doc.py | 2 ++ tools/include/uapi/linux/bpf.h | 11 +++++++++++ 4 files changed, 50 insertions(+) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3ca6146f001a..c3458ec1f30a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3807,6 +3807,16 @@ union bpf_attr { * See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**) * Return * Current *ktime*. + * + * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size) + * Description + * Returns the stored IMA hash of the *inode* (if it's avaialable). + * If the hash is larger than *size*, then only *size* + * bytes will be copied to *dst* + * Return + * The **hash_algo** is returned on success, + * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if + * invalid arguments are passed. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3970,6 +3980,7 @@ union bpf_attr { FN(get_current_task_btf), \ FN(bprm_opts_set), \ FN(ktime_get_coarse_ns), \ + FN(ima_inode_hash), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index b4f27a874092..70e5e0b6d69d 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -15,6 +15,7 @@ #include #include #include +#include /* For every LSM hook that allows attachment of BPF programs, declare a nop * function where a BPF program can be attached. @@ -75,6 +76,29 @@ const static struct bpf_func_proto bpf_bprm_opts_set_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_3(bpf_ima_inode_hash, struct inode *, inode, void *, dst, u32, size) +{ + return ima_inode_hash(inode, dst, size); +} + +static bool bpf_ima_inode_hash_allowed(const struct bpf_prog *prog) +{ + return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id); +} + +BTF_ID_LIST_SINGLE(bpf_ima_inode_hash_btf_ids, struct, inode) + +const static struct bpf_func_proto bpf_ima_inode_hash_proto = { + .func = bpf_ima_inode_hash, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &bpf_ima_inode_hash_btf_ids[0], + .arg2_type = ARG_PTR_TO_UNINIT_MEM, + .arg3_type = ARG_CONST_SIZE, + .allowed = bpf_ima_inode_hash_allowed, +}; + static const struct bpf_func_proto * bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -97,6 +121,8 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_task_storage_delete_proto; case BPF_FUNC_bprm_opts_set: return &bpf_bprm_opts_set_proto; + case BPF_FUNC_ima_inode_hash: + return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL; default: return tracing_prog_func_proto(func_id, prog); } diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index c5bc947a70ad..8b829748d488 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -436,6 +436,7 @@ class PrinterHelpers(Printer): 'struct xdp_md', 'struct path', 'struct btf_ptr', + 'struct inode', ] known_types = { '...', @@ -480,6 +481,7 @@ class PrinterHelpers(Printer): 'struct task_struct', 'struct path', 'struct btf_ptr', + 'struct inode', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 3ca6146f001a..c3458ec1f30a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3807,6 +3807,16 @@ union bpf_attr { * See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**) * Return * Current *ktime*. + * + * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size) + * Description + * Returns the stored IMA hash of the *inode* (if it's avaialable). + * If the hash is larger than *size*, then only *size* + * bytes will be copied to *dst* + * Return + * The **hash_algo** is returned on success, + * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if + * invalid arguments are passed. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3970,6 +3980,7 @@ union bpf_attr { FN(get_current_task_btf), \ FN(bprm_opts_set), \ FN(ktime_get_coarse_ns), \ + FN(ima_inode_hash), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.3.1 From 34b82d3ac1058653b3de7be4697b55f67533b1f1 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 24 Nov 2020 15:12:10 +0000 Subject: bpf: Add a selftest for bpf_ima_inode_hash The test does the following: - Mounts a loopback filesystem and appends the IMA policy to measure executions only on this file-system. Restricting the IMA policy to a particular filesystem prevents a system-wide IMA policy change. - Executes an executable copied to this loopback filesystem. - Calls the bpf_ima_inode_hash in the bprm_committed_creds hook and checks if the call succeeded and checks if a hash was calculated. The test shells out to the added ima_setup.sh script as the setup is better handled in a shell script and is more complicated to do in the test program or even shelling out individual commands from C. The list of required configs (i.e. IMA, SECURITYFS, IMA_{WRITE,READ}_POLICY) for running this test are also updated. Suggested-by: Mimi Zohar (limit policy rule to loopback mount) Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201124151210.1081188-4-kpsingh@chromium.org --- tools/testing/selftests/bpf/config | 4 ++ tools/testing/selftests/bpf/ima_setup.sh | 80 +++++++++++++++++++++++ tools/testing/selftests/bpf/prog_tests/test_ima.c | 74 +++++++++++++++++++++ tools/testing/selftests/bpf/progs/ima.c | 28 ++++++++ 4 files changed, 186 insertions(+) create mode 100755 tools/testing/selftests/bpf/ima_setup.sh create mode 100644 tools/testing/selftests/bpf/prog_tests/test_ima.c create mode 100644 tools/testing/selftests/bpf/progs/ima.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 2118e23ac07a..365bf9771b07 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -39,3 +39,7 @@ CONFIG_BPF_JIT=y CONFIG_BPF_LSM=y CONFIG_SECURITY=y CONFIG_LIRC=y +CONFIG_IMA=y +CONFIG_SECURITYFS=y +CONFIG_IMA_WRITE_POLICY=y +CONFIG_IMA_READ_POLICY=y diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh new file mode 100755 index 000000000000..15490ccc5e55 --- /dev/null +++ b/tools/testing/selftests/bpf/ima_setup.sh @@ -0,0 +1,80 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -e +set -u + +IMA_POLICY_FILE="/sys/kernel/security/ima/policy" +TEST_BINARY="/bin/true" + +usage() +{ + echo "Usage: $0 " + exit 1 +} + +setup() +{ + local tmp_dir="$1" + local mount_img="${tmp_dir}/test.img" + local mount_dir="${tmp_dir}/mnt" + local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})" + mkdir -p ${mount_dir} + + dd if=/dev/zero of="${mount_img}" bs=1M count=10 + + local loop_device="$(losetup --find --show ${mount_img})" + + mkfs.ext4 "${loop_device}" + mount "${loop_device}" "${mount_dir}" + + cp "${TEST_BINARY}" "${mount_dir}" + local mount_uuid="$(blkid -s UUID -o value ${loop_device})" + echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${IMA_POLICY_FILE} +} + +cleanup() { + local tmp_dir="$1" + local mount_img="${tmp_dir}/test.img" + local mount_dir="${tmp_dir}/mnt" + + local loop_devices=$(losetup -j ${mount_img} -O NAME --noheadings) + for loop_dev in "${loop_devices}"; do + losetup -d $loop_dev + done + + umount ${mount_dir} + rm -rf ${tmp_dir} +} + +run() +{ + local tmp_dir="$1" + local mount_dir="${tmp_dir}/mnt" + local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})" + + exec "${copied_bin_path}" +} + +main() +{ + [[ $# -ne 2 ]] && usage + + local action="$1" + local tmp_dir="$2" + + [[ ! -d "${tmp_dir}" ]] && echo "Directory ${tmp_dir} doesn't exist" && exit 1 + + if [[ "${action}" == "setup" ]]; then + setup "${tmp_dir}" + elif [[ "${action}" == "cleanup" ]]; then + cleanup "${tmp_dir}" + elif [[ "${action}" == "run" ]]; then + run "${tmp_dir}" + else + echo "Unknown action: ${action}" + exit 1 + fi +} + +main "$@" diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c new file mode 100644 index 000000000000..61fca681d524 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2020 Google LLC. + */ + +#include +#include +#include +#include +#include + +#include "ima.skel.h" + +static int run_measured_process(const char *measured_dir, u32 *monitored_pid) +{ + int child_pid, child_status; + + child_pid = fork(); + if (child_pid == 0) { + *monitored_pid = getpid(); + execlp("./ima_setup.sh", "./ima_setup.sh", "run", measured_dir, + NULL); + exit(errno); + + } else if (child_pid > 0) { + waitpid(child_pid, &child_status, 0); + return WEXITSTATUS(child_status); + } + + return -EINVAL; +} + +void test_test_ima(void) +{ + char measured_dir_template[] = "/tmp/ima_measuredXXXXXX"; + const char *measured_dir; + char cmd[256]; + + int err, duration = 0; + struct ima *skel = NULL; + + skel = ima__open_and_load(); + if (CHECK(!skel, "skel_load", "skeleton failed\n")) + goto close_prog; + + err = ima__attach(skel); + if (CHECK(err, "attach", "attach failed: %d\n", err)) + goto close_prog; + + measured_dir = mkdtemp(measured_dir_template); + if (CHECK(measured_dir == NULL, "mkdtemp", "err %d\n", errno)) + goto close_prog; + + snprintf(cmd, sizeof(cmd), "./ima_setup.sh setup %s", measured_dir); + if (CHECK_FAIL(system(cmd))) + goto close_clean; + + err = run_measured_process(measured_dir, &skel->bss->monitored_pid); + if (CHECK(err, "run_measured_process", "err = %d\n", err)) + goto close_clean; + + CHECK(skel->data->ima_hash_ret < 0, "ima_hash_ret", + "ima_hash_ret = %ld\n", skel->data->ima_hash_ret); + + CHECK(skel->bss->ima_hash == 0, "ima_hash", + "ima_hash = %lu\n", skel->bss->ima_hash); + +close_clean: + snprintf(cmd, sizeof(cmd), "./ima_setup.sh cleanup %s", measured_dir); + CHECK_FAIL(system(cmd)); +close_prog: + ima__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c new file mode 100644 index 000000000000..86b21aff4bc5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/ima.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2020 Google LLC. + */ + +#include "vmlinux.h" +#include +#include +#include + +long ima_hash_ret = -1; +u64 ima_hash = 0; +u32 monitored_pid = 0; + +char _license[] SEC("license") = "GPL"; + +SEC("lsm.s/bprm_committed_creds") +int BPF_PROG(ima, struct linux_binprm *bprm) +{ + u32 pid = bpf_get_current_pid_tgid() >> 32; + + if (pid == monitored_pid) + ima_hash_ret = bpf_ima_inode_hash(bprm->file->f_inode, + &ima_hash, sizeof(ima_hash)); + + return 0; +} -- cgit v1.3.1 From fb3558127cb62ba2dea9e3d0efa1bb1d7e5eee2a Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Tue, 24 Nov 2020 22:52:55 -0500 Subject: bpf: Fix selftest compilation on clang 11 Before this patch, profiler.inc.h wouldn't compile with clang-11 (before the __builtin_preserve_enum_value LLVM builtin was introduced in https://reviews.llvm.org/D83242). Another test that uses this builtin (test_core_enumval) is conditionally skipped if the compiler is too old. In that spirit, this patch inhibits part of populate_cgroup_info(), which needs this CO-RE builtin. The selftests build again on clang-11. The affected test (the profiler test) doesn't pass on clang-11 because it's missing https://reviews.llvm.org/D85570, but at least the test suite as a whole compiles. The test's expected failure is already called out in the README. Signed-off-by: Andrei Matei Signed-off-by: Daniel Borkmann Tested-by: Florian Lehner Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201125035255.17970-1-andreimatei1@gmail.com --- tools/testing/selftests/bpf/progs/profiler.inc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index 30982a7e4d0f..4896fdf816f7 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -256,6 +256,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn); struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); +#if __has_builtin(__builtin_preserve_enum_value) if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) { int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local, pids_cgrp_id___local); @@ -275,6 +276,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, } } } +#endif cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs); cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs); -- cgit v1.3.1 From 75eeaddd57f4a0ac89110547221df8f3757d5a6f Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:26 +0800 Subject: perf arm-spe: Refactor printing string to buffer When outputs strings to the decoding buffer with function snprintf(), SPE decoder needs to detects if any error returns from snprintf() and if so needs to directly bail out. If snprintf() returns success, it needs to update buffer pointer and reduce the buffer length so can continue to output the next string into the consequent memory space. This complex logics are spreading in the function arm_spe_pkt_desc() so there has many duplicate codes for handling error detecting, increment buffer pointer and decrement buffer size. To avoid the duplicate code, this patch introduces a new helper function arm_spe_pkt_out_string() which is used to wrap up the complex logics, and it's used by the caller arm_spe_pkt_desc(). This patch moves the variable 'blen' as the function's local variable so allows to remove the unnecessary braces and improve the readability. This patch simplifies the return value for arm_spe_pkt_desc(): '0' means success and other values mean an error has occurred. To realize this, it relies on arm_spe_pkt_out_string()'s parameter 'err', the 'err' is a cumulative value, returns its final value if printing buffer is called for one time or multiple times. Finally, the error is handled in a central place, rather than directly bailing out in switch-cases, it returns error at the end of arm_spe_pkt_desc(). This patch changes the caller arm_spe_dump() to respect the updated return value semantics of arm_spe_pkt_desc(). Suggested-by: Dave Martin Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Reviewed-by: Dave Martin Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../util/arm-spe-decoder/arm-spe-pkt-decoder.c | 302 ++++++++++----------- tools/perf/util/arm-spe.c | 2 +- 2 files changed, 151 insertions(+), 153 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 671a4763fb47..fbededc1bcd4 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "arm-spe-pkt-decoder.h" @@ -258,192 +259,189 @@ int arm_spe_get_packet(const unsigned char *buf, size_t len, return ret; } +static int arm_spe_pkt_out_string(int *err, char **buf_p, size_t *blen, + const char *fmt, ...) +{ + va_list ap; + int ret; + + /* Bail out if any error occurred */ + if (err && *err) + return *err; + + va_start(ap, fmt); + ret = vsnprintf(*buf_p, *blen, fmt, ap); + va_end(ap); + + if (ret < 0) { + if (err && !*err) + *err = ret; + + /* + * A return value of *blen or more means that the output was + * truncated and the buffer is overrun. + */ + } else if ((size_t)ret >= *blen) { + (*buf_p)[*blen - 1] = '\0'; + + /* + * Set *err to 'ret' to avoid overflow if tries to + * fill this buffer sequentially. + */ + if (err && !*err) + *err = ret; + } else { + *buf_p += ret; + *blen -= ret; + } + + return ret; +} + int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t buf_len) { - int ret, ns, el, idx = packet->index; + int ns, el, idx = packet->index; unsigned long long payload = packet->payload; const char *name = arm_spe_pkt_name(packet->type); + char *buf_orig = buf; + size_t blen = buf_len; + int err = 0; switch (packet->type) { case ARM_SPE_BAD: case ARM_SPE_PAD: case ARM_SPE_END: - return snprintf(buf, buf_len, "%s", name); - case ARM_SPE_EVENTS: { - size_t blen = buf_len; - - ret = 0; - ret = snprintf(buf, buf_len, "EV"); - buf += ret; - blen -= ret; - if (payload & 0x1) { - ret = snprintf(buf, buf_len, " EXCEPTION-GEN"); - buf += ret; - blen -= ret; - } - if (payload & 0x2) { - ret = snprintf(buf, buf_len, " RETIRED"); - buf += ret; - blen -= ret; - } - if (payload & 0x4) { - ret = snprintf(buf, buf_len, " L1D-ACCESS"); - buf += ret; - blen -= ret; - } - if (payload & 0x8) { - ret = snprintf(buf, buf_len, " L1D-REFILL"); - buf += ret; - blen -= ret; - } - if (payload & 0x10) { - ret = snprintf(buf, buf_len, " TLB-ACCESS"); - buf += ret; - blen -= ret; - } - if (payload & 0x20) { - ret = snprintf(buf, buf_len, " TLB-REFILL"); - buf += ret; - blen -= ret; - } - if (payload & 0x40) { - ret = snprintf(buf, buf_len, " NOT-TAKEN"); - buf += ret; - blen -= ret; - } - if (payload & 0x80) { - ret = snprintf(buf, buf_len, " MISPRED"); - buf += ret; - blen -= ret; - } + arm_spe_pkt_out_string(&err, &buf, &blen, "%s", name); + break; + case ARM_SPE_EVENTS: + arm_spe_pkt_out_string(&err, &buf, &blen, "EV"); + + if (payload & 0x1) + arm_spe_pkt_out_string(&err, &buf, &blen, " EXCEPTION-GEN"); + if (payload & 0x2) + arm_spe_pkt_out_string(&err, &buf, &blen, " RETIRED"); + if (payload & 0x4) + arm_spe_pkt_out_string(&err, &buf, &blen, " L1D-ACCESS"); + if (payload & 0x8) + arm_spe_pkt_out_string(&err, &buf, &blen, " L1D-REFILL"); + if (payload & 0x10) + arm_spe_pkt_out_string(&err, &buf, &blen, " TLB-ACCESS"); + if (payload & 0x20) + arm_spe_pkt_out_string(&err, &buf, &blen, " TLB-REFILL"); + if (payload & 0x40) + arm_spe_pkt_out_string(&err, &buf, &blen, " NOT-TAKEN"); + if (payload & 0x80) + arm_spe_pkt_out_string(&err, &buf, &blen, " MISPRED"); if (idx > 1) { - if (payload & 0x100) { - ret = snprintf(buf, buf_len, " LLC-ACCESS"); - buf += ret; - blen -= ret; - } - if (payload & 0x200) { - ret = snprintf(buf, buf_len, " LLC-REFILL"); - buf += ret; - blen -= ret; - } - if (payload & 0x400) { - ret = snprintf(buf, buf_len, " REMOTE-ACCESS"); - buf += ret; - blen -= ret; - } + if (payload & 0x100) + arm_spe_pkt_out_string(&err, &buf, &blen, " LLC-ACCESS"); + if (payload & 0x200) + arm_spe_pkt_out_string(&err, &buf, &blen, " LLC-REFILL"); + if (payload & 0x400) + arm_spe_pkt_out_string(&err, &buf, &blen, " REMOTE-ACCESS"); } - if (ret < 0) - return ret; - blen -= ret; - return buf_len - blen; - } + break; case ARM_SPE_OP_TYPE: switch (idx) { - case 0: return snprintf(buf, buf_len, "%s", payload & 0x1 ? - "COND-SELECT" : "INSN-OTHER"); - case 1: { - size_t blen = buf_len; + case 0: + arm_spe_pkt_out_string(&err, &buf, &blen, + payload & 0x1 ? "COND-SELECT" : "INSN-OTHER"); + break; + case 1: + arm_spe_pkt_out_string(&err, &buf, &blen, + payload & 0x1 ? "ST" : "LD"); - if (payload & 0x1) - ret = snprintf(buf, buf_len, "ST"); - else - ret = snprintf(buf, buf_len, "LD"); - buf += ret; - blen -= ret; if (payload & 0x2) { - if (payload & 0x4) { - ret = snprintf(buf, buf_len, " AT"); - buf += ret; - blen -= ret; - } - if (payload & 0x8) { - ret = snprintf(buf, buf_len, " EXCL"); - buf += ret; - blen -= ret; - } - if (payload & 0x10) { - ret = snprintf(buf, buf_len, " AR"); - buf += ret; - blen -= ret; - } + if (payload & 0x4) + arm_spe_pkt_out_string(&err, &buf, &blen, " AT"); + if (payload & 0x8) + arm_spe_pkt_out_string(&err, &buf, &blen, " EXCL"); + if (payload & 0x10) + arm_spe_pkt_out_string(&err, &buf, &blen, " AR"); } else if (payload & 0x4) { - ret = snprintf(buf, buf_len, " SIMD-FP"); - buf += ret; - blen -= ret; - } - if (ret < 0) - return ret; - blen -= ret; - return buf_len - blen; - } - case 2: { - size_t blen = buf_len; - - ret = snprintf(buf, buf_len, "B"); - buf += ret; - blen -= ret; - if (payload & 0x1) { - ret = snprintf(buf, buf_len, " COND"); - buf += ret; - blen -= ret; - } - if (payload & 0x2) { - ret = snprintf(buf, buf_len, " IND"); - buf += ret; - blen -= ret; - } - if (ret < 0) - return ret; - blen -= ret; - return buf_len - blen; + arm_spe_pkt_out_string(&err, &buf, &blen, " SIMD-FP"); } - default: return 0; + break; + case 2: + arm_spe_pkt_out_string(&err, &buf, &blen, "B"); + + if (payload & 0x1) + arm_spe_pkt_out_string(&err, &buf, &blen, " COND"); + if (payload & 0x2) + arm_spe_pkt_out_string(&err, &buf, &blen, " IND"); + + break; + default: + /* Unknown index */ + err = -1; + break; } + break; case ARM_SPE_DATA_SOURCE: case ARM_SPE_TIMESTAMP: - return snprintf(buf, buf_len, "%s %lld", name, payload); + arm_spe_pkt_out_string(&err, &buf, &blen, "%s %lld", name, payload); + break; case ARM_SPE_ADDRESS: switch (idx) { case 0: - case 1: ns = !!(packet->payload & NS_FLAG); + case 1: + ns = !!(packet->payload & NS_FLAG); el = (packet->payload & EL_FLAG) >> 61; payload &= ~(0xffULL << 56); - return snprintf(buf, buf_len, "%s 0x%llx el%d ns=%d", + arm_spe_pkt_out_string(&err, &buf, &blen, + "%s 0x%llx el%d ns=%d", (idx == 1) ? "TGT" : "PC", payload, el, ns); - case 2: return snprintf(buf, buf_len, "VA 0x%llx", payload); - case 3: ns = !!(packet->payload & NS_FLAG); + break; + case 2: + arm_spe_pkt_out_string(&err, &buf, &blen, + "VA 0x%llx", payload); + break; + case 3: + ns = !!(packet->payload & NS_FLAG); payload &= ~(0xffULL << 56); - return snprintf(buf, buf_len, "PA 0x%llx ns=%d", - payload, ns); - default: return 0; + arm_spe_pkt_out_string(&err, &buf, &blen, + "PA 0x%llx ns=%d", payload, ns); + break; + default: + /* Unknown index */ + err = -1; + break; } + break; case ARM_SPE_CONTEXT: - return snprintf(buf, buf_len, "%s 0x%lx el%d", name, - (unsigned long)payload, idx + 1); - case ARM_SPE_COUNTER: { - size_t blen = buf_len; - - ret = snprintf(buf, buf_len, "%s %d ", name, - (unsigned short)payload); - buf += ret; - blen -= ret; + arm_spe_pkt_out_string(&err, &buf, &blen, "%s 0x%lx el%d", + name, (unsigned long)payload, idx + 1); + break; + case ARM_SPE_COUNTER: + arm_spe_pkt_out_string(&err, &buf, &blen, "%s %d ", name, + (unsigned short)payload); switch (idx) { - case 0: ret = snprintf(buf, buf_len, "TOT"); break; - case 1: ret = snprintf(buf, buf_len, "ISSUE"); break; - case 2: ret = snprintf(buf, buf_len, "XLAT"); break; - default: ret = 0; + case 0: + arm_spe_pkt_out_string(&err, &buf, &blen, "TOT"); + break; + case 1: + arm_spe_pkt_out_string(&err, &buf, &blen, "ISSUE"); + break; + case 2: + arm_spe_pkt_out_string(&err, &buf, &blen, "XLAT"); + break; + default: + break; } - if (ret < 0) - return ret; - blen -= ret; - return buf_len - blen; - } + break; default: + /* Unknown packet type */ + err = -1; break; } - return snprintf(buf, buf_len, "%s 0x%llx (%d)", - name, payload, packet->index); + /* Output raw data if detect any error */ + if (err) { + err = 0; + arm_spe_pkt_out_string(&err, &buf_orig, &buf_len, "%s 0x%llx (%d)", + name, payload, packet->index); + } + + return err; } diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 3882a5360ada..8901a1656a41 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -113,7 +113,7 @@ static void arm_spe_dump(struct arm_spe *spe __maybe_unused, if (ret > 0) { ret = arm_spe_pkt_desc(&packet, desc, ARM_SPE_PKT_DESC_MAX); - if (ret > 0) + if (!ret) color_fprintf(stdout, color, " %s\n", desc); } else { color_fprintf(stdout, color, " Bad packet!\n"); -- cgit v1.3.1 From 11695142e25e957dc3e56c29dc5f9daaf9530b10 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:27 +0800 Subject: perf arm-spe: Refactor packet header parsing The packet header parsing uses the hard coded values and it uses nested if-else statements. To improve the readability, this patch refactors the macros for packet header format so it removes the hard coded values. Furthermore, based on the new mask macros it reduces the nested if-else statements and changes to use the flat conditions checking, this is directive and can easily map to the descriptions in ARMv8-a architecture reference manual (ARM DDI 0487E.a), chapter 'D10.1.5 Statistical Profiling Extension protocol packet headers'. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../util/arm-spe-decoder/arm-spe-pkt-decoder.c | 92 ++++++++++------------ .../util/arm-spe-decoder/arm-spe-pkt-decoder.h | 20 +++++ 2 files changed, 61 insertions(+), 51 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index fbededc1bcd4..a769fe5a4496 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -16,28 +16,6 @@ #define NS_FLAG BIT_ULL(63) #define EL_FLAG (BIT_ULL(62) | BIT_ULL(61)) -#define SPE_HEADER0_PAD 0x0 -#define SPE_HEADER0_END 0x1 -#define SPE_HEADER0_ADDRESS 0x30 /* address packet (short) */ -#define SPE_HEADER0_ADDRESS_MASK 0x38 -#define SPE_HEADER0_COUNTER 0x18 /* counter packet (short) */ -#define SPE_HEADER0_COUNTER_MASK 0x38 -#define SPE_HEADER0_TIMESTAMP 0x71 -#define SPE_HEADER0_TIMESTAMP 0x71 -#define SPE_HEADER0_EVENTS 0x2 -#define SPE_HEADER0_EVENTS_MASK 0xf -#define SPE_HEADER0_SOURCE 0x3 -#define SPE_HEADER0_SOURCE_MASK 0xf -#define SPE_HEADER0_CONTEXT 0x24 -#define SPE_HEADER0_CONTEXT_MASK 0x3c -#define SPE_HEADER0_OP_TYPE 0x8 -#define SPE_HEADER0_OP_TYPE_MASK 0x3c -#define SPE_HEADER1_ALIGNMENT 0x0 -#define SPE_HEADER1_ADDRESS 0xb0 /* address packet (extended) */ -#define SPE_HEADER1_ADDRESS_MASK 0xf8 -#define SPE_HEADER1_COUNTER 0x98 /* counter packet (extended) */ -#define SPE_HEADER1_COUNTER_MASK 0xf8 - #if __BYTE_ORDER == __BIG_ENDIAN #define le16_to_cpu bswap_16 #define le32_to_cpu bswap_32 @@ -200,46 +178,58 @@ static int arm_spe_get_addr(const unsigned char *buf, size_t len, static int arm_spe_do_get_packet(const unsigned char *buf, size_t len, struct arm_spe_pkt *packet) { - unsigned int byte; + unsigned int hdr; + unsigned char ext_hdr = 0; memset(packet, 0, sizeof(struct arm_spe_pkt)); if (!len) return ARM_SPE_NEED_MORE_BYTES; - byte = buf[0]; - if (byte == SPE_HEADER0_PAD) + hdr = buf[0]; + + if (hdr == SPE_HEADER0_PAD) return arm_spe_get_pad(packet); - else if (byte == SPE_HEADER0_END) /* no timestamp at end of record */ + + if (hdr == SPE_HEADER0_END) /* no timestamp at end of record */ return arm_spe_get_end(packet); - else if (byte & 0xc0 /* 0y11xxxxxx */) { - if (byte & 0x80) { - if ((byte & SPE_HEADER0_ADDRESS_MASK) == SPE_HEADER0_ADDRESS) - return arm_spe_get_addr(buf, len, 0, packet); - if ((byte & SPE_HEADER0_COUNTER_MASK) == SPE_HEADER0_COUNTER) - return arm_spe_get_counter(buf, len, 0, packet); - } else - if (byte == SPE_HEADER0_TIMESTAMP) - return arm_spe_get_timestamp(buf, len, packet); - else if ((byte & SPE_HEADER0_EVENTS_MASK) == SPE_HEADER0_EVENTS) - return arm_spe_get_events(buf, len, packet); - else if ((byte & SPE_HEADER0_SOURCE_MASK) == SPE_HEADER0_SOURCE) - return arm_spe_get_data_source(buf, len, packet); - else if ((byte & SPE_HEADER0_CONTEXT_MASK) == SPE_HEADER0_CONTEXT) - return arm_spe_get_context(buf, len, packet); - else if ((byte & SPE_HEADER0_OP_TYPE_MASK) == SPE_HEADER0_OP_TYPE) - return arm_spe_get_op_type(buf, len, packet); - } else if ((byte & 0xe0) == 0x20 /* 0y001xxxxx */) { - /* 16-bit header */ - byte = buf[1]; - if (byte == SPE_HEADER1_ALIGNMENT) + + if (hdr == SPE_HEADER0_TIMESTAMP) + return arm_spe_get_timestamp(buf, len, packet); + + if ((hdr & SPE_HEADER0_MASK1) == SPE_HEADER0_EVENTS) + return arm_spe_get_events(buf, len, packet); + + if ((hdr & SPE_HEADER0_MASK1) == SPE_HEADER0_SOURCE) + return arm_spe_get_data_source(buf, len, packet); + + if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_CONTEXT) + return arm_spe_get_context(buf, len, packet); + + if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_OP_TYPE) + return arm_spe_get_op_type(buf, len, packet); + + if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_EXTENDED) { + /* 16-bit extended format header */ + ext_hdr = 1; + + hdr = buf[1]; + if (hdr == SPE_HEADER1_ALIGNMENT) return arm_spe_get_alignment(buf, len, packet); - else if ((byte & SPE_HEADER1_ADDRESS_MASK) == SPE_HEADER1_ADDRESS) - return arm_spe_get_addr(buf, len, 1, packet); - else if ((byte & SPE_HEADER1_COUNTER_MASK) == SPE_HEADER1_COUNTER) - return arm_spe_get_counter(buf, len, 1, packet); } + /* + * The short format header's byte 0 or the extended format header's + * byte 1 has been assigned to 'hdr', which uses the same encoding for + * address packet and counter packet, so don't need to distinguish if + * it's short format or extended format and handle in once. + */ + if ((hdr & SPE_HEADER0_MASK3) == SPE_HEADER0_ADDRESS) + return arm_spe_get_addr(buf, len, ext_hdr, packet); + + if ((hdr & SPE_HEADER0_MASK3) == SPE_HEADER0_COUNTER) + return arm_spe_get_counter(buf, len, ext_hdr, packet); + return ARM_SPE_BAD_PACKET; } diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 4c870521b8eb..129f43405eb1 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -36,6 +36,26 @@ struct arm_spe_pkt { uint64_t payload; }; +/* Short header (HEADER0) and extended header (HEADER1) */ +#define SPE_HEADER0_PAD 0x0 +#define SPE_HEADER0_END 0x1 +#define SPE_HEADER0_TIMESTAMP 0x71 +/* Mask for event & data source */ +#define SPE_HEADER0_MASK1 (GENMASK_ULL(7, 6) | GENMASK_ULL(3, 0)) +#define SPE_HEADER0_EVENTS 0x42 +#define SPE_HEADER0_SOURCE 0x43 +/* Mask for context & operation */ +#define SPE_HEADER0_MASK2 GENMASK_ULL(7, 2) +#define SPE_HEADER0_CONTEXT 0x64 +#define SPE_HEADER0_OP_TYPE 0x48 +/* Mask for extended format */ +#define SPE_HEADER0_EXTENDED 0x20 +/* Mask for address & counter */ +#define SPE_HEADER0_MASK3 GENMASK_ULL(7, 3) +#define SPE_HEADER0_ADDRESS 0xb0 +#define SPE_HEADER0_COUNTER 0x98 +#define SPE_HEADER1_ALIGNMENT 0x0 + #define SPE_ADDR_PKT_HDR_INDEX_INS (0x0) #define SPE_ADDR_PKT_HDR_INDEX_BRANCH (0x1) #define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT (0x2) -- cgit v1.3.1 From ab2aa439e4aaa3ce0fdcfa0f847aed4bf13bf353 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:28 +0800 Subject: perf arm-spe: Add new function arm_spe_pkt_desc_addr() This patch moves out the address parsing code from arm_spe_pkt_desc() and uses the new introduced function arm_spe_pkt_desc_addr() to process address packet. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../util/arm-spe-decoder/arm-spe-pkt-decoder.c | 64 +++++++++++++--------- 1 file changed, 38 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index a769fe5a4496..b16d68b40bbd 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -288,10 +288,46 @@ static int arm_spe_pkt_out_string(int *err, char **buf_p, size_t *blen, return ret; } +static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, + char *buf, size_t buf_len) +{ + int ns, el, idx = packet->index; + u64 payload = packet->payload; + int err = 0; + + switch (idx) { + case 0: + case 1: + ns = !!(packet->payload & NS_FLAG); + el = (packet->payload & EL_FLAG) >> 61; + payload &= ~(0xffULL << 56); + arm_spe_pkt_out_string(&err, &buf, &buf_len, + "%s 0x%llx el%d ns=%d", + (idx == 1) ? "TGT" : "PC", payload, el, ns); + break; + case 2: + arm_spe_pkt_out_string(&err, &buf, &buf_len, + "VA 0x%llx", payload); + break; + case 3: + ns = !!(packet->payload & NS_FLAG); + payload &= ~(0xffULL << 56); + arm_spe_pkt_out_string(&err, &buf, &buf_len, + "PA 0x%llx ns=%d", payload, ns); + break; + default: + /* Unknown index */ + err = -1; + break; + } + + return err; +} + int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t buf_len) { - int ns, el, idx = packet->index; + int idx = packet->index; unsigned long long payload = packet->payload; const char *name = arm_spe_pkt_name(packet->type); char *buf_orig = buf; @@ -373,31 +409,7 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, arm_spe_pkt_out_string(&err, &buf, &blen, "%s %lld", name, payload); break; case ARM_SPE_ADDRESS: - switch (idx) { - case 0: - case 1: - ns = !!(packet->payload & NS_FLAG); - el = (packet->payload & EL_FLAG) >> 61; - payload &= ~(0xffULL << 56); - arm_spe_pkt_out_string(&err, &buf, &blen, - "%s 0x%llx el%d ns=%d", - (idx == 1) ? "TGT" : "PC", payload, el, ns); - break; - case 2: - arm_spe_pkt_out_string(&err, &buf, &blen, - "VA 0x%llx", payload); - break; - case 3: - ns = !!(packet->payload & NS_FLAG); - payload &= ~(0xffULL << 56); - arm_spe_pkt_out_string(&err, &buf, &blen, - "PA 0x%llx ns=%d", payload, ns); - break; - default: - /* Unknown index */ - err = -1; - break; - } + err = arm_spe_pkt_desc_addr(packet, buf, buf_len); break; case ARM_SPE_CONTEXT: arm_spe_pkt_out_string(&err, &buf, &blen, "%s 0x%lx el%d", -- cgit v1.3.1 From 09935ca7b64cfa379b6ebf2b8cdb3126e09bffab Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:29 +0800 Subject: perf arm-spe: Refactor address packet handling This patch is to refactor address packet handling, it defines macros for address packet's header and payload, these macros are used by decoder and the dump flow. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 29 +++++++++--------- .../util/arm-spe-decoder/arm-spe-pkt-decoder.c | 26 ++++++++-------- .../util/arm-spe-decoder/arm-spe-pkt-decoder.h | 35 ++++++++++++++-------- 3 files changed, 48 insertions(+), 42 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index cc18a1e8c212..776b3e6628bb 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -24,36 +24,35 @@ static u64 arm_spe_calc_ip(int index, u64 payload) { - u8 *addr = (u8 *)&payload; - int ns, el; + u64 ns, el; /* Instruction virtual address or Branch target address */ if (index == SPE_ADDR_PKT_HDR_INDEX_INS || index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) { - ns = addr[7] & SPE_ADDR_PKT_NS; - el = (addr[7] & SPE_ADDR_PKT_EL_MASK) >> SPE_ADDR_PKT_EL_OFFSET; + ns = SPE_ADDR_PKT_GET_NS(payload); + el = SPE_ADDR_PKT_GET_EL(payload); + + /* Clean highest byte */ + payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); /* Fill highest byte for EL1 or EL2 (VHE) mode */ if (ns && (el == SPE_ADDR_PKT_EL1 || el == SPE_ADDR_PKT_EL2)) - addr[7] = 0xff; - /* Clean highest byte for other cases */ - else - addr[7] = 0x0; + payload |= 0xffULL << SPE_ADDR_PKT_ADDR_BYTE7_SHIFT; /* Data access virtual address */ } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) { + /* Clean tags */ + payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); + /* Fill highest byte if bits [48..55] is 0xff */ - if (addr[6] == 0xff) - addr[7] = 0xff; - /* Otherwise, cleanup tags */ - else - addr[7] = 0x0; + if (SPE_ADDR_PKT_ADDR_GET_BYTE_6(payload) == 0xffULL) + payload |= 0xffULL << SPE_ADDR_PKT_ADDR_BYTE7_SHIFT; /* Data access physical address */ } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) { - /* Cleanup byte 7 */ - addr[7] = 0x0; + /* Clean highest byte */ + payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); } else { pr_err("unsupported address packet index: 0x%x\n", index); } diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index b16d68b40bbd..d37c4008adbc 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -13,9 +13,6 @@ #include "arm-spe-pkt-decoder.h" -#define NS_FLAG BIT_ULL(63) -#define EL_FLAG (BIT_ULL(62) | BIT_ULL(61)) - #if __BYTE_ORDER == __BIG_ENDIAN #define le16_to_cpu bswap_16 #define le32_to_cpu bswap_32 @@ -167,10 +164,11 @@ static int arm_spe_get_addr(const unsigned char *buf, size_t len, const unsigned char ext_hdr, struct arm_spe_pkt *packet) { packet->type = ARM_SPE_ADDRESS; + if (ext_hdr) - packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7); + packet->index = SPE_HDR_EXTENDED_INDEX(buf[0], buf[1]); else - packet->index = buf[0] & 0x7; + packet->index = SPE_HDR_SHORT_INDEX(buf[0]); return arm_spe_get_payload(buf, len, ext_hdr, packet); } @@ -296,22 +294,22 @@ static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, int err = 0; switch (idx) { - case 0: - case 1: - ns = !!(packet->payload & NS_FLAG); - el = (packet->payload & EL_FLAG) >> 61; - payload &= ~(0xffULL << 56); + case SPE_ADDR_PKT_HDR_INDEX_INS: + case SPE_ADDR_PKT_HDR_INDEX_BRANCH: + ns = !!SPE_ADDR_PKT_GET_NS(payload); + el = SPE_ADDR_PKT_GET_EL(payload); + payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); arm_spe_pkt_out_string(&err, &buf, &buf_len, "%s 0x%llx el%d ns=%d", (idx == 1) ? "TGT" : "PC", payload, el, ns); break; - case 2: + case SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT: arm_spe_pkt_out_string(&err, &buf, &buf_len, "VA 0x%llx", payload); break; - case 3: - ns = !!(packet->payload & NS_FLAG); - payload &= ~(0xffULL << 56); + case SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS: + ns = !!SPE_ADDR_PKT_GET_NS(payload); + payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); arm_spe_pkt_out_string(&err, &buf, &buf_len, "PA 0x%llx ns=%d", payload, ns); break; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 129f43405eb1..f97d6840be3a 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -56,19 +56,28 @@ struct arm_spe_pkt { #define SPE_HEADER0_COUNTER 0x98 #define SPE_HEADER1_ALIGNMENT 0x0 -#define SPE_ADDR_PKT_HDR_INDEX_INS (0x0) -#define SPE_ADDR_PKT_HDR_INDEX_BRANCH (0x1) -#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT (0x2) -#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS (0x3) - -#define SPE_ADDR_PKT_NS BIT(7) -#define SPE_ADDR_PKT_CH BIT(6) -#define SPE_ADDR_PKT_EL_OFFSET (5) -#define SPE_ADDR_PKT_EL_MASK (0x3 << SPE_ADDR_PKT_EL_OFFSET) -#define SPE_ADDR_PKT_EL0 (0) -#define SPE_ADDR_PKT_EL1 (1) -#define SPE_ADDR_PKT_EL2 (2) -#define SPE_ADDR_PKT_EL3 (3) +#define SPE_HDR_SHORT_INDEX(h) ((h) & GENMASK_ULL(2, 0)) +#define SPE_HDR_EXTENDED_INDEX(h0, h1) (((h0) & GENMASK_ULL(1, 0)) << 3 | \ + SPE_HDR_SHORT_INDEX(h1)) + +/* Address packet header */ +#define SPE_ADDR_PKT_HDR_INDEX_INS 0x0 +#define SPE_ADDR_PKT_HDR_INDEX_BRANCH 0x1 +#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT 0x2 +#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS 0x3 + +/* Address packet payload */ +#define SPE_ADDR_PKT_ADDR_BYTE7_SHIFT 56 +#define SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(v) ((v) & GENMASK_ULL(55, 0)) +#define SPE_ADDR_PKT_ADDR_GET_BYTE_6(v) (((v) & GENMASK_ULL(55, 48)) >> 48) + +#define SPE_ADDR_PKT_GET_NS(v) (((v) & BIT_ULL(63)) >> 63) +#define SPE_ADDR_PKT_GET_EL(v) (((v) & GENMASK_ULL(62, 61)) >> 61) + +#define SPE_ADDR_PKT_EL0 0 +#define SPE_ADDR_PKT_EL1 1 +#define SPE_ADDR_PKT_EL2 2 +#define SPE_ADDR_PKT_EL3 3 const char *arm_spe_pkt_name(enum arm_spe_pkt_type); -- cgit v1.3.1 From 5513ddaf103c62dd1eabe9403c0a8d9f810492dc Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:30 +0800 Subject: perf arm_spe: Fixup top byte for data virtual address To establish a valid address from the address packet payload and finally the address value can be used for parsing data symbol in DSO, current code uses 0xff to replace the tag in the top byte of data virtual address. So far the code only fixups top byte for the memory layouts with 4KB pages, it misses to support memory layouts with 64KB pages. This patch adds the conditions for checking bits [55:52] are 0xf, if detects the pattern it will fill 0xff into the top byte of the address, also adds comment to explain the fixing up. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-6-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 776b3e6628bb..cac2ef79c025 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -24,7 +24,7 @@ static u64 arm_spe_calc_ip(int index, u64 payload) { - u64 ns, el; + u64 ns, el, val; /* Instruction virtual address or Branch target address */ if (index == SPE_ADDR_PKT_HDR_INDEX_INS || @@ -45,8 +45,22 @@ static u64 arm_spe_calc_ip(int index, u64 payload) /* Clean tags */ payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); - /* Fill highest byte if bits [48..55] is 0xff */ - if (SPE_ADDR_PKT_ADDR_GET_BYTE_6(payload) == 0xffULL) + /* + * Armv8 ARM (ARM DDI 0487F.c), chapter "D10.2.1 Address packet" + * defines the data virtual address payload format, the top byte + * (bits [63:56]) is assigned as top-byte tag; so we only can + * retrieve address value from bits [55:0]. + * + * According to Documentation/arm64/memory.rst, if detects the + * specific pattern in bits [55:52] of payload which falls in + * the kernel space, should fixup the top byte and this allows + * perf tool to parse DSO symbol for data address correctly. + * + * For this reason, if detects the bits [55:52] is 0xf, will + * fill 0xff into the top byte. + */ + val = SPE_ADDR_PKT_ADDR_GET_BYTE_6(payload); + if ((val & 0xf0ULL) == 0xf0ULL) payload |= 0xffULL << SPE_ADDR_PKT_ADDR_BYTE7_SHIFT; /* Data access physical address */ -- cgit v1.3.1 From 6550149e801a32b1533ed86509af76319cb75eba Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:31 +0800 Subject: perf arm-spe: Refactor context packet handling Minor refactoring to use macro for index mask. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-7-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 2 +- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index d37c4008adbc..978f5551b82c 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -136,7 +136,7 @@ static int arm_spe_get_context(const unsigned char *buf, size_t len, struct arm_spe_pkt *packet) { packet->type = ARM_SPE_CONTEXT; - packet->index = buf[0] & 0x3; + packet->index = SPE_CTX_PKT_HDR_INDEX(buf[0]); return arm_spe_get_payload(buf, len, 0, packet); } diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index f97d6840be3a..9bc876bffd35 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -79,6 +79,9 @@ struct arm_spe_pkt { #define SPE_ADDR_PKT_EL2 2 #define SPE_ADDR_PKT_EL3 3 +/* Context packet header */ +#define SPE_CTX_PKT_HDR_INDEX(h) ((h) & GENMASK_ULL(1, 0)) + const char *arm_spe_pkt_name(enum arm_spe_pkt_type); int arm_spe_get_packet(const unsigned char *buf, size_t len, -- cgit v1.3.1 From c52cfe9872132407eef6d734014d6fd7790146f5 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:32 +0800 Subject: perf arm-spe: Add new function arm_spe_pkt_desc_counter() This patch moves out the counter packet parsing code from arm_spe_pkt_desc() to the new function arm_spe_pkt_desc_counter(). Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-8-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../util/arm-spe-decoder/arm-spe-pkt-decoder.c | 43 ++++++++++++++-------- 1 file changed, 28 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 978f5551b82c..397ade5ffdeb 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -322,6 +322,33 @@ static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, return err; } +static int arm_spe_pkt_desc_counter(const struct arm_spe_pkt *packet, + char *buf, size_t buf_len) +{ + u64 payload = packet->payload; + const char *name = arm_spe_pkt_name(packet->type); + int err = 0; + + arm_spe_pkt_out_string(&err, &buf, &buf_len, "%s %d ", name, + (unsigned short)payload); + + switch (packet->index) { + case 0: + arm_spe_pkt_out_string(&err, &buf, &buf_len, "TOT"); + break; + case 1: + arm_spe_pkt_out_string(&err, &buf, &buf_len, "ISSUE"); + break; + case 2: + arm_spe_pkt_out_string(&err, &buf, &buf_len, "XLAT"); + break; + default: + break; + } + + return err; +} + int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t buf_len) { @@ -414,21 +441,7 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, name, (unsigned long)payload, idx + 1); break; case ARM_SPE_COUNTER: - arm_spe_pkt_out_string(&err, &buf, &blen, "%s %d ", name, - (unsigned short)payload); - switch (idx) { - case 0: - arm_spe_pkt_out_string(&err, &buf, &blen, "TOT"); - break; - case 1: - arm_spe_pkt_out_string(&err, &buf, &blen, "ISSUE"); - break; - case 2: - arm_spe_pkt_out_string(&err, &buf, &blen, "XLAT"); - break; - default: - break; - } + err = arm_spe_pkt_desc_counter(packet, buf, buf_len); break; default: /* Unknown packet type */ -- cgit v1.3.1 From d158aa408f221756f99edb128ef35bfd4d3361d5 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:33 +0800 Subject: perf arm-spe: Refactor counter packet handling This patch defines macros for counter packet header, and uses macros to replace hard code values in functions arm_spe_get_counter() and arm_spe_pkt_desc(). In the function arm_spe_get_counter(), adds a new line for more readable. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-9-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 11 ++++++----- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 5 +++++ 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 397ade5ffdeb..52f4339b1f0c 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -152,10 +152,11 @@ static int arm_spe_get_counter(const unsigned char *buf, size_t len, const unsigned char ext_hdr, struct arm_spe_pkt *packet) { packet->type = ARM_SPE_COUNTER; + if (ext_hdr) - packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7); + packet->index = SPE_HDR_EXTENDED_INDEX(buf[0], buf[1]); else - packet->index = buf[0] & 0x7; + packet->index = SPE_HDR_SHORT_INDEX(buf[0]); return arm_spe_get_payload(buf, len, ext_hdr, packet); } @@ -333,13 +334,13 @@ static int arm_spe_pkt_desc_counter(const struct arm_spe_pkt *packet, (unsigned short)payload); switch (packet->index) { - case 0: + case SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT: arm_spe_pkt_out_string(&err, &buf, &buf_len, "TOT"); break; - case 1: + case SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT: arm_spe_pkt_out_string(&err, &buf, &buf_len, "ISSUE"); break; - case 2: + case SPE_CNT_PKT_HDR_INDEX_TRANS_LAT: arm_spe_pkt_out_string(&err, &buf, &buf_len, "XLAT"); break; default: diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 9bc876bffd35..7d8e34e35f05 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -82,6 +82,11 @@ struct arm_spe_pkt { /* Context packet header */ #define SPE_CTX_PKT_HDR_INDEX(h) ((h) & GENMASK_ULL(1, 0)) +/* Counter packet header */ +#define SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT 0x0 +#define SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT 0x1 +#define SPE_CNT_PKT_HDR_INDEX_TRANS_LAT 0x2 + const char *arm_spe_pkt_name(enum arm_spe_pkt_type); int arm_spe_get_packet(const unsigned char *buf, size_t len, -- cgit v1.3.1 From e66f6d75960220001ce94afe93c981826235c003 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:34 +0800 Subject: perf arm-spe: Add new function arm_spe_pkt_desc_event() This patch moves out the event packet parsing from arm_spe_pkt_desc() to the new function arm_spe_pkt_desc_event(). Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-10-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../util/arm-spe-decoder/arm-spe-pkt-decoder.c | 63 +++++++++++++--------- 1 file changed, 37 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 52f4339b1f0c..da6b9f76739c 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -287,6 +287,42 @@ static int arm_spe_pkt_out_string(int *err, char **buf_p, size_t *blen, return ret; } +static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet, + char *buf, size_t buf_len) +{ + u64 payload = packet->payload; + int err = 0; + + arm_spe_pkt_out_string(&err, &buf, &buf_len, "EV"); + + if (payload & 0x1) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCEPTION-GEN"); + if (payload & 0x2) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " RETIRED"); + if (payload & 0x4) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-ACCESS"); + if (payload & 0x8) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-REFILL"); + if (payload & 0x10) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-ACCESS"); + if (payload & 0x20) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-REFILL"); + if (payload & 0x40) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " NOT-TAKEN"); + if (payload & 0x80) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " MISPRED"); + if (packet->index > 1) { + if (payload & 0x100) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-ACCESS"); + if (payload & 0x200) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL"); + if (payload & 0x400) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS"); + } + + return err; +} + static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, char *buf, size_t buf_len) { @@ -367,32 +403,7 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, arm_spe_pkt_out_string(&err, &buf, &blen, "%s", name); break; case ARM_SPE_EVENTS: - arm_spe_pkt_out_string(&err, &buf, &blen, "EV"); - - if (payload & 0x1) - arm_spe_pkt_out_string(&err, &buf, &blen, " EXCEPTION-GEN"); - if (payload & 0x2) - arm_spe_pkt_out_string(&err, &buf, &blen, " RETIRED"); - if (payload & 0x4) - arm_spe_pkt_out_string(&err, &buf, &blen, " L1D-ACCESS"); - if (payload & 0x8) - arm_spe_pkt_out_string(&err, &buf, &blen, " L1D-REFILL"); - if (payload & 0x10) - arm_spe_pkt_out_string(&err, &buf, &blen, " TLB-ACCESS"); - if (payload & 0x20) - arm_spe_pkt_out_string(&err, &buf, &blen, " TLB-REFILL"); - if (payload & 0x40) - arm_spe_pkt_out_string(&err, &buf, &blen, " NOT-TAKEN"); - if (payload & 0x80) - arm_spe_pkt_out_string(&err, &buf, &blen, " MISPRED"); - if (idx > 1) { - if (payload & 0x100) - arm_spe_pkt_out_string(&err, &buf, &blen, " LLC-ACCESS"); - if (payload & 0x200) - arm_spe_pkt_out_string(&err, &buf, &blen, " LLC-REFILL"); - if (payload & 0x400) - arm_spe_pkt_out_string(&err, &buf, &blen, " REMOTE-ACCESS"); - } + err = arm_spe_pkt_desc_event(packet, buf, buf_len); break; case ARM_SPE_OP_TYPE: switch (idx) { -- cgit v1.3.1 From 889d1a675fcfe734f83c459de023a6f0a91a7a0e Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:35 +0800 Subject: perf arm-spe: Refactor event type handling Move the enums of event types to arm-spe-pkt-decoder.h, thus function arm_spe_pkt_desc_event() can use them for bitmasks. Suggested-by: Andre Przywara Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-11-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 17 ----------------- .../util/arm-spe-decoder/arm-spe-pkt-decoder.c | 22 +++++++++++----------- .../util/arm-spe-decoder/arm-spe-pkt-decoder.h | 18 ++++++++++++++++++ 3 files changed, 29 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index a5111a8d4360..24727b8ca7ff 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -13,23 +13,6 @@ #include "arm-spe-pkt-decoder.h" -enum arm_spe_events { - EV_EXCEPTION_GEN = 0, - EV_RETIRED = 1, - EV_L1D_ACCESS = 2, - EV_L1D_REFILL = 3, - EV_TLB_ACCESS = 4, - EV_TLB_WALK = 5, - EV_NOT_TAKEN = 6, - EV_MISPRED = 7, - EV_LLC_ACCESS = 8, - EV_LLC_MISS = 9, - EV_REMOTE_ACCESS = 10, - EV_ALIGNMENT = 11, - EV_PARTIAL_PREDICATE = 17, - EV_EMPTY_PREDICATE = 18, -}; - enum arm_spe_sample_type { ARM_SPE_L1D_ACCESS = 1 << 0, ARM_SPE_L1D_MISS = 1 << 1, diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index da6b9f76739c..3f30b2937715 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -295,28 +295,28 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, "EV"); - if (payload & 0x1) + if (payload & BIT(EV_EXCEPTION_GEN)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCEPTION-GEN"); - if (payload & 0x2) + if (payload & BIT(EV_RETIRED)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " RETIRED"); - if (payload & 0x4) + if (payload & BIT(EV_L1D_ACCESS)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-ACCESS"); - if (payload & 0x8) + if (payload & BIT(EV_L1D_REFILL)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-REFILL"); - if (payload & 0x10) + if (payload & BIT(EV_TLB_ACCESS)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-ACCESS"); - if (payload & 0x20) + if (payload & BIT(EV_TLB_WALK)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-REFILL"); - if (payload & 0x40) + if (payload & BIT(EV_NOT_TAKEN)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " NOT-TAKEN"); - if (payload & 0x80) + if (payload & BIT(EV_MISPRED)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " MISPRED"); if (packet->index > 1) { - if (payload & 0x100) + if (payload & BIT(EV_LLC_ACCESS)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-ACCESS"); - if (payload & 0x200) + if (payload & BIT(EV_LLC_MISS)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL"); - if (payload & 0x400) + if (payload & BIT(EV_REMOTE_ACCESS)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS"); } diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 7d8e34e35f05..42ed4e61ede2 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -87,6 +87,24 @@ struct arm_spe_pkt { #define SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT 0x1 #define SPE_CNT_PKT_HDR_INDEX_TRANS_LAT 0x2 +/* Event packet payload */ +enum arm_spe_events { + EV_EXCEPTION_GEN = 0, + EV_RETIRED = 1, + EV_L1D_ACCESS = 2, + EV_L1D_REFILL = 3, + EV_TLB_ACCESS = 4, + EV_TLB_WALK = 5, + EV_NOT_TAKEN = 6, + EV_MISPRED = 7, + EV_LLC_ACCESS = 8, + EV_LLC_MISS = 9, + EV_REMOTE_ACCESS = 10, + EV_ALIGNMENT = 11, + EV_PARTIAL_PREDICATE = 17, + EV_EMPTY_PREDICATE = 18, +}; + const char *arm_spe_pkt_name(enum arm_spe_pkt_type); int arm_spe_get_packet(const unsigned char *buf, size_t len, -- cgit v1.3.1 From 4d0f4ca273aa95bf592b8bad3c619b5766c8ecc7 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:36 +0800 Subject: perf arm-spe: Remove size condition checking for events In the Armv8 ARM (ARM DDI 0487F.c), chapter "D10.2.6 Events packet", it describes the event bit is valid with specific payload requirement. For example, the Last Level cache access event, the bit is defined as: E[8], byte 1 bit [0], when SZ == 0b01 , when SZ == 0b10 , or when SZ == 0b11 It requires the payload size is at least 2 bytes, when byte 1 (start counting from 0) is valid, E[8] (bit 0 in byte 1) can be used for LLC access event type. For safety, the code checks the condition for payload size firstly, if meet the requirement for payload size, then continue to parse event type. If review function arm_spe_get_payload(), it has used cast, so any bytes beyond the valid size have been set to zeros. For this reason, we don't need to check payload size anymore afterwards when parse events, thus this patch removes payload size conditions. Suggested-by: Andre Przywara Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-12-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 9 +++------ tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 14 ++++++-------- 2 files changed, 9 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index cac2ef79c025..90d575cee1b9 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -192,16 +192,13 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) if (payload & BIT(EV_TLB_ACCESS)) decoder->record.type |= ARM_SPE_TLB_ACCESS; - if ((idx == 2 || idx == 4 || idx == 8) && - (payload & BIT(EV_LLC_MISS))) + if (payload & BIT(EV_LLC_MISS)) decoder->record.type |= ARM_SPE_LLC_MISS; - if ((idx == 2 || idx == 4 || idx == 8) && - (payload & BIT(EV_LLC_ACCESS))) + if (payload & BIT(EV_LLC_ACCESS)) decoder->record.type |= ARM_SPE_LLC_ACCESS; - if ((idx == 2 || idx == 4 || idx == 8) && - (payload & BIT(EV_REMOTE_ACCESS))) + if (payload & BIT(EV_REMOTE_ACCESS)) decoder->record.type |= ARM_SPE_REMOTE_ACCESS; if (payload & BIT(EV_MISPRED)) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 3f30b2937715..88bcf7e5be76 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -311,14 +311,12 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, " NOT-TAKEN"); if (payload & BIT(EV_MISPRED)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " MISPRED"); - if (packet->index > 1) { - if (payload & BIT(EV_LLC_ACCESS)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-ACCESS"); - if (payload & BIT(EV_LLC_MISS)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL"); - if (payload & BIT(EV_REMOTE_ACCESS)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS"); - } + if (payload & BIT(EV_LLC_ACCESS)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-ACCESS"); + if (payload & BIT(EV_LLC_MISS)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL"); + if (payload & BIT(EV_REMOTE_ACCESS)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS"); return err; } -- cgit v1.3.1 From 7488ffc4d981e19feddfe36a619051bf6216c7a1 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:37 +0800 Subject: perf arm-spe: Add new function arm_spe_pkt_desc_op_type() The operation type packet is complex and contains subclass; the parsing flow causes deep indentation; for more readable, this patch introduces a new function arm_spe_pkt_desc_op_type() which is used for operation type parsing. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-13-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../util/arm-spe-decoder/arm-spe-pkt-decoder.c | 79 ++++++++++++---------- 1 file changed, 45 insertions(+), 34 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 88bcf7e5be76..d6c060f119b4 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -321,6 +321,50 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet, return err; } +static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, + char *buf, size_t buf_len) +{ + u64 payload = packet->payload; + int err = 0; + + switch (packet->index) { + case 0: + arm_spe_pkt_out_string(&err, &buf, &buf_len, + payload & 0x1 ? "COND-SELECT" : "INSN-OTHER"); + break; + case 1: + arm_spe_pkt_out_string(&err, &buf, &buf_len, + payload & 0x1 ? "ST" : "LD"); + + if (payload & 0x2) { + if (payload & 0x4) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " AT"); + if (payload & 0x8) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL"); + if (payload & 0x10) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR"); + } else if (payload & 0x4) { + arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP"); + } + break; + case 2: + arm_spe_pkt_out_string(&err, &buf, &buf_len, "B"); + + if (payload & 0x1) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " COND"); + if (payload & 0x2) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " IND"); + + break; + default: + /* Unknown index */ + err = -1; + break; + } + + return err; +} + static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, char *buf, size_t buf_len) { @@ -404,40 +448,7 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, err = arm_spe_pkt_desc_event(packet, buf, buf_len); break; case ARM_SPE_OP_TYPE: - switch (idx) { - case 0: - arm_spe_pkt_out_string(&err, &buf, &blen, - payload & 0x1 ? "COND-SELECT" : "INSN-OTHER"); - break; - case 1: - arm_spe_pkt_out_string(&err, &buf, &blen, - payload & 0x1 ? "ST" : "LD"); - - if (payload & 0x2) { - if (payload & 0x4) - arm_spe_pkt_out_string(&err, &buf, &blen, " AT"); - if (payload & 0x8) - arm_spe_pkt_out_string(&err, &buf, &blen, " EXCL"); - if (payload & 0x10) - arm_spe_pkt_out_string(&err, &buf, &blen, " AR"); - } else if (payload & 0x4) { - arm_spe_pkt_out_string(&err, &buf, &blen, " SIMD-FP"); - } - break; - case 2: - arm_spe_pkt_out_string(&err, &buf, &blen, "B"); - - if (payload & 0x1) - arm_spe_pkt_out_string(&err, &buf, &blen, " COND"); - if (payload & 0x2) - arm_spe_pkt_out_string(&err, &buf, &blen, " IND"); - - break; - default: - /* Unknown index */ - err = -1; - break; - } + err = arm_spe_pkt_desc_op_type(packet, buf, buf_len); break; case ARM_SPE_DATA_SOURCE: case ARM_SPE_TIMESTAMP: -- cgit v1.3.1 From e771218f32f97c0940ae46c23e20d27f3d4c05e3 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:38 +0800 Subject: perf arm-spe: Refactor operation packet handling Defines macros for operation packet header and formats (support sub classes for 'other', 'branch', 'load and store', etc). Uses these macros for operation packet decoding and dumping. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-14-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../util/arm-spe-decoder/arm-spe-pkt-decoder.c | 26 ++++++++++++---------- .../util/arm-spe-decoder/arm-spe-pkt-decoder.h | 23 +++++++++++++++++++ 2 files changed, 37 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index d6c060f119b4..1d1354a0eef4 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -144,7 +144,7 @@ static int arm_spe_get_op_type(const unsigned char *buf, size_t len, struct arm_spe_pkt *packet) { packet->type = ARM_SPE_OP_TYPE; - packet->index = buf[0] & 0x3; + packet->index = SPE_OP_PKT_HDR_CLASS(buf[0]); return arm_spe_get_payload(buf, len, 0, packet); } @@ -328,31 +328,33 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, int err = 0; switch (packet->index) { - case 0: + case SPE_OP_PKT_HDR_CLASS_OTHER: arm_spe_pkt_out_string(&err, &buf, &buf_len, - payload & 0x1 ? "COND-SELECT" : "INSN-OTHER"); + payload & SPE_OP_PKT_COND ? "COND-SELECT" : "INSN-OTHER"); break; - case 1: + case SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC: arm_spe_pkt_out_string(&err, &buf, &buf_len, payload & 0x1 ? "ST" : "LD"); - if (payload & 0x2) { - if (payload & 0x4) + if (SPE_OP_PKT_IS_LDST_ATOMIC(payload)) { + if (payload & SPE_OP_PKT_AT) arm_spe_pkt_out_string(&err, &buf, &buf_len, " AT"); - if (payload & 0x8) + if (payload & SPE_OP_PKT_EXCL) arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL"); - if (payload & 0x10) + if (payload & SPE_OP_PKT_AR) arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR"); - } else if (payload & 0x4) { + } else if (SPE_OP_PKT_LDST_SUBCLASS_GET(payload) == + SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP"); } break; - case 2: + case SPE_OP_PKT_HDR_CLASS_BR_ERET: arm_spe_pkt_out_string(&err, &buf, &buf_len, "B"); - if (payload & 0x1) + if (payload & SPE_OP_PKT_COND) arm_spe_pkt_out_string(&err, &buf, &buf_len, " COND"); - if (payload & 0x2) + + if (SPE_OP_PKT_IS_INDIRECT_BRANCH(payload)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " IND"); break; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 42ed4e61ede2..7032fc141ad4 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -105,6 +105,29 @@ enum arm_spe_events { EV_EMPTY_PREDICATE = 18, }; +/* Operation packet header */ +#define SPE_OP_PKT_HDR_CLASS(h) ((h) & GENMASK_ULL(1, 0)) +#define SPE_OP_PKT_HDR_CLASS_OTHER 0x0 +#define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC 0x1 +#define SPE_OP_PKT_HDR_CLASS_BR_ERET 0x2 + +#define SPE_OP_PKT_COND BIT(0) + +#define SPE_OP_PKT_LDST_SUBCLASS_GET(v) ((v) & GENMASK_ULL(7, 1)) +#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG 0x0 +#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP 0x4 +#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG 0x10 +#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG 0x30 + +#define SPE_OP_PKT_IS_LDST_ATOMIC(v) (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2) + +#define SPE_OP_PKT_AR BIT(4) +#define SPE_OP_PKT_EXCL BIT(3) +#define SPE_OP_PKT_AT BIT(2) +#define SPE_OP_PKT_ST BIT(0) + +#define SPE_OP_PKT_IS_INDIRECT_BRANCH(v) (((v) & GENMASK_ULL(7, 1)) == 0x2) + const char *arm_spe_pkt_name(enum arm_spe_pkt_type); int arm_spe_get_packet(const unsigned char *buf, size_t len, -- cgit v1.3.1 From 3d829724b16c5d2de42e6c9601c696c93a10bc61 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 19 Nov 2020 23:24:39 +0800 Subject: perf arm-spe: Add more sub classes for operation packet For the operation type packet payload with load/store class, it misses to support these sub classes: - A load/store targeting the general-purpose registers; - A load/store targeting unspecified registers; - The ARMv8.4 nested virtualisation extension can redirect system register accesses to a memory page controlled by the hypervisor. The SPE profiling feature in newer implementations can tag those memory accesses accordingly. Add the bit pattern describing load/store sub classes, so that the perf tool can decode it properly. Inspired by Andre Przywara, refined the commit log and code for more clear description. Co-developed-by: Andre Przywara Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-15-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 1d1354a0eef4..84d661aab54f 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -343,9 +343,23 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL"); if (payload & SPE_OP_PKT_AR) arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR"); - } else if (SPE_OP_PKT_LDST_SUBCLASS_GET(payload) == - SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP) { + } + + switch (SPE_OP_PKT_LDST_SUBCLASS_GET(payload)) { + case SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP: arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP"); + break; + case SPE_OP_PKT_LDST_SUBCLASS_GP_REG: + arm_spe_pkt_out_string(&err, &buf, &buf_len, " GP-REG"); + break; + case SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG: + arm_spe_pkt_out_string(&err, &buf, &buf_len, " UNSPEC-REG"); + break; + case SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG: + arm_spe_pkt_out_string(&err, &buf, &buf_len, " NV-SYSREG"); + break; + default: + break; } break; case SPE_OP_PKT_HDR_CLASS_BR_ERET: -- cgit v1.3.1 From 3601e605501df289db149785e1e6a8d16e557d31 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 19 Nov 2020 23:24:40 +0800 Subject: perf arm_spe: Decode memory tagging properties When SPE records a physical address, it can additionally tag the event with information from the Memory Tagging architecture extension. Decode the two additional fields in the SPE event payload. [leoy: Refined patch to use predefined macros] Signed-off-by: Andre Przywara Signed-off-by: Leo Yan Reviewed-by: Dave Martin Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-16-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 6 +++++- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 84d661aab54f..57c01ce27915 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -385,6 +385,7 @@ static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, char *buf, size_t buf_len) { int ns, el, idx = packet->index; + int ch, pat; u64 payload = packet->payload; int err = 0; @@ -404,9 +405,12 @@ static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, break; case SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS: ns = !!SPE_ADDR_PKT_GET_NS(payload); + ch = !!SPE_ADDR_PKT_GET_CH(payload); + pat = SPE_ADDR_PKT_GET_PAT(payload); payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); arm_spe_pkt_out_string(&err, &buf, &buf_len, - "PA 0x%llx ns=%d", payload, ns); + "PA 0x%llx ns=%d ch=%d pat=%x", + payload, ns, ch, pat); break; default: /* Unknown index */ diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 7032fc141ad4..1ad14885c2a1 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -73,6 +73,8 @@ struct arm_spe_pkt { #define SPE_ADDR_PKT_GET_NS(v) (((v) & BIT_ULL(63)) >> 63) #define SPE_ADDR_PKT_GET_EL(v) (((v) & GENMASK_ULL(62, 61)) >> 61) +#define SPE_ADDR_PKT_GET_CH(v) (((v) & BIT_ULL(62)) >> 62) +#define SPE_ADDR_PKT_GET_PAT(v) (((v) & GENMASK_ULL(59, 56)) >> 56) #define SPE_ADDR_PKT_EL0 0 #define SPE_ADDR_PKT_EL1 1 -- cgit v1.3.1 From 05e91e7fe26c6fb116fa16f43c1eed78020f9463 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Thu, 19 Nov 2020 23:24:41 +0800 Subject: perf arm-spe: Add support for ARMv8.3-SPE This patch is to support Armv8.3 extension for SPE, it adds alignment field in the Events packet and it supports the Scalable Vector Extension (SVE) for Operation packet and Events packet with two additions: - The vector length for SVE operations in the Operation Type packet; - The incomplete predicate and empty predicate fields in the Events packet. Signed-off-by: Wei Li Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20201119152441.6972-17-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../util/arm-spe-decoder/arm-spe-pkt-decoder.c | 36 ++++++++++++++++++++-- .../util/arm-spe-decoder/arm-spe-pkt-decoder.h | 16 ++++++++++ 2 files changed, 50 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 57c01ce27915..f3ac9d40cebf 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -317,6 +317,12 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL"); if (payload & BIT(EV_REMOTE_ACCESS)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS"); + if (payload & BIT(EV_ALIGNMENT)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " ALIGNMENT"); + if (payload & BIT(EV_PARTIAL_PREDICATE)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-PARTIAL-PRED"); + if (payload & BIT(EV_EMPTY_PREDICATE)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-EMPTY-PRED"); return err; } @@ -329,8 +335,23 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, switch (packet->index) { case SPE_OP_PKT_HDR_CLASS_OTHER: - arm_spe_pkt_out_string(&err, &buf, &buf_len, - payload & SPE_OP_PKT_COND ? "COND-SELECT" : "INSN-OTHER"); + if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload)) { + arm_spe_pkt_out_string(&err, &buf, &buf_len, "SVE-OTHER"); + + /* SVE effective vector length */ + arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d", + SPE_OP_PKG_SVE_EVL(payload)); + + if (payload & SPE_OP_PKT_SVE_FP) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP"); + if (payload & SPE_OP_PKT_SVE_PRED) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED"); + } else { + arm_spe_pkt_out_string(&err, &buf, &buf_len, "OTHER"); + arm_spe_pkt_out_string(&err, &buf, &buf_len, " %s", + payload & SPE_OP_PKT_COND ? + "COND-SELECT" : "INSN-OTHER"); + } break; case SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC: arm_spe_pkt_out_string(&err, &buf, &buf_len, @@ -361,6 +382,17 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, default: break; } + + if (SPE_OP_PKT_IS_LDST_SVE(payload)) { + /* SVE effective vector length */ + arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d", + SPE_OP_PKG_SVE_EVL(payload)); + + if (payload & SPE_OP_PKT_SVE_PRED) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED"); + if (payload & SPE_OP_PKT_SVE_SG) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " SG"); + } break; case SPE_OP_PKT_HDR_CLASS_BR_ERET: arm_spe_pkt_out_string(&err, &buf, &buf_len, "B"); diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 1ad14885c2a1..9b970e7bf1e2 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -113,6 +113,8 @@ enum arm_spe_events { #define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC 0x1 #define SPE_OP_PKT_HDR_CLASS_BR_ERET 0x2 +#define SPE_OP_PKT_IS_OTHER_SVE_OP(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8) + #define SPE_OP_PKT_COND BIT(0) #define SPE_OP_PKT_LDST_SUBCLASS_GET(v) ((v) & GENMASK_ULL(7, 1)) @@ -128,6 +130,20 @@ enum arm_spe_events { #define SPE_OP_PKT_AT BIT(2) #define SPE_OP_PKT_ST BIT(0) +#define SPE_OP_PKT_IS_LDST_SVE(v) (((v) & (BIT(3) | BIT(1))) == 0x8) + +#define SPE_OP_PKT_SVE_SG BIT(7) +/* + * SVE effective vector length (EVL) is stored in byte 0 bits [6:4]; + * the length is rounded up to a power of two and use 32 as one step, + * so EVL calculation is: + * + * 32 * (2 ^ bits [6:4]) = 32 << (bits [6:4]) + */ +#define SPE_OP_PKG_SVE_EVL(v) (32 << (((v) & GENMASK_ULL(6, 4)) >> 4)) +#define SPE_OP_PKT_SVE_PRED BIT(2) +#define SPE_OP_PKT_SVE_FP BIT(1) + #define SPE_OP_PKT_IS_INDIRECT_BRANCH(v) (((v) & GENMASK_ULL(7, 1)) == 0x2) const char *arm_spe_pkt_name(enum arm_spe_pkt_type); -- cgit v1.3.1 From 7679325702c90aecd393cd7cde685576c14489c0 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Mon, 16 Nov 2020 19:08:48 +1300 Subject: selftests/dma: add test application for DMA_MAP_BENCHMARK This patch provides the test application for DMA_MAP_BENCHMARK. Before running the test application, we need to bind a device to dma_map_ benchmark driver. For example, unbind "xxx" from its original driver and bind to dma_map_benchmark: echo dma_map_benchmark > /sys/bus/platform/devices/xxx/driver_override echo xxx > /sys/bus/platform/drivers/xxx/unbind echo xxx > /sys/bus/platform/drivers/dma_map_benchmark/bind Another example for PCI devices: echo dma_map_benchmark > /sys/bus/pci/devices/0000:00:01.0/driver_override echo 0000:00:01.0 > /sys/bus/pci/drivers/xxx/unbind echo 0000:00:01.0 > /sys/bus/pci/drivers/dma_map_benchmark/bind The below command will run 16 threads on numa node 0 for 10 seconds on the device bound to dma_map_benchmark platform_driver or pci_driver: ./dma_map_benchmark -t 16 -s 10 -n 0 dma mapping benchmark: threads:16 seconds:10 average map latency(us):1.1 standard deviation:1.9 average unmap latency(us):0.5 standard deviation:0.8 Cc: Will Deacon Cc: Shuah Khan Cc: Christoph Hellwig Cc: Marek Szyprowski Cc: Robin Murphy Signed-off-by: Barry Song Signed-off-by: Christoph Hellwig --- MAINTAINERS | 6 ++ tools/testing/selftests/dma/Makefile | 6 ++ tools/testing/selftests/dma/config | 1 + tools/testing/selftests/dma/dma_map_benchmark.c | 123 ++++++++++++++++++++++++ 4 files changed, 136 insertions(+) create mode 100644 tools/testing/selftests/dma/Makefile create mode 100644 tools/testing/selftests/dma/config create mode 100644 tools/testing/selftests/dma/dma_map_benchmark.c (limited to 'tools') diff --git a/MAINTAINERS b/MAINTAINERS index a008b70f3c16..c88abd1d323b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5237,6 +5237,12 @@ F: include/linux/dma-mapping.h F: include/linux/dma-map-ops.h F: kernel/dma/ +DMA MAPPING BENCHMARK +M: Barry Song +L: iommu@lists.linux-foundation.org +F: kernel/dma/map_benchmark.c +F: tools/testing/selftests/dma/ + DMA-BUF HEAPS FRAMEWORK M: Sumit Semwal R: Benjamin Gaignard diff --git a/tools/testing/selftests/dma/Makefile b/tools/testing/selftests/dma/Makefile new file mode 100644 index 000000000000..aa8e8b5b3864 --- /dev/null +++ b/tools/testing/selftests/dma/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -I../../../../usr/include/ + +TEST_GEN_PROGS := dma_map_benchmark + +include ../lib.mk diff --git a/tools/testing/selftests/dma/config b/tools/testing/selftests/dma/config new file mode 100644 index 000000000000..6102ee3c43cd --- /dev/null +++ b/tools/testing/selftests/dma/config @@ -0,0 +1 @@ +CONFIG_DMA_MAP_BENCHMARK=y diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c new file mode 100644 index 000000000000..7065163a8388 --- /dev/null +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Hisilicon Limited. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark) +#define DMA_MAP_MAX_THREADS 1024 +#define DMA_MAP_MAX_SECONDS 300 + +#define DMA_MAP_BIDIRECTIONAL 0 +#define DMA_MAP_TO_DEVICE 1 +#define DMA_MAP_FROM_DEVICE 2 + +static char *directions[] = { + "BIDIRECTIONAL", + "TO_DEVICE", + "FROM_DEVICE", +}; + +struct map_benchmark { + __u64 avg_map_100ns; /* average map latency in 100ns */ + __u64 map_stddev; /* standard deviation of map latency */ + __u64 avg_unmap_100ns; /* as above */ + __u64 unmap_stddev; + __u32 threads; /* how many threads will do map/unmap in parallel */ + __u32 seconds; /* how long the test will last */ + __s32 node; /* which numa node this benchmark will run on */ + __u32 dma_bits; /* DMA addressing capability */ + __u32 dma_dir; /* DMA data direction */ + __u64 expansion[10]; /* For future use */ +}; + +int main(int argc, char **argv) +{ + struct map_benchmark map; + int fd, opt; + /* default single thread, run 20 seconds on NUMA_NO_NODE */ + int threads = 1, seconds = 20, node = -1; + /* default dma mask 32bit, bidirectional DMA */ + int bits = 32, dir = DMA_MAP_BIDIRECTIONAL; + + int cmd = DMA_MAP_BENCHMARK; + char *p; + + while ((opt = getopt(argc, argv, "t:s:n:b:d:")) != -1) { + switch (opt) { + case 't': + threads = atoi(optarg); + break; + case 's': + seconds = atoi(optarg); + break; + case 'n': + node = atoi(optarg); + break; + case 'b': + bits = atoi(optarg); + break; + case 'd': + dir = atoi(optarg); + break; + default: + return -1; + } + } + + if (threads <= 0 || threads > DMA_MAP_MAX_THREADS) { + fprintf(stderr, "invalid number of threads, must be in 1-%d\n", + DMA_MAP_MAX_THREADS); + exit(1); + } + + if (seconds <= 0 || seconds > DMA_MAP_MAX_SECONDS) { + fprintf(stderr, "invalid number of seconds, must be in 1-%d\n", + DMA_MAP_MAX_SECONDS); + exit(1); + } + + /* suppose the mininum DMA zone is 1MB in the world */ + if (bits < 20 || bits > 64) { + fprintf(stderr, "invalid dma mask bit, must be in 20-64\n"); + exit(1); + } + + if (dir != DMA_MAP_BIDIRECTIONAL && dir != DMA_MAP_TO_DEVICE && + dir != DMA_MAP_FROM_DEVICE) { + fprintf(stderr, "invalid dma direction\n"); + exit(1); + } + + fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR); + if (fd == -1) { + perror("open"); + exit(1); + } + + map.seconds = seconds; + map.threads = threads; + map.node = node; + map.dma_bits = bits; + map.dma_dir = dir; + if (ioctl(fd, cmd, &map)) { + perror("ioctl"); + exit(1); + } + + printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s\n", + threads, seconds, node, dir[directions]); + printf("average map latency(us):%.1f standard deviation:%.1f\n", + map.avg_map_100ns/10.0, map.map_stddev/10.0); + printf("average unmap latency(us):%.1f standard deviation:%.1f\n", + map.avg_unmap_100ns/10.0, map.unmap_stddev/10.0); + + return 0; +} -- cgit v1.3.1 From b3e453272d436aab8adbe810c6d7043670281487 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:06 +0100 Subject: tools lib: Adopt memchr_inv() from kernel We'll use it to check for undefined/zero data. Signed-off-by: Jiri Olsa Suggested-by: Arnaldo Carvalho de Melo Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/string.h | 1 + tools/lib/string.c | 58 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) (limited to 'tools') diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h index 5e9e781905ed..db5c99318c79 100644 --- a/tools/include/linux/string.h +++ b/tools/include/linux/string.h @@ -46,4 +46,5 @@ extern char * __must_check skip_spaces(const char *); extern char *strim(char *); +extern void *memchr_inv(const void *start, int c, size_t bytes); #endif /* _TOOLS_LINUX_STRING_H_ */ diff --git a/tools/lib/string.c b/tools/lib/string.c index f645343815de..8b6892f959ab 100644 --- a/tools/lib/string.c +++ b/tools/lib/string.c @@ -168,3 +168,61 @@ char *strreplace(char *s, char old, char new) *s = new; return s; } + +static void *check_bytes8(const u8 *start, u8 value, unsigned int bytes) +{ + while (bytes) { + if (*start != value) + return (void *)start; + start++; + bytes--; + } + return NULL; +} + +/** + * memchr_inv - Find an unmatching character in an area of memory. + * @start: The memory area + * @c: Find a character other than c + * @bytes: The size of the area. + * + * returns the address of the first character other than @c, or %NULL + * if the whole buffer contains just @c. + */ +void *memchr_inv(const void *start, int c, size_t bytes) +{ + u8 value = c; + u64 value64; + unsigned int words, prefix; + + if (bytes <= 16) + return check_bytes8(start, value, bytes); + + value64 = value; + value64 |= value64 << 8; + value64 |= value64 << 16; + value64 |= value64 << 32; + + prefix = (unsigned long)start % 8; + if (prefix) { + u8 *r; + + prefix = 8 - prefix; + r = check_bytes8(start, value, prefix); + if (r) + return r; + start += prefix; + bytes -= prefix; + } + + words = bytes / 8; + + while (words) { + if (*(u64 *)start != value64) + return check_bytes8(start, value, 8); + start += 8; + words--; + } + + return check_bytes8(start, value, bytes % 8); +} -- cgit v1.3.1 From f45edd86b23a7dc576b881b3da53936ac9f8dffb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:08 +0100 Subject: perf tools: Add build_id__is_defined function Adding build_id__is_defined helper to check build id is defined and is != zero build id. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 6 ++++++ tools/perf/util/build-id.h | 1 + 2 files changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 6b410c3d52dc..2aacc8b29f7e 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -37,6 +37,7 @@ #include #include +#include #include static bool no_buildid_cache; @@ -912,3 +913,8 @@ void build_id__init(struct build_id *bid, const u8 *data, size_t size) memcpy(bid->data, data, size); bid->size = size; } + +bool build_id__is_defined(const struct build_id *bid) +{ + return bid && bid->size ? !!memchr_inv(bid->data, 0, bid->size) : false; +} diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index f293f99d5dba..d53415feaf69 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -21,6 +21,7 @@ struct feat_fd; void build_id__init(struct build_id *bid, const u8 *data, size_t size); int build_id__sprintf(const struct build_id *build_id, char *bf); +bool build_id__is_defined(const struct build_id *bid); int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id); int filename__sprintf_build_id(const char *pathname, char *sbuild_id); char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf, -- cgit v1.3.1 From 7ac22b088afe26a42978ff7576730ca419da76aa Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:09 +0100 Subject: perf tools: Add filename__decompress function Factor filename__decompress from decompress_kmodule function. It can decompress files with compressions supported in perf - xz and gz, the support needs to be compiled in. It will to be used in following changes to get build id out of compressed elf objects. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 31 +++++++++++++++++++------------ tools/perf/util/dso.h | 2 ++ 2 files changed, 21 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 89b5fd2b5de3..d786cf6b0cfa 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -279,18 +279,12 @@ bool dso__needs_decompress(struct dso *dso) dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP; } -static int decompress_kmodule(struct dso *dso, const char *name, - char *pathname, size_t len) +int filename__decompress(const char *name, char *pathname, + size_t len, int comp, int *err) { char tmpbuf[] = KMOD_DECOMP_NAME; int fd = -1; - if (!dso__needs_decompress(dso)) - return -1; - - if (dso->comp == COMP_ID__NONE) - return -1; - /* * We have proper compression id for DSO and yet the file * behind the 'name' can still be plain uncompressed object. @@ -304,17 +298,17 @@ static int decompress_kmodule(struct dso *dso, const char *name, * To keep this transparent, we detect this and return the file * descriptor to the uncompressed file. */ - if (!compressions[dso->comp].is_compressed(name)) + if (!compressions[comp].is_compressed(name)) return open(name, O_RDONLY); fd = mkstemp(tmpbuf); if (fd < 0) { - dso->load_errno = errno; + *err = errno; return -1; } - if (compressions[dso->comp].decompress(name, fd)) { - dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE; + if (compressions[comp].decompress(name, fd)) { + *err = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE; close(fd); fd = -1; } @@ -328,6 +322,19 @@ static int decompress_kmodule(struct dso *dso, const char *name, return fd; } +static int decompress_kmodule(struct dso *dso, const char *name, + char *pathname, size_t len) +{ + if (!dso__needs_decompress(dso)) + return -1; + + if (dso->comp == COMP_ID__NONE) + return -1; + + return filename__decompress(name, pathname, len, dso->comp, + &dso->load_errno); +} + int dso__decompress_kmodule_fd(struct dso *dso, const char *name) { return decompress_kmodule(dso, name, NULL, 0); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index d8cb4f5680a4..cd2fe64a3c5d 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -274,6 +274,8 @@ bool dso__needs_decompress(struct dso *dso); int dso__decompress_kmodule_fd(struct dso *dso, const char *name); int dso__decompress_kmodule_path(struct dso *dso, const char *name, char *pathname, size_t len); +int filename__decompress(const char *name, char *pathname, + size_t len, int comp, int *err); #define KMOD_DECOMP_NAME "/tmp/perf-kmod-XXXXXX" #define KMOD_DECOMP_LEN sizeof(KMOD_DECOMP_NAME) -- cgit v1.3.1 From af21c579c860d10da1b0620e3d5d14abdc0b5fff Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:11 +0100 Subject: perf build-id: Add check for existing link in buildid dir When adding new build id link we fail if the link is already there. Adding check for existing link and output debug message that the build id is already linked. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-11-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 2aacc8b29f7e..4a391f13f40d 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -755,8 +755,25 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, tmp = dir_name + strlen(buildid_dir) - 5; memcpy(tmp, "../..", 5); - if (symlink(tmp, linkname) == 0) + if (symlink(tmp, linkname) == 0) { err = 0; + } else if (errno == EEXIST) { + char path[PATH_MAX]; + ssize_t len; + + len = readlink(linkname, path, sizeof(path) - 1); + if (len <= 0) { + pr_err("Cant read link: %s\n", linkname); + goto out_free; + } + path[len] = '\0'; + + if (strcmp(tmp, path)) { + pr_debug("build <%s> already linked to %s\n", + sbuild_id, linkname); + } + err = 0; + } /* Update SDT cache : error is just warned */ if (realname && -- cgit v1.3.1 From 031f112f8dc0f211b59b1b33032671f035edc25d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:12 +0100 Subject: perf tools: Use struct extra_kernel_map in machine__process_kernel_mmap_event Using struct extra_kernel_map in machine__process_kernel_mmap_event, to pass mmap details. This way we can used single function for all 3 mmap versions. Signed-off-by: Jiri Olsa Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-12-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 62 +++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 29 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 15385ea00190..1ae32a81639c 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1581,32 +1581,25 @@ static bool machine__uses_kcore(struct machine *machine) } static bool perf_event__is_extra_kernel_mmap(struct machine *machine, - union perf_event *event) + struct extra_kernel_map *xm) { return machine__is(machine, "x86_64") && - is_entry_trampoline(event->mmap.filename); + is_entry_trampoline(xm->name); } static int machine__process_extra_kernel_map(struct machine *machine, - union perf_event *event) + struct extra_kernel_map *xm) { struct dso *kernel = machine__kernel_dso(machine); - struct extra_kernel_map xm = { - .start = event->mmap.start, - .end = event->mmap.start + event->mmap.len, - .pgoff = event->mmap.pgoff, - }; if (kernel == NULL) return -1; - strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN); - - return machine__create_extra_kernel_map(machine, kernel, &xm); + return machine__create_extra_kernel_map(machine, kernel, xm); } static int machine__process_kernel_mmap_event(struct machine *machine, - union perf_event *event) + struct extra_kernel_map *xm) { struct map *map; enum dso_space_type dso_space; @@ -1621,20 +1614,18 @@ static int machine__process_kernel_mmap_event(struct machine *machine, else dso_space = DSO_SPACE__KERNEL_GUEST; - is_kernel_mmap = memcmp(event->mmap.filename, - machine->mmap_name, + is_kernel_mmap = memcmp(xm->name, machine->mmap_name, strlen(machine->mmap_name) - 1) == 0; - if (event->mmap.filename[0] == '/' || - (!is_kernel_mmap && event->mmap.filename[0] == '[')) { - map = machine__addnew_module_map(machine, event->mmap.start, - event->mmap.filename); + if (xm->name[0] == '/' || + (!is_kernel_mmap && xm->name[0] == '[')) { + map = machine__addnew_module_map(machine, xm->start, + xm->name); if (map == NULL) goto out_problem; - map->end = map->start + event->mmap.len; + map->end = map->start + xm->end - xm->start; } else if (is_kernel_mmap) { - const char *symbol_name = (event->mmap.filename + - strlen(machine->mmap_name)); + const char *symbol_name = (xm->name + strlen(machine->mmap_name)); /* * Should be there already, from the build-id table in * the header. @@ -1688,18 +1679,17 @@ static int machine__process_kernel_mmap_event(struct machine *machine, if (strstr(kernel->long_name, "vmlinux")) dso__set_short_name(kernel, "[kernel.vmlinux]", false); - machine__update_kernel_mmap(machine, event->mmap.start, - event->mmap.start + event->mmap.len); + machine__update_kernel_mmap(machine, xm->start, xm->end); /* * Avoid using a zero address (kptr_restrict) for the ref reloc * symbol. Effectively having zero here means that at record * time /proc/sys/kernel/kptr_restrict was non zero. */ - if (event->mmap.pgoff != 0) { + if (xm->pgoff != 0) { map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, symbol_name, - event->mmap.pgoff); + xm->pgoff); } if (machine__is_default_guest(machine)) { @@ -1708,8 +1698,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine, */ dso__load(kernel, machine__kernel_map(machine)); } - } else if (perf_event__is_extra_kernel_mmap(machine, event)) { - return machine__process_extra_kernel_map(machine, event); + } else if (perf_event__is_extra_kernel_mmap(machine, xm)) { + return machine__process_extra_kernel_map(machine, xm); } return 0; out_problem: @@ -1735,7 +1725,14 @@ int machine__process_mmap2_event(struct machine *machine, if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL || sample->cpumode == PERF_RECORD_MISC_KERNEL) { - ret = machine__process_kernel_mmap_event(machine, event); + struct extra_kernel_map xm = { + .start = event->mmap2.start, + .end = event->mmap2.start + event->mmap2.len, + .pgoff = event->mmap2.pgoff, + }; + + strlcpy(xm.name, event->mmap2.filename, KMAP_NAME_LEN); + ret = machine__process_kernel_mmap_event(machine, &xm); if (ret < 0) goto out_problem; return 0; @@ -1785,7 +1782,14 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL || sample->cpumode == PERF_RECORD_MISC_KERNEL) { - ret = machine__process_kernel_mmap_event(machine, event); + struct extra_kernel_map xm = { + .start = event->mmap.start, + .end = event->mmap.start + event->mmap.len, + .pgoff = event->mmap.pgoff, + }; + + strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN); + ret = machine__process_kernel_mmap_event(machine, &xm); if (ret < 0) goto out_problem; return 0; -- cgit v1.3.1 From ca8ea73ae109900cec4c3a1f0d3486a01a0e4434 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:13 +0100 Subject: perf symbols: Try to load vmlinux from buildid database Currently we don't check on kernel's vmlinux the same way as we do for normal binaries, but we either look for kallsyms file in build id database or check on known vmlinux locations (plus some other optional paths). This patch adds the check for standard build id binary location, so we are ready once we start to store it there from debuginfod in following changes. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-13-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 13 ++++++++++--- tools/perf/util/build-id.h | 2 ++ tools/perf/util/symbol.c | 16 ++++++++++++++++ 3 files changed, 28 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 4a391f13f40d..1fd58703c2d4 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -261,10 +261,9 @@ static const char *build_id_cache__basename(bool is_kallsyms, bool is_vdso, "debug" : "elf")); } -char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size, - bool is_debug) +char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size, + bool is_debug, bool is_kallsyms) { - bool is_kallsyms = dso__is_kallsyms((struct dso *)dso); bool is_vdso = dso__is_vdso((struct dso *)dso); char sbuild_id[SBUILD_ID_SIZE]; char *linkname; @@ -293,6 +292,14 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size, return bf; } +char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size, + bool is_debug) +{ + bool is_kallsyms = dso__is_kallsyms((struct dso *)dso); + + return __dso__build_id_filename(dso, bf, size, is_debug, is_kallsyms); +} + #define dsos__for_each_with_build_id(pos, head) \ list_for_each_entry(pos, head, node) \ if (!pos->has_build_id) \ diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index d53415feaf69..f1a2f67df6e4 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -29,6 +29,8 @@ char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf, char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size, bool is_debug); +char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size, + bool is_debug, bool is_kallsyms); int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct evsel *evsel, diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0d14abdf3d72..64a039cbba1b 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2189,6 +2189,8 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) int err; const char *kallsyms_filename = NULL; char *kallsyms_allocated_filename = NULL; + char *filename = NULL; + /* * Step 1: if the user specified a kallsyms or vmlinux filename, use * it and only it, reporting errors to the user if it cannot be used. @@ -2213,6 +2215,20 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) return dso__load_vmlinux(dso, map, symbol_conf.vmlinux_name, false); } + /* + * Before checking on common vmlinux locations, check if it's + * stored as standard build id binary (not kallsyms) under + * .debug cache. + */ + if (!symbol_conf.ignore_vmlinux_buildid) + filename = __dso__build_id_filename(dso, NULL, 0, false, false); + if (filename != NULL) { + err = dso__load_vmlinux(dso, map, filename, true); + if (err > 0) + return err; + free(filename); + } + if (!symbol_conf.ignore_vmlinux && vmlinux_path != NULL) { err = dso__load_vmlinux_path(dso, map); if (err > 0) -- cgit v1.3.1 From 058f15113042bc2fa03b0b134bfc7fb8cd156878 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:21 +0100 Subject: perf data: Add is_perf_data function Adding is_perf_data function that returns true if the given path is perf data file. It will be used in following patches. Signed-off-by: Jiri Olsa Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-21-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data.c | 19 +++++++++++++++++++ tools/perf/util/data.h | 1 + 2 files changed, 20 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 05bbcb663c41..f29af4fc3d09 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -492,3 +492,22 @@ char *perf_data__kallsyms_name(struct perf_data *data) return kallsyms_name; } + +bool is_perf_data(const char *path) +{ + bool ret = false; + FILE *file; + u64 magic; + + file = fopen(path, "r"); + if (!file) + return false; + + if (fread(&magic, 1, 8, file) < 8) + goto out; + + ret = is_perf_magic(magic); +out: + fclose(file); + return ret; +} diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index c563fcbb0288..62a3e66fbee8 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -98,4 +98,5 @@ int perf_data__update_dir(struct perf_data *data); unsigned long perf_data__size(struct perf_data *data); int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz); char *perf_data__kallsyms_name(struct perf_data *data); +bool is_perf_data(const char *path); #endif /* __PERF_DATA_H */ -- cgit v1.3.1 From 0b7b9e83c76ccffb994da8266110592e5e767718 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:19 +0100 Subject: perf build-id: Use machine__for_each_dso in perf_session__cache_build_ids Using machine__for_each_dso in perf_session__cache_build_ids, so we can reuse perf_session__cache_build_ids with different callback in following changes. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-19-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 1fd58703c2d4..948a7f48d668 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -859,12 +859,16 @@ out_free: return err; } -static int dso__cache_build_id(struct dso *dso, struct machine *machine) +static int dso__cache_build_id(struct dso *dso, struct machine *machine, + void *priv __maybe_unused) { bool is_kallsyms = dso__is_kallsyms(dso); bool is_vdso = dso__is_vdso(dso); const char *name = dso->long_name; + if (!dso->has_build_id) + return 0; + if (dso__is_kcore(dso)) { is_kallsyms = true; name = machine->mmap_name; @@ -873,43 +877,30 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine) is_kallsyms, is_vdso); } -static int __dsos__cache_build_ids(struct list_head *head, - struct machine *machine) +static int +machines__for_each_dso(struct machines *machines, machine__dso_t fn, void *priv) { - struct dso *pos; - int err = 0; - - dsos__for_each_with_build_id(pos, head) - if (dso__cache_build_id(pos, machine)) - err = -1; + int ret = machine__for_each_dso(&machines->host, fn, priv); + struct rb_node *nd; - return err; -} + for (nd = rb_first_cached(&machines->guests); nd; + nd = rb_next(nd)) { + struct machine *pos = rb_entry(nd, struct machine, rb_node); -static int machine__cache_build_ids(struct machine *machine) -{ - return __dsos__cache_build_ids(&machine->dsos.head, machine); + ret |= machine__for_each_dso(pos, fn, priv); + } + return ret ? -1 : 0; } int perf_session__cache_build_ids(struct perf_session *session) { - struct rb_node *nd; - int ret; - if (no_buildid_cache) return 0; if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST) return -1; - ret = machine__cache_build_ids(&session->machines.host); - - for (nd = rb_first_cached(&session->machines.guests); nd; - nd = rb_next(nd)) { - struct machine *pos = rb_entry(nd, struct machine, rb_node); - ret |= machine__cache_build_ids(pos); - } - return ret ? -1 : 0; + return machines__for_each_dso(&session->machines, dso__cache_build_id, NULL) ? -1 : 0; } static bool machine__read_build_ids(struct machine *machine, bool with_hits) -- cgit v1.3.1 From 75fb2af68e358324c2bdcb61be8376cffcb2d034 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:20 +0100 Subject: perf build-id: Add __perf_session__cache_build_ids function Adding __perf_session__cache_build_ids function as an interface for caching sessions build ids with callback function and its data pointer. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-20-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 10 ++++++++-- tools/perf/util/build-id.h | 3 +++ 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 948a7f48d668..6bf3cc79c5d5 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -892,7 +892,8 @@ machines__for_each_dso(struct machines *machines, machine__dso_t fn, void *priv) return ret ? -1 : 0; } -int perf_session__cache_build_ids(struct perf_session *session) +int __perf_session__cache_build_ids(struct perf_session *session, + machine__dso_t fn, void *priv) { if (no_buildid_cache) return 0; @@ -900,7 +901,12 @@ int perf_session__cache_build_ids(struct perf_session *session) if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST) return -1; - return machines__for_each_dso(&session->machines, dso__cache_build_id, NULL) ? -1 : 0; + return machines__for_each_dso(&session->machines, fn, priv) ? -1 : 0; +} + +int perf_session__cache_build_ids(struct perf_session *session) +{ + return __perf_session__cache_build_ids(session, dso__cache_build_id, NULL); } static bool machine__read_build_ids(struct machine *machine, bool with_hits) diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index f1a2f67df6e4..c6f10e3d2152 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -5,6 +5,7 @@ #define BUILD_ID_SIZE 20 #define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1) +#include "machine.h" #include "tool.h" #include @@ -46,6 +47,8 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits); int perf_session__write_buildid_table(struct perf_session *session, struct feat_fd *fd); int perf_session__cache_build_ids(struct perf_session *session); +int __perf_session__cache_build_ids(struct perf_session *session, + machine__dso_t fn, void *priv); char *build_id_cache__origname(const char *sbuild_id); char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size); -- cgit v1.3.1 From fd4ebb457c9ca90d10a74aeb85d54e27b08d5e76 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:22 +0100 Subject: perf build-id: Add build_id_cache__add function Adding build_id_cache__add function as core function that adds file into build id database. It will be set from another callers in following changes. Signed-off-by: Jiri Olsa Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-22-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 42 +++++++++++++++++++++++++++--------------- tools/perf/util/build-id.h | 2 ++ 2 files changed, 29 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 6bf3cc79c5d5..02df36b30ac5 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -671,24 +671,15 @@ out: return realname; } -int build_id_cache__add_s(const char *sbuild_id, const char *name, - struct nsinfo *nsi, bool is_kallsyms, bool is_vdso) +int +build_id_cache__add(const char *sbuild_id, const char *name, const char *realname, + struct nsinfo *nsi, bool is_kallsyms, bool is_vdso) { const size_t size = PATH_MAX; - char *realname = NULL, *filename = NULL, *dir_name = NULL, - *linkname = zalloc(size), *tmp; + char *filename = NULL, *dir_name = NULL, *linkname = zalloc(size), *tmp; char *debugfile = NULL; int err = -1; - if (!is_kallsyms) { - if (!is_vdso) - realname = nsinfo__realpath(name, nsi); - else - realname = realpath(name, NULL); - if (!realname) - goto out_free; - } - dir_name = build_id_cache__cachedir(sbuild_id, name, nsi, is_kallsyms, is_vdso); if (!dir_name) @@ -788,8 +779,6 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, pr_debug4("Failed to update/scan SDT cache for %s\n", realname); out_free: - if (!is_kallsyms) - free(realname); free(filename); free(debugfile); free(dir_name); @@ -797,6 +786,29 @@ out_free: return err; } +int build_id_cache__add_s(const char *sbuild_id, const char *name, + struct nsinfo *nsi, bool is_kallsyms, bool is_vdso) +{ + char *realname = NULL; + int err = -1; + + if (!is_kallsyms) { + if (!is_vdso) + realname = nsinfo__realpath(name, nsi); + else + realname = realpath(name, NULL); + if (!realname) + goto out_free; + } + + err = build_id_cache__add(sbuild_id, name, realname, nsi, is_kallsyms, is_vdso); + +out_free: + if (!is_kallsyms) + free(realname); + return err; +} + static int build_id_cache__add_b(const struct build_id *bid, const char *name, struct nsinfo *nsi, bool is_kallsyms, bool is_vdso) diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index c6f10e3d2152..02613f4b2c29 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -63,6 +63,8 @@ char *build_id_cache__complement(const char *incomplete_sbuild_id); int build_id_cache__list_build_ids(const char *pathname, struct nsinfo *nsi, struct strlist **result); bool build_id_cache__cached(const char *sbuild_id); +int build_id_cache__add(const char *sbuild_id, const char *name, const char *realname, + struct nsinfo *nsi, bool is_kallsyms, bool is_vdso); int build_id_cache__add_s(const char *sbuild_id, const char *name, struct nsinfo *nsi, bool is_kallsyms, bool is_vdso); -- cgit v1.3.1 From 3b13eaf0ba1d5ab59368e23ff5e5350f51c1a352 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 18 Nov 2020 08:32:33 -0300 Subject: perf tools: Update copy of libbpf's hashmap.c To pick the changes in: 7a078d2d18801bba ("libbpf, hashmap: Fix undefined behavior in hash_bits") That don't entail any changes in tools/perf. This addresses this perf build warning: Warning: Kernel ABI header at 'tools/perf/util/hashmap.h' differs from latest version at 'tools/lib/bpf/hashmap.h' diff -u tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h Not a kernel ABI, its just that this uses the mechanism in place for checking kernel ABI files drift. Cc: Adrian Hunter Cc: Daniel Borkmann Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hashmap.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h index d9b385fe808c..10a4c4cd13cf 100644 --- a/tools/perf/util/hashmap.h +++ b/tools/perf/util/hashmap.h @@ -15,6 +15,9 @@ static inline size_t hash_bits(size_t h, int bits) { /* shuffle bits and return requested number of upper bits */ + if (bits == 0) + return 0; + #if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__) /* LP64 case */ return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits); @@ -174,17 +177,17 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value); * @key: key to iterate entries for */ #define hashmap__for_each_key_entry(map, cur, _key) \ - for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ - map->cap_bits); \ - map->buckets ? map->buckets[bkt] : NULL; }); \ + for (cur = map->buckets \ + ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + : NULL; \ cur; \ cur = cur->next) \ if (map->equal_fn(cur->key, (_key), map->ctx)) #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ - for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ - map->cap_bits); \ - cur = map->buckets ? map->buckets[bkt] : NULL; }); \ + for (cur = map->buckets \ + ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + : NULL; \ cur && ({ tmp = cur->next; true; }); \ cur = tmp) \ if (map->equal_fn(cur->key, (_key), map->ctx)) -- cgit v1.3.1 From 9713070028b9ab317f395ee130fa2c4ea741bab4 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 24 Nov 2020 18:36:52 +0800 Subject: perf diff: Fix error return value in __cmd_diff() An appropriate return value should be set on the failed path. Fixes: 2a09a84c720b436a ("perf diff: Support hot streams comparison") Reported-by: Hulk Robot Signed-off-by: Zhen Lei Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jin Yao Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20201124103652.438-1-thunder.leizhen@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 584e2e1a3793..cefc71506409 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1222,8 +1222,10 @@ static int __cmd_diff(void) if (compute == COMPUTE_STREAM) { d->evlist_streams = evlist__create_streams( d->session->evlist, 5); - if (!d->evlist_streams) + if (!d->evlist_streams) { + ret = -ENOMEM; goto out_delete; + } } } -- cgit v1.3.1 From aa50d953c169e876413bf237319e728dd41d9fdd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 27 Nov 2020 14:43:56 +0900 Subject: perf record: Synthesize cgroup events only if needed It didn't check the tool->cgroup_events bit which is set when the --all-cgroups option is given. Without it, samples will not have cgroup info so no reason to synthesize. We can check the PERF_RECORD_CGROUP records after running perf record *WITHOUT* the --all-cgroups option: Before: $ perf report -D | grep CGROUP 0 0 0x8430 [0x38]: PERF_RECORD_CGROUP cgroup: 1 / CGROUP events: 1 CGROUP events: 0 CGROUP events: 0 After: $ perf report -D | grep CGROUP CGROUP events: 0 CGROUP events: 0 CGROUP events: 0 Committer testing: Before: # perf record -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 2.208 MB perf.data (10003 samples) ] # perf report -D | grep "CGROUP events" CGROUP events: 146 CGROUP events: 0 CGROUP events: 0 # After: # perf record -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 2.208 MB perf.data (10448 samples) ] # perf report -D | grep "CGROUP events" CGROUP events: 0 CGROUP events: 0 CGROUP events: 0 # With all-cgroups: # perf record --all-cgroups -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 2.374 MB perf.data (11526 samples) ] # perf report -D | grep "CGROUP events" CGROUP events: 146 CGROUP events: 0 CGROUP events: 0 # Fixes: 8fb4b67939e16 ("perf record: Add --all-cgroups option") Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201127054356.405481-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/synthetic-events.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 8a23391558cf..d9c624377da7 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -563,6 +563,9 @@ int perf_event__synthesize_cgroups(struct perf_tool *tool, char cgrp_root[PATH_MAX]; size_t mount_len; /* length of mount point in the path */ + if (!tool || !tool->cgroup_events) + return 0; + if (cgroupfs_find_mountpoint(cgrp_root, PATH_MAX, "perf_event") < 0) { pr_debug("cannot find cgroup mount point\n"); return -1; -- cgit v1.3.1 From c0ee1d5ae8c8650031badcfca6483a28c0f94f38 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 27 Nov 2020 13:14:03 +0900 Subject: perf stat: Use proper cpu for shadow stats Currently perf stat shows some metrics (like IPC) for defined events. But when no aggregation mode is used (-A option), it shows incorrect values since it used a value from a different cpu. Before: $ perf stat -aA -e cycles,instructions sleep 1 Performance counter stats for 'system wide': CPU0 116,057,380 cycles CPU1 86,084,722 cycles CPU2 99,423,125 cycles CPU3 98,272,994 cycles CPU0 53,369,217 instructions # 0.46 insn per cycle CPU1 33,378,058 instructions # 0.29 insn per cycle CPU2 58,150,086 instructions # 0.50 insn per cycle CPU3 40,029,703 instructions # 0.34 insn per cycle 1.001816971 seconds time elapsed So the IPC for CPU1 should be 0.38 (= 33,378,058 / 86,084,722) but it was 0.29 (= 33,378,058 / 116,057,380) and so on. After: $ perf stat -aA -e cycles,instructions sleep 1 Performance counter stats for 'system wide': CPU0 109,621,384 cycles CPU1 159,026,454 cycles CPU2 99,460,366 cycles CPU3 124,144,142 cycles CPU0 44,396,706 instructions # 0.41 insn per cycle CPU1 120,195,425 instructions # 0.76 insn per cycle CPU2 44,763,978 instructions # 0.45 insn per cycle CPU3 69,049,079 instructions # 0.56 insn per cycle 1.001910444 seconds time elapsed Fixes: 44d49a600259 ("perf stat: Support metrics in --per-core/socket mode") Reported-by: Sam Xi Signed-off-by: Namhyung Kim Reviewed-by: Andi Kleen Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201127041404.390276-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 4b57c0c07632..a963b5b8eb72 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -324,13 +324,10 @@ static int first_shadow_cpu(struct perf_stat_config *config, struct evlist *evlist = evsel->evlist; int i; - if (!config->aggr_get_id) - return 0; - if (config->aggr_mode == AGGR_NONE) return id; - if (config->aggr_mode == AGGR_GLOBAL) + if (!config->aggr_get_id) return 0; for (i = 0; i < evsel__nr_cpus(evsel); i++) { -- cgit v1.3.1 From ab4200c17ba6fe71d2da64317aae8a8aa684624c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 27 Nov 2020 14:48:46 +0900 Subject: perf probe: Fix to die_entrypc() returns error correctly Fix die_entrypc() to return error correctly if the DIE has no DW_AT_ranges attribute. Since dwarf_ranges() will treat the case as an empty ranges and return 0, we have to check it by ourselves. Fixes: 91e2f539eeda ("perf probe: Fix to show function entry line as probe-able") Signed-off-by: Masami Hiramatsu Cc: Sumanth Korikkar Cc: Thomas Richter Link: http://lore.kernel.org/lkml/160645612634.2824037.5284932731175079426.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index aa898014ad12..03c1a39c312a 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -373,6 +373,7 @@ bool die_is_func_def(Dwarf_Die *dw_die) int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr) { Dwarf_Addr base, end; + Dwarf_Attribute attr; if (!addr) return -EINVAL; @@ -380,6 +381,13 @@ int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr) if (dwarf_entrypc(dw_die, addr) == 0) return 0; + /* + * Since the dwarf_ranges() will return 0 if there is no + * DW_AT_ranges attribute, we should check it first. + */ + if (!dwarf_attr(dw_die, DW_AT_ranges, &attr)) + return -ENOENT; + return dwarf_ranges(dw_die, 0, &base, addr, &end) < 0 ? -ENOENT : 0; } -- cgit v1.3.1 From a9ffd0484eb4426e6befd07e7be6c01108716302 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 27 Nov 2020 14:48:55 +0900 Subject: perf probe: Change function definition check due to broken DWARF Since some gcc generates a broken DWARF which lacks DW_AT_declaration attribute from the subprogram DIE of function prototype. (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97060) So, in addition to the DW_AT_declaration check, we also check the subprogram DIE has DW_AT_inline or actual entry pc. Committer testing: # cat /etc/fedora-release Fedora release 33 (Thirty Three) # Before: # perf test vfs_getname 78: Use vfs_getname probe to get syscall args filenames : FAILED! 79: Check open filename arg using perf trace + vfs_getname : FAILED! 81: Add vfs_getname probe to get syscall args filenames : FAILED! # After: # perf test vfs_getname 78: Use vfs_getname probe to get syscall args filenames : Ok 79: Check open filename arg using perf trace + vfs_getname : Ok 81: Add vfs_getname probe to get syscall args filenames : Ok # Reported-by: Thomas Richter Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Sumanth Korikkar Link: http://lore.kernel.org/lkml/160645613571.2824037.7441351537890235895.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 20 ++++++++++++++++++-- tools/perf/util/probe-finder.c | 3 +-- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 03c1a39c312a..7b2d471a6419 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -356,9 +356,25 @@ bool die_is_signed_type(Dwarf_Die *tp_die) bool die_is_func_def(Dwarf_Die *dw_die) { Dwarf_Attribute attr; + Dwarf_Addr addr = 0; + + if (dwarf_tag(dw_die) != DW_TAG_subprogram) + return false; + + if (dwarf_attr(dw_die, DW_AT_declaration, &attr)) + return false; - return (dwarf_tag(dw_die) == DW_TAG_subprogram && - dwarf_attr(dw_die, DW_AT_declaration, &attr) == NULL); + /* + * DW_AT_declaration can be lost from function declaration + * by gcc's bug #97060. + * So we need to check this subprogram DIE has DW_AT_inline + * or an entry address. + */ + if (!dwarf_attr(dw_die, DW_AT_inline, &attr) && + die_entrypc(dw_die, &addr) < 0) + return false; + + return true; } /** diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 2c4061035f77..76dd349aa48d 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1885,8 +1885,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) if (lr->file && strtailcmp(lr->file, dwarf_decl_file(sp_die))) return DWARF_CB_OK; - if (die_is_func_def(sp_die) && - die_match_name(sp_die, lr->function)) { + if (die_match_name(sp_die, lr->function) && die_is_func_def(sp_die)) { lf->fname = dwarf_decl_file(sp_die); dwarf_decl_line(sp_die, &lr->offset); pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset); -- cgit v1.3.1 From c6232bd40b2eda3819d108e6e3f621ec604e15d8 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 26 Nov 2020 14:46:41 +0100 Subject: KVM: arm64: selftests: Filter out DEMUX registers DEMUX register presence depends on the host's hardware (the CLIDR_EL1 register to be precise). This means there's no set of them that we can bless and that it's possible to encounter new ones when running on different hardware (which would generate "Consider adding them ..." messages, but we'll never want to add them.) Remove the ones we have in the blessed list and filter them out of the new list, but also provide a new command line switch to list them if one so desires. Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201126134641.35231-3-drjones@redhat.com --- tools/testing/selftests/kvm/aarch64/get-reg-list.c | 39 +++++++++++++++++----- 1 file changed, 30 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 33218a395d9f..486932164cf2 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -42,12 +42,16 @@ #define for_each_reg(i) \ for ((i) = 0; (i) < reg_list->n; ++(i)) +#define for_each_reg_filtered(i) \ + for_each_reg(i) \ + if (!filter_reg(reg_list->reg[i])) + #define for_each_missing_reg(i) \ for ((i) = 0; (i) < blessed_n; ++(i)) \ if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i])) #define for_each_new_reg(i) \ - for ((i) = 0; (i) < reg_list->n; ++(i)) \ + for_each_reg_filtered(i) \ if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i])) @@ -57,6 +61,18 @@ static __u64 base_regs[], vregs[], sve_regs[], rejects_set[]; static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n; static __u64 *blessed_reg, blessed_n; +static bool filter_reg(__u64 reg) +{ + /* + * DEMUX register presence depends on the host's CLIDR_EL1. + * This means there's no set of them that we can bless. + */ + if ((reg & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) + return true; + + return false; +} + static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg) { int i; @@ -325,7 +341,7 @@ int main(int ac, char **av) struct kvm_vcpu_init init = { .target = -1, }; int new_regs = 0, missing_regs = 0, i; int failed_get = 0, failed_set = 0, failed_reject = 0; - bool print_list = false, fixup_core_regs = false; + bool print_list = false, print_filtered = false, fixup_core_regs = false; struct kvm_vm *vm; __u64 *vec_regs; @@ -336,8 +352,10 @@ int main(int ac, char **av) fixup_core_regs = true; else if (strcmp(av[i], "--list") == 0) print_list = true; + else if (strcmp(av[i], "--list-filtered") == 0) + print_filtered = true; else - fprintf(stderr, "Ignoring unknown option: %s\n", av[i]); + TEST_FAIL("Unknown option: %s\n", av[i]); } vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); @@ -350,10 +368,14 @@ int main(int ac, char **av) if (fixup_core_regs) core_reg_fixup(); - if (print_list) { + if (print_list || print_filtered) { putchar('\n'); - for_each_reg(i) - print_reg(reg_list->reg[i]); + for_each_reg(i) { + __u64 id = reg_list->reg[i]; + if ((print_list && !filter_reg(id)) || + (print_filtered && filter_reg(id))) + print_reg(id); + } putchar('\n'); return 0; } @@ -458,6 +480,8 @@ int main(int ac, char **av) /* * The current blessed list was primed with the output of kernel version * v4.15 with --core-reg-fixup and then later updated with new registers. + * + * The blessed list is up to date with kernel version v5.10-rc5 */ static __u64 base_regs[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]), @@ -736,9 +760,6 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */ ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */ ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */ - KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 0, - KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1, - KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2, }; static __u64 base_regs_n = ARRAY_SIZE(base_regs); -- cgit v1.3.1 From 105c4e75feb411a60f5089f7a1e68b8523f986cc Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Thu, 26 Nov 2020 10:37:35 +0100 Subject: libbpf: Replace size_t with __u32 in xsk interfaces Replace size_t with __u32 in the xsk interfaces that contain this. There is no reason to have size_t since the internal variable that is manipulated is a __u32. The following APIs are affected: __u32 xsk_ring_prod__reserve(struct xsk_ring_prod *prod, __u32 nb, __u32 *idx) void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb) __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx) void xsk_ring_cons__cancel(struct xsk_ring_cons *cons, __u32 nb) void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb) The "nb" variable and the return values have been changed from size_t to __u32. Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/1606383455-8243-1-git-send-email-magnus.karlsson@gmail.com --- tools/lib/bpf/xsk.h | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index 1719a327e5f9..5865e082ba0b 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -113,8 +113,7 @@ static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb) return (entries > nb) ? nb : entries; } -static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod, - size_t nb, __u32 *idx) +static inline __u32 xsk_ring_prod__reserve(struct xsk_ring_prod *prod, __u32 nb, __u32 *idx) { if (xsk_prod_nb_free(prod, nb) < nb) return 0; @@ -125,7 +124,7 @@ static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod, return nb; } -static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb) +static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb) { /* Make sure everything has been written to the ring before indicating * this to the kernel by writing the producer pointer. @@ -135,10 +134,9 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb) *prod->producer += nb; } -static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons, - size_t nb, __u32 *idx) +static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx) { - size_t entries = xsk_cons_nb_avail(cons, nb); + __u32 entries = xsk_cons_nb_avail(cons, nb); if (entries > 0) { /* Make sure we do not speculatively read the data before @@ -153,13 +151,12 @@ static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons, return entries; } -static inline void xsk_ring_cons__cancel(struct xsk_ring_cons *cons, - size_t nb) +static inline void xsk_ring_cons__cancel(struct xsk_ring_cons *cons, __u32 nb) { cons->cached_cons -= nb; } -static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb) +static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb) { /* Make sure data has been read before indicating we are done * with the entries by updating the consumer pointer. -- cgit v1.3.1 From 4f336e88a870ecc56832154dff22853a3ca33e24 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 24 Nov 2020 18:24:50 +0300 Subject: selftests/tls: add CHACHA20-POLY1305 to tls selftests Add new cipher as a variant of standard tls selftests Signed-off-by: Vadim Fedorenko Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 40 ++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index b599f1fa99b5..cb0d1890a860 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -103,32 +103,58 @@ FIXTURE(tls) FIXTURE_VARIANT(tls) { - unsigned int tls_version; + u16 tls_version; + u16 cipher_type; }; -FIXTURE_VARIANT_ADD(tls, 12) +FIXTURE_VARIANT_ADD(tls, 12_gcm) { .tls_version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_128, }; -FIXTURE_VARIANT_ADD(tls, 13) +FIXTURE_VARIANT_ADD(tls, 13_gcm) { .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_128, +}; + +FIXTURE_VARIANT_ADD(tls, 12_chacha) +{ + .tls_version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_CHACHA20_POLY1305, +}; + +FIXTURE_VARIANT_ADD(tls, 13_chacha) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_CHACHA20_POLY1305, }; FIXTURE_SETUP(tls) { - struct tls12_crypto_info_aes_gcm_128 tls12; + union tls_crypto_context tls12; struct sockaddr_in addr; socklen_t len; int sfd, ret; + size_t tls12_sz; self->notls = false; len = sizeof(addr); memset(&tls12, 0, sizeof(tls12)); tls12.info.version = variant->tls_version; - tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128; + tls12.info.cipher_type = variant->cipher_type; + switch (variant->cipher_type) { + case TLS_CIPHER_CHACHA20_POLY1305: + tls12_sz = sizeof(tls12_crypto_info_chacha20_poly1305); + break; + case TLS_CIPHER_AES_GCM_128: + tls12_sz = sizeof(tls12_crypto_info_aes_gcm_128); + break; + default: + tls12_sz = 0; + } addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(INADDR_ANY); @@ -156,7 +182,7 @@ FIXTURE_SETUP(tls) if (!self->notls) { ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, - sizeof(tls12)); + tls12_sz); ASSERT_EQ(ret, 0); } @@ -169,7 +195,7 @@ FIXTURE_SETUP(tls) ASSERT_EQ(ret, 0); ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, - sizeof(tls12)); + tls12_sz); ASSERT_EQ(ret, 0); } -- cgit v1.3.1 From e14038a7ead09faa180eb072adc4a2157a0b475f Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 26 Nov 2020 19:47:47 +0100 Subject: selftests: tc-testing: enable CONFIG_NET_SCH_RED as a module a proper kernel configuration for running kselftest can be obtained with: $ yes | make kselftest-merge enable compile support for the 'red' qdisc: otherwise, tdc kselftest fail when trying to run tdc test items contained in red.json. Signed-off-by: Davide Caratti Link: https://lore.kernel.org/r/cfa23f2d4f672401e6cebca3a321dd1901a9ff07.1606416464.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/config | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index c33a7aac27ff..b71828df5a6d 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -59,6 +59,7 @@ CONFIG_NET_IFE_SKBPRIO=m CONFIG_NET_IFE_SKBTCINDEX=m CONFIG_NET_SCH_FIFO=y CONFIG_NET_SCH_ETS=m +CONFIG_NET_SCH_RED=m # ## Network testing -- cgit v1.3.1 From 94b69c615e4e2b04d1392d1193c72406ff9fd73e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 27 Nov 2020 13:14:04 +0900 Subject: perf test: Add shadow stat test It calculates IPC from the cycles and instruction counts and compares it with the shadow stat for both global aggregation (default) and no aggregation mode. $ perf stat -a -A -e cycles,instructions sleep 1 Performance counter stats for 'system wide': CPU0 39,580,880 cycles CPU1 45,426,945 cycles CPU2 31,151,685 cycles CPU3 55,167,421 cycles CPU0 17,073,564 instructions # 0.43 insn per cycle CPU1 34,955,764 instructions # 0.77 insn per cycle CPU2 15,688,459 instructions # 0.50 insn per cycle CPU3 34,699,217 instructions # 0.63 insn per cycle 1.003275495 seconds time elapsed In this example, the 'insn per cycle' should be matched to the number for each cpu. For CPU2, 0.50 = 15,688,459 / 31,151,685 . Committer testing: # perf test shadow 78: perf stat metrics (shadow stat) test : Ok # Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201127041404.390276-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat+shadow_stat.sh | 80 ++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100755 tools/perf/tests/shell/stat+shadow_stat.sh (limited to 'tools') diff --git a/tools/perf/tests/shell/stat+shadow_stat.sh b/tools/perf/tests/shell/stat+shadow_stat.sh new file mode 100755 index 000000000000..249dfe48cf6a --- /dev/null +++ b/tools/perf/tests/shell/stat+shadow_stat.sh @@ -0,0 +1,80 @@ +#!/bin/sh +# perf stat metrics (shadow stat) test +# SPDX-License-Identifier: GPL-2.0 + +set -e + +# skip if system-wide mode is forbidden +perf stat -a true > /dev/null 2>&1 || exit 2 + +test_global_aggr() +{ + local cyc + + perf stat -a --no-big-num -e cycles,instructions sleep 1 2>&1 | \ + grep -e cycles -e instructions | \ + while read num evt hash ipc rest + do + # skip not counted events + if [[ $num == "&1 | \ + grep ^CPU | \ + while read cpu num evt hash ipc rest + do + # skip not counted events + if [[ $num == " Date: Mon, 30 Nov 2020 09:08:24 -0300 Subject: perf evsel: Convert last 'struct evsel' methods to the right evsel__ prefix As 'perf_evsel__' means its a function in tools/lib/perf/. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/evsel-tp-sched.c | 25 ++++++++++++------------ tools/perf/ui/browsers/hists.c | 28 ++++++++++----------------- tools/perf/util/evsel.c | 40 +++++++++++++++------------------------ tools/perf/util/header.c | 5 ++--- tools/perf/util/stat.c | 4 ++-- 5 files changed, 41 insertions(+), 61 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index 0e224a0a55d9..f9e34bd26cf3 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -5,8 +5,7 @@ #include "tests.h" #include "debug.h" -static int perf_evsel__test_field(struct evsel *evsel, const char *name, - int size, bool should_be_signed) +static int evsel__test_field(struct evsel *evsel, const char *name, int size, bool should_be_signed) { struct tep_format_field *field = evsel__field(evsel, name); int is_signed; @@ -43,25 +42,25 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes return -1; } - if (perf_evsel__test_field(evsel, "prev_comm", 16, false)) + if (evsel__test_field(evsel, "prev_comm", 16, false)) ret = -1; - if (perf_evsel__test_field(evsel, "prev_pid", 4, true)) + if (evsel__test_field(evsel, "prev_pid", 4, true)) ret = -1; - if (perf_evsel__test_field(evsel, "prev_prio", 4, true)) + if (evsel__test_field(evsel, "prev_prio", 4, true)) ret = -1; - if (perf_evsel__test_field(evsel, "prev_state", sizeof(long), true)) + if (evsel__test_field(evsel, "prev_state", sizeof(long), true)) ret = -1; - if (perf_evsel__test_field(evsel, "next_comm", 16, false)) + if (evsel__test_field(evsel, "next_comm", 16, false)) ret = -1; - if (perf_evsel__test_field(evsel, "next_pid", 4, true)) + if (evsel__test_field(evsel, "next_pid", 4, true)) ret = -1; - if (perf_evsel__test_field(evsel, "next_prio", 4, true)) + if (evsel__test_field(evsel, "next_prio", 4, true)) ret = -1; evsel__delete(evsel); @@ -73,16 +72,16 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes return -1; } - if (perf_evsel__test_field(evsel, "comm", 16, false)) + if (evsel__test_field(evsel, "comm", 16, false)) ret = -1; - if (perf_evsel__test_field(evsel, "pid", 4, true)) + if (evsel__test_field(evsel, "pid", 4, true)) ret = -1; - if (perf_evsel__test_field(evsel, "prio", 4, true)) + if (evsel__test_field(evsel, "prio", 4, true)) ret = -1; - if (perf_evsel__test_field(evsel, "target_cpu", 4, true)) + if (evsel__test_field(evsel, "target_cpu", 4, true)) ret = -1; evsel__delete(evsel); diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index b0e1880cf992..e735d7e37da9 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2946,14 +2946,10 @@ next: } } -static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events, - const char *helpline, - bool left_exits, - struct hist_browser_timer *hbt, - float min_pcnt, - struct perf_env *env, - bool warn_lost_event, - struct annotation_options *annotation_opts) +static int evsel__hists_browse(struct evsel *evsel, int nr_events, const char *helpline, + bool left_exits, struct hist_browser_timer *hbt, float min_pcnt, + struct perf_env *env, bool warn_lost_event, + struct annotation_options *annotation_opts) { struct hists *hists = evsel__hists(evsel); struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env, annotation_opts); @@ -3505,12 +3501,10 @@ browse_hists: */ if (hbt) hbt->timer(hbt->arg); - key = perf_evsel__hists_browse(pos, nr_events, help, - true, hbt, - menu->min_pcnt, - menu->env, - warn_lost_event, - menu->annotation_opts); + key = evsel__hists_browse(pos, nr_events, help, true, hbt, + menu->min_pcnt, menu->env, + warn_lost_event, + menu->annotation_opts); ui_browser__show_title(&menu->b, title); switch (key) { case K_TAB: @@ -3633,10 +3627,8 @@ int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help, single_entry: { struct evsel *first = evlist__first(evlist); - return perf_evsel__hists_browse(first, nr_entries, help, - false, hbt, min_pcnt, - env, warn_lost_event, - annotation_opts); + return evsel__hists_browse(first, nr_entries, help, false, hbt, min_pcnt, + env, warn_lost_event, annotation_opts); } } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1cad6051d8b0..9005cc974f43 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -497,7 +497,7 @@ static const char *__evsel__hw_name(u64 config) return "unknown-hardware"; } -static int perf_evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size) +static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size) { int colon = 0, r = 0; struct perf_event_attr *attr = &evsel->core.attr; @@ -536,7 +536,7 @@ static int perf_evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size) static int evsel__hw_name(struct evsel *evsel, char *bf, size_t size) { int r = scnprintf(bf, size, "%s", __evsel__hw_name(evsel->core.attr.config)); - return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); + return r + evsel__add_modifiers(evsel, bf + r, size - r); } const char *evsel__sw_names[PERF_COUNT_SW_MAX] = { @@ -562,7 +562,7 @@ static const char *__evsel__sw_name(u64 config) static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size) { int r = scnprintf(bf, size, "%s", __evsel__sw_name(evsel->core.attr.config)); - return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); + return r + evsel__add_modifiers(evsel, bf + r, size - r); } static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type) @@ -587,7 +587,7 @@ static int evsel__bp_name(struct evsel *evsel, char *bf, size_t size) { struct perf_event_attr *attr = &evsel->core.attr; int r = __evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type); - return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); + return r + evsel__add_modifiers(evsel, bf + r, size - r); } const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = { @@ -682,13 +682,13 @@ out_err: static int evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size) { int ret = __evsel__hw_cache_name(evsel->core.attr.config, bf, size); - return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret); + return ret + evsel__add_modifiers(evsel, bf + ret, size - ret); } static int evsel__raw_name(struct evsel *evsel, char *bf, size_t size) { int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->core.attr.config); - return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret); + return ret + evsel__add_modifiers(evsel, bf + ret, size - ret); } static int evsel__tool_name(char *bf, size_t size) @@ -850,9 +850,7 @@ void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts, return __evsel__config_callchain(evsel, opts, param); } -static void -perf_evsel__reset_callgraph(struct evsel *evsel, - struct callchain_param *param) +static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param *param) { struct perf_event_attr *attr = &evsel->core.attr; @@ -988,7 +986,7 @@ static void evsel__apply_config_terms(struct evsel *evsel, /* If global callgraph set, clear it */ if (callchain_param.enabled) - perf_evsel__reset_callgraph(evsel, &callchain_param); + evsel__reset_callgraph(evsel, &callchain_param); /* set perf-event callgraph */ if (param.enabled) { @@ -1434,9 +1432,7 @@ static int evsel__read_one(struct evsel *evsel, int cpu, int thread) return perf_evsel__read(&evsel->core, cpu, thread, count); } -static void -perf_evsel__set_count(struct evsel *counter, int cpu, int thread, - u64 val, u64 ena, u64 run) +static void evsel__set_count(struct evsel *counter, int cpu, int thread, u64 val, u64 ena, u64 run) { struct perf_counts_values *count; @@ -1449,9 +1445,7 @@ perf_evsel__set_count(struct evsel *counter, int cpu, int thread, perf_counts__set_loaded(counter->counts, cpu, thread, true); } -static int -perf_evsel__process_group_data(struct evsel *leader, - int cpu, int thread, u64 *data) +static int evsel__process_group_data(struct evsel *leader, int cpu, int thread, u64 *data) { u64 read_format = leader->core.attr.read_format; struct sample_read_value *v; @@ -1470,8 +1464,7 @@ perf_evsel__process_group_data(struct evsel *leader, v = (struct sample_read_value *) data; - perf_evsel__set_count(leader, cpu, thread, - v[0].value, ena, run); + evsel__set_count(leader, cpu, thread, v[0].value, ena, run); for (i = 1; i < nr; i++) { struct evsel *counter; @@ -1480,8 +1473,7 @@ perf_evsel__process_group_data(struct evsel *leader, if (!counter) return -EINVAL; - perf_evsel__set_count(counter, cpu, thread, - v[i].value, ena, run); + evsel__set_count(counter, cpu, thread, v[i].value, ena, run); } return 0; @@ -1514,7 +1506,7 @@ static int evsel__read_group(struct evsel *leader, int cpu, int thread) if (readn(FD(leader, cpu, thread), data, size) <= 0) return -errno; - return perf_evsel__process_group_data(leader, cpu, thread, data); + return evsel__process_group_data(leader, cpu, thread, data); } int evsel__read_counter(struct evsel *evsel, int cpu, int thread) @@ -1567,9 +1559,7 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread) return fd; } -static void perf_evsel__remove_fd(struct evsel *pos, - int nr_cpus, int nr_threads, - int thread_idx) +static void evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int thread_idx) { for (int cpu = 0; cpu < nr_cpus; cpu++) for (int thread = thread_idx; thread < nr_threads - 1; thread++) @@ -1588,7 +1578,7 @@ static int update_fds(struct evsel *evsel, evlist__for_each_entry(evsel->evlist, pos) { nr_cpus = pos != evsel ? nr_cpus : cpu_idx; - perf_evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx); + evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx); /* * Since fds for next evsel has not been created, diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b9171bd11fe6..b65c8f7ce36a 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3735,8 +3735,7 @@ static int read_attr(int fd, struct perf_header *ph, return ret <= 0 ? -1 : 0; } -static int perf_evsel__prepare_tracepoint_event(struct evsel *evsel, - struct tep_handle *pevent) +static int evsel__prepare_tracepoint_event(struct evsel *evsel, struct tep_handle *pevent) { struct tep_event *event; char bf[128]; @@ -3774,7 +3773,7 @@ static int perf_evlist__prepare_tracepoint_events(struct evlist *evlist, evlist__for_each_entry(evlist, pos) { if (pos->core.attr.type == PERF_TYPE_TRACEPOINT && - perf_evsel__prepare_tracepoint_event(pos, pevent)) + evsel__prepare_tracepoint_event(pos, pevent)) return -1; } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index bd0decd6d753..5e98d591cb38 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -229,7 +229,7 @@ void perf_evlist__reset_prev_raw_counts(struct evlist *evlist) evsel__reset_prev_raw_counts(evsel); } -static void perf_evsel__copy_prev_raw_counts(struct evsel *evsel) +static void evsel__copy_prev_raw_counts(struct evsel *evsel) { int ncpus = evsel__nr_cpus(evsel); int nthreads = perf_thread_map__nr(evsel->core.threads); @@ -250,7 +250,7 @@ void perf_evlist__copy_prev_raw_counts(struct evlist *evlist) struct evsel *evsel; evlist__for_each_entry(evlist, evsel) - perf_evsel__copy_prev_raw_counts(evsel); + evsel__copy_prev_raw_counts(evsel); } void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist) -- cgit v1.3.1 From a622eafa1a54043c2eaedfccdd1b1ee5ffeb9d06 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 09:22:07 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' methods: evlist__set_leader() perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 +- tools/perf/util/evlist.c | 8 ++++---- tools/perf/util/evlist.h | 4 ++-- tools/perf/util/parse-events.c | 2 +- tools/perf/util/python.c | 2 +- tools/perf/util/record.c | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f15b2f8aa14d..2dcfd6d3f879 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -733,7 +733,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) } if (group) - perf_evlist__set_leader(evsel_list); + evlist__set_leader(evsel_list); if (affinity__setup(&affinity) < 0) return -1; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 8bdf3d2c907c..655691eebe85 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -212,7 +212,7 @@ out: return err; } -void __perf_evlist__set_leader(struct list_head *list) +void __evlist__set_leader(struct list_head *list) { struct evsel *evsel, *leader; @@ -226,11 +226,11 @@ void __perf_evlist__set_leader(struct list_head *list) } } -void perf_evlist__set_leader(struct evlist *evlist) +void evlist__set_leader(struct evlist *evlist) { if (evlist->core.nr_entries) { evlist->nr_groups = evlist->core.nr_entries > 1 ? 1 : 0; - __perf_evlist__set_leader(&evlist->core.entries); + __evlist__set_leader(&evlist->core.entries); } } @@ -1694,7 +1694,7 @@ void perf_evlist__force_leader(struct evlist *evlist) if (!evlist->nr_groups) { struct evsel *leader = evlist__first(evlist); - perf_evlist__set_leader(evlist); + evlist__set_leader(evlist); leader->forced_leader = true; } } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index e1a450322bc5..c155f1364077 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -220,8 +220,8 @@ void perf_evlist__set_selected(struct evlist *evlist, int perf_evlist__create_maps(struct evlist *evlist, struct target *target); int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel); -void __perf_evlist__set_leader(struct list_head *list); -void perf_evlist__set_leader(struct evlist *evlist); +void __evlist__set_leader(struct list_head *list); +void evlist__set_leader(struct evlist *evlist); u64 __evlist__combined_sample_type(struct evlist *evlist); u64 evlist__combined_sample_type(struct evlist *evlist); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 3b581d7b3213..8fa87f60133f 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1769,7 +1769,7 @@ void parse_events__set_leader(char *name, struct list_head *list, if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state)) return; - __perf_evlist__set_leader(list); + __evlist__set_leader(list); leader = list_entry(list->next, struct evsel, core.node); leader->group_name = name ? strdup(name) : NULL; } diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index ae8edde7c50e..3d3b46514863 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -1089,7 +1089,7 @@ static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist, return NULL; if (group) - perf_evlist__set_leader(evlist); + evlist__set_leader(evlist); if (evlist__open(evlist) < 0) { PyErr_SetFromErrno(PyExc_OSError); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 07e4b96a6625..7330407db4ba 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -102,7 +102,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, * since some might depend on this info. */ if (opts->group) - perf_evlist__set_leader(evlist); + evlist__set_leader(evlist); if (evlist->core.cpus->map[0] < 0) opts->no_inherit = true; -- cgit v1.3.1 From 7b392ef04ef570c15de8fc0d36171f9bc80dd539 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 09:26:54 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' 'workload' methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 9 ++++----- tools/perf/builtin-record.c | 11 +++++------ tools/perf/builtin-stat.c | 7 +++---- tools/perf/builtin-trace.c | 7 +++---- tools/perf/tests/event-times.c | 4 ++-- tools/perf/tests/perf-record.c | 8 ++++---- tools/perf/tests/task-exit.c | 9 ++++----- tools/perf/util/evlist.c | 9 ++++----- tools/perf/util/evlist.h | 10 ++++------ 9 files changed, 33 insertions(+), 41 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 9366fad591dc..bf4d8e9ab8f5 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -67,7 +67,7 @@ static void sig_handler(int sig __maybe_unused) } /* - * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails, since + * evlist__prepare_workload will send a SIGUSR1 if the fork fails, since * we asked by setting its exec_error to the function below, * ftrace__workload_exec_failed_signal. * @@ -600,9 +600,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) if (write_tracing_file("trace", "0") < 0) goto out; - if (argc && perf_evlist__prepare_workload(ftrace->evlist, - &ftrace->target, argv, false, - ftrace__workload_exec_failed_signal) < 0) { + if (argc && evlist__prepare_workload(ftrace->evlist, &ftrace->target, argv, false, + ftrace__workload_exec_failed_signal) < 0) { goto out; } @@ -644,7 +643,7 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) } } - perf_evlist__start_workload(ftrace->evlist); + evlist__start_workload(ftrace->evlist); if (ftrace->initial_delay) { usleep(ftrace->initial_delay * 1000); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index adf311d15d3d..4302f32c80c5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1333,7 +1333,7 @@ record__switch_output(struct record *rec, bool at_exit) static volatile int workload_exec_errno; /* - * perf_evlist__prepare_workload will send a SIGUSR1 + * evlist__prepare_workload will send a SIGUSR1 * if the fork fails, since we asked by setting its * want_signal to true. */ @@ -1689,9 +1689,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) record__init_features(rec); if (forks) { - err = perf_evlist__prepare_workload(rec->evlist, &opts->target, - argv, data->is_pipe, - workload_exec_failed_signal); + err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, + workload_exec_failed_signal); if (err < 0) { pr_err("Couldn't run the workload!\n"); status = err; @@ -1835,7 +1834,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) machine); free(event); - perf_evlist__start_workload(rec->evlist); + evlist__start_workload(rec->evlist); } if (evlist__initialize_ctlfd(rec->evlist, opts->ctl_fd, opts->ctl_fd_ack)) @@ -2413,7 +2412,7 @@ static bool dry_run; * XXX Will stay a global variable till we fix builtin-script.c to stop messing * with it and switch to use the library functions in perf_evlist that came * from builtin-record.c, i.e. use record_opts, - * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record', + * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', * using pipes, etc. */ static struct option __record_options[] = { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2dcfd6d3f879..460e9b206533 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -534,7 +534,7 @@ static void disable_counters(void) static volatile int workload_exec_errno; /* - * perf_evlist__prepare_workload will send a SIGUSR1 + * evlist__prepare_workload will send a SIGUSR1 * if the fork fails, since we asked by setting its * want_signal to true. */ @@ -724,8 +724,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) bool second_pass = false; if (forks) { - if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe, - workload_exec_failed_signal) < 0) { + if (evlist__prepare_workload(evsel_list, &target, argv, is_pipe, workload_exec_failed_signal) < 0) { perror("failed to prepare workload"); return -1; } @@ -876,7 +875,7 @@ try_again_reset: clock_gettime(CLOCK_MONOTONIC, &ref_time); if (forks) { - perf_evlist__start_workload(evsel_list); + evlist__start_workload(evsel_list); enable_counters(); if (interval || timeout || evlist__ctlfd_initialized(evsel_list)) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index de80534473af..80af0bf37d76 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3680,7 +3680,7 @@ static int trace__set_filter_pids(struct trace *trace) * Better not use !target__has_task() here because we need to cover the * case where no threads were specified in the command line, but a * workload was, and in that case we will fill in the thread_map when - * we fork the workload in perf_evlist__prepare_workload. + * we fork the workload in evlist__prepare_workload. */ if (trace->filter_pids.nr > 0) { err = perf_evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr, @@ -3969,8 +3969,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) signal(SIGINT, sig_handler); if (forks) { - err = perf_evlist__prepare_workload(evlist, &trace->opts.target, - argv, false, NULL); + err = evlist__prepare_workload(evlist, &trace->opts.target, argv, false, NULL); if (err < 0) { fprintf(trace->output, "Couldn't run the workload!\n"); goto out_delete_evlist; @@ -4043,7 +4042,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) evlist__enable(evlist); if (forks) - perf_evlist__start_workload(evlist); + evlist__start_workload(evlist); if (trace->opts.initial_delay) { usleep(trace->opts.initial_delay * 1000); diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index db68894a6f40..9da2d4f58b8e 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -32,7 +32,7 @@ static int attach__enable_on_exec(struct evlist *evlist) return err; } - err = perf_evlist__prepare_workload(evlist, &target, argv, false, NULL); + err = evlist__prepare_workload(evlist, &target, argv, false, NULL); if (err < 0) { pr_debug("Couldn't run the workload!\n"); return err; @@ -47,7 +47,7 @@ static int attach__enable_on_exec(struct evlist *evlist) return err; } - return perf_evlist__start_workload(evlist) == 1 ? TEST_OK : TEST_FAIL; + return evlist__start_workload(evlist) == 1 ? TEST_OK : TEST_FAIL; } static int detach__enable_on_exec(struct evlist *evlist) diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 67d3f5aad016..8745bf5f93e6 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -81,7 +81,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus /* * Create maps of threads and cpus to monitor. In this case * we start with all threads and cpus (-1, -1) but then in - * perf_evlist__prepare_workload we'll fill in the only thread + * evlist__prepare_workload we'll fill in the only thread * we're monitoring, the one forked there. */ err = perf_evlist__create_maps(evlist, &opts.target); @@ -92,11 +92,11 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus /* * Prepare the workload in argv[] to run, it'll fork it, and then wait - * for perf_evlist__start_workload() to exec it. This is done this way + * for evlist__start_workload() to exec it. This is done this way * so that we have time to open the evlist (calling sys_perf_event_open * on all the fds) and then mmap them. */ - err = perf_evlist__prepare_workload(evlist, &opts.target, argv, false, NULL); + err = evlist__prepare_workload(evlist, &opts.target, argv, false, NULL); if (err < 0) { pr_debug("Couldn't run the workload!\n"); goto out_delete_evlist; @@ -161,7 +161,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus /* * Now! */ - perf_evlist__start_workload(evlist); + evlist__start_workload(evlist); while (1) { int before = total_events; diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index adaff9044331..452d51048cc1 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -23,7 +23,7 @@ static void sig_handler(int sig __maybe_unused) } /* - * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails, since + * evlist__prepare_workload will send a SIGUSR1 if the fork fails, since * we asked by setting its exec_error to this handler. */ static void workload_exec_failed_signal(int signo __maybe_unused, @@ -67,7 +67,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused /* * Create maps of threads and cpus to monitor. In this case * we start with all threads and cpus (-1, -1) but then in - * perf_evlist__prepare_workload we'll fill in the only thread + * evlist__prepare_workload we'll fill in the only thread * we're monitoring, the one forked there. */ cpus = perf_cpu_map__dummy_new(); @@ -83,8 +83,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused cpus = NULL; threads = NULL; - err = perf_evlist__prepare_workload(evlist, &target, argv, false, - workload_exec_failed_signal); + err = evlist__prepare_workload(evlist, &target, argv, false, workload_exec_failed_signal); if (err < 0) { pr_debug("Couldn't run the workload!\n"); goto out_delete_evlist; @@ -116,7 +115,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused goto out_delete_evlist; } - perf_evlist__start_workload(evlist); + evlist__start_workload(evlist); retry: md = &evlist->mmap[0]; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 655691eebe85..198d4ef03884 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1315,9 +1315,8 @@ out_err: return err; } -int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target, - const char *argv[], bool pipe_output, - void (*exec_error)(int signo, siginfo_t *info, void *ucontext)) +int evlist__prepare_workload(struct evlist *evlist, struct target *target, const char *argv[], + bool pipe_output, void (*exec_error)(int signo, siginfo_t *info, void *ucontext)) { int child_ready_pipe[2], go_pipe[2]; char bf; @@ -1362,7 +1361,7 @@ int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target, /* * The parent will ask for the execvp() to be performed by * writing exactly one byte, in workload.cork_fd, usually via - * perf_evlist__start_workload(). + * evlist__start_workload(). * * For cancelling the workload without actually running it, * the parent will just close workload.cork_fd, without writing @@ -1429,7 +1428,7 @@ out_close_ready_pipe: return -1; } -int perf_evlist__start_workload(struct evlist *evlist) +int evlist__start_workload(struct evlist *evlist) { if (evlist->workload.cork_fd > 0) { char bf = 0; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index c155f1364077..a0a53f33a272 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -182,12 +182,10 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain); int record_opts__config(struct record_opts *opts); -int perf_evlist__prepare_workload(struct evlist *evlist, - struct target *target, - const char *argv[], bool pipe_output, - void (*exec_error)(int signo, siginfo_t *info, - void *ucontext)); -int perf_evlist__start_workload(struct evlist *evlist); +int evlist__prepare_workload(struct evlist *evlist, struct target *target, + const char *argv[], bool pipe_output, + void (*exec_error)(int signo, siginfo_t *info, void *ucontext)); +int evlist__start_workload(struct evlist *evlist); struct option; -- cgit v1.3.1 From 53f5e9084d0195209bfc7e5fa547fd35bbaadbee Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 09:31:04 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' stats methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 6 +++--- tools/perf/builtin-stat.c | 16 ++++++++-------- tools/perf/tests/parse-metric.c | 4 ++-- tools/perf/util/stat.c | 14 +++++++------- tools/perf/util/stat.h | 12 ++++++------ 5 files changed, 26 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 48588ccf902e..983c101965ab 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1847,7 +1847,7 @@ static void perf_sample__fprint_metric(struct perf_script *script, u64 val; if (!evsel->stats) - perf_evlist__alloc_stats(script->session->evlist, false); + evlist__alloc_stats(script->session->evlist, false); if (evsel_script(evsel->leader)->gnum++ == 0) perf_stat__reset_shadow_stats(); val = sample->period * evsel->scale; @@ -3308,7 +3308,7 @@ static int set_maps(struct perf_script *script) perf_evlist__set_maps(&evlist->core, script->cpus, script->threads); - if (perf_evlist__alloc_stats(evlist, true)) + if (evlist__alloc_stats(evlist, true)) return -ENOMEM; script->allocated = true; @@ -3935,7 +3935,7 @@ out_delete: zfree(&script.ptime_range); } - perf_evlist__free_stats(session->evlist); + evlist__free_stats(session->evlist); perf_session__delete(session); if (script_started) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 460e9b206533..b81e64017808 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -270,7 +270,7 @@ static void perf_stat__reset_stats(void) { int i; - perf_evlist__reset_stats(evsel_list); + evlist__reset_stats(evsel_list); perf_stat__reset_shadow_stats(); for (i = 0; i < stat_config.stats_num; i++) @@ -913,10 +913,10 @@ try_again_reset: update_stats(&walltime_nsecs_stats, t1 - t0); if (stat_config.aggr_mode == AGGR_GLOBAL) - perf_evlist__save_aggr_prev_raw_counts(evsel_list); + evlist__save_aggr_prev_raw_counts(evsel_list); - perf_evlist__copy_prev_raw_counts(evsel_list); - perf_evlist__reset_prev_raw_counts(evsel_list); + evlist__copy_prev_raw_counts(evsel_list); + evlist__reset_prev_raw_counts(evsel_list); runtime_stat_reset(&stat_config); perf_stat__reset_shadow_per_stat(&rt_stat); } else @@ -1907,7 +1907,7 @@ static int set_maps(struct perf_stat *st) perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads); - if (perf_evlist__alloc_stats(evsel_list, true)) + if (evlist__alloc_stats(evsel_list, true)) return -ENOMEM; st->maps_allocated = true; @@ -2309,7 +2309,7 @@ int cmd_stat(int argc, const char **argv) goto out; } - if (perf_evlist__alloc_stats(evsel_list, interval)) + if (evlist__alloc_stats(evsel_list, interval)) goto out; if (perf_stat_init_aggr_mode()) @@ -2349,7 +2349,7 @@ int cmd_stat(int argc, const char **argv) run_idx + 1); if (run_idx != 0) - perf_evlist__reset_prev_raw_counts(evsel_list); + evlist__reset_prev_raw_counts(evsel_list); status = run_perf_stat(argc, argv, run_idx); if (forever && status != -1 && !interval) { @@ -2400,7 +2400,7 @@ int cmd_stat(int argc, const char **argv) } perf_stat__exit_aggr_mode(); - perf_evlist__free_stats(evsel_list); + evlist__free_stats(evsel_list); out: zfree(&stat_config.walltime_run); diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 7c1bde01cb50..ce7be37f0d88 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -166,7 +166,7 @@ static int __compute_metric(const char *name, struct value *vals, if (err) goto out; - err = perf_evlist__alloc_stats(evlist, false); + err = evlist__alloc_stats(evlist, false); if (err) goto out; @@ -183,7 +183,7 @@ out: /* ... clenup. */ metricgroup__rblist_exit(&metric_events); runtime_stat__exit(&st); - perf_evlist__free_stats(evlist); + evlist__free_stats(evlist); perf_cpu_map__put(cpus); evlist__delete(evlist); return err; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 5e98d591cb38..8be9c3da9e56 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -184,7 +184,7 @@ static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) return 0; } -int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw) +int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw) { struct evsel *evsel; @@ -196,11 +196,11 @@ int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw) return 0; out_free: - perf_evlist__free_stats(evlist); + evlist__free_stats(evlist); return -1; } -void perf_evlist__free_stats(struct evlist *evlist) +void evlist__free_stats(struct evlist *evlist) { struct evsel *evsel; @@ -211,7 +211,7 @@ void perf_evlist__free_stats(struct evlist *evlist) } } -void perf_evlist__reset_stats(struct evlist *evlist) +void evlist__reset_stats(struct evlist *evlist) { struct evsel *evsel; @@ -221,7 +221,7 @@ void perf_evlist__reset_stats(struct evlist *evlist) } } -void perf_evlist__reset_prev_raw_counts(struct evlist *evlist) +void evlist__reset_prev_raw_counts(struct evlist *evlist) { struct evsel *evsel; @@ -245,7 +245,7 @@ static void evsel__copy_prev_raw_counts(struct evsel *evsel) evsel->counts->aggr = evsel->prev_raw_counts->aggr; } -void perf_evlist__copy_prev_raw_counts(struct evlist *evlist) +void evlist__copy_prev_raw_counts(struct evlist *evlist) { struct evsel *evsel; @@ -253,7 +253,7 @@ void perf_evlist__copy_prev_raw_counts(struct evlist *evlist) evsel__copy_prev_raw_counts(evsel); } -void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist) +void evlist__save_aggr_prev_raw_counts(struct evlist *evlist) { struct evsel *evsel; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 05adf8165025..4cba4b106e45 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -213,12 +213,12 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct runtime_stat *st); void perf_stat__collect_metric_expr(struct evlist *); -int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw); -void perf_evlist__free_stats(struct evlist *evlist); -void perf_evlist__reset_stats(struct evlist *evlist); -void perf_evlist__reset_prev_raw_counts(struct evlist *evlist); -void perf_evlist__copy_prev_raw_counts(struct evlist *evlist); -void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist); +int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw); +void evlist__free_stats(struct evlist *evlist); +void evlist__reset_stats(struct evlist *evlist); +void evlist__reset_prev_raw_counts(struct evlist *evlist); +void evlist__copy_prev_raw_counts(struct evlist *evlist); +void evlist__save_aggr_prev_raw_counts(struct evlist *evlist); int perf_stat_process_counter(struct perf_stat_config *config, struct evsel *counter); -- cgit v1.3.1 From ade9d208d6f054c0cd69af16c0a23af62b3da3b8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 09:33:55 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' 'toggle' methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 8 ++++---- tools/perf/builtin-top.c | 6 +++--- tools/perf/ui/browsers/hists.c | 2 +- tools/perf/util/evlist.c | 7 +++---- tools/perf/util/evlist.h | 4 ++-- 5 files changed, 13 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4302f32c80c5..d0e3dd57c108 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1166,7 +1166,7 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); if (overwrite) - perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); + evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); out: return rc; } @@ -1860,11 +1860,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) * BKW_MMAP_EMPTY here: when done == true and * hits != rec->samples in previous round. * - * perf_evlist__toggle_bkw_mmap ensure we never + * evlist__toggle_bkw_mmap ensure we never * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. */ if (trigger_is_hit(&switch_output_trigger) || done || draining) - perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); + evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); if (record__mmap_read_all(rec, false) < 0) { trigger_error(&auxtrace_snapshot_trigger); @@ -1903,7 +1903,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) * record__mmap_read_all(): we should have collected * data from it. */ - perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); + evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); if (!quiet) fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 7c64134472c7..b7da09376c72 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -918,14 +918,14 @@ static void perf_top__mmap_read(struct perf_top *top) int i; if (overwrite) - perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING); + evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING); for (i = 0; i < top->evlist->core.nr_mmaps; i++) perf_top__mmap_read_idx(top, i); if (overwrite) { - perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); - perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); + evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); + evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); } } diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index e735d7e37da9..48814efb98e8 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3264,7 +3264,7 @@ do_hotkey: // key came straight from options ui__popup_menu() if (!is_report_browser(hbt)) { struct perf_top *top = hbt->arg; - perf_evlist__toggle_enable(top->evlist); + evlist__toggle_enable(top->evlist); /* * No need to refresh, resort/decay histogram * entries if we are not collecting samples: diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 198d4ef03884..3ea3c649ab04 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -447,7 +447,7 @@ void evlist__enable(struct evlist *evlist) evlist->enabled = true; } -void perf_evlist__toggle_enable(struct evlist *evlist) +void evlist__toggle_enable(struct evlist *evlist) { (evlist->enabled ? evlist__disable : evlist__enable)(evlist); } @@ -727,7 +727,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx) if (overwrite) { evlist->overwrite_mmap = maps; if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) - perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); + evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); } else { evlist->mmap = maps; } @@ -1613,8 +1613,7 @@ perf_evlist__find_evsel_by_str(struct evlist *evlist, return NULL; } -void perf_evlist__toggle_bkw_mmap(struct evlist *evlist, - enum bkw_mmap_state state) +void evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state) { enum bkw_mmap_state old_state = evlist->bkw_mmap_state; enum action { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a0a53f33a272..e5a4de1a8915 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -168,7 +168,7 @@ struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist, struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id); -void perf_evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state); +void evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state); void evlist__mmap_consume(struct evlist *evlist, int idx); @@ -207,7 +207,7 @@ size_t evlist__mmap_size(unsigned long pages); void evlist__disable(struct evlist *evlist); void evlist__enable(struct evlist *evlist); -void perf_evlist__toggle_enable(struct evlist *evlist); +void evlist__toggle_enable(struct evlist *evlist); int perf_evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx); -- cgit v1.3.1 From 24bf91a7540bc0d14c389dd4f612eea57c06dc93 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 09:38:02 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' 'filter' methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-stat.c | 2 +- tools/perf/builtin-trace.c | 8 ++++---- tools/perf/util/evlist.c | 22 +++++++++++----------- tools/perf/util/evlist.h | 14 +++++++------- 5 files changed, 24 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d0e3dd57c108..d53396421a02 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -946,7 +946,7 @@ try_again: "even with a suitable vmlinux or kallsyms file.\n\n"); } - if (perf_evlist__apply_filters(evlist, &pos)) { + if (evlist__apply_filters(evlist, &pos)) { pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", pos->filter, evsel__name(pos), errno, str_error_r(errno, msg, sizeof(msg))); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index b81e64017808..29f7d4752e84 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -842,7 +842,7 @@ try_again_reset: return -1; } - if (perf_evlist__apply_filters(evsel_list, &counter)) { + if (evlist__apply_filters(evsel_list, &counter)) { pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", counter->filter, evsel__name(counter), errno, str_error_r(errno, msg, sizeof(msg))); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 80af0bf37d76..9f6f0f873cbc 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3666,7 +3666,7 @@ static int trace__set_filter_loop_pids(struct trace *trace) thread = parent; } - err = perf_evlist__append_tp_filter_pids(trace->evlist, nr, pids); + err = evlist__append_tp_filter_pids(trace->evlist, nr, pids); if (!err && trace->filter_pids.map) err = bpf_map__set_filter_pids(trace->filter_pids.map, nr, pids); @@ -3683,8 +3683,8 @@ static int trace__set_filter_pids(struct trace *trace) * we fork the workload in evlist__prepare_workload. */ if (trace->filter_pids.nr > 0) { - err = perf_evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr, - trace->filter_pids.entries); + err = evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr, + trace->filter_pids.entries); if (!err && trace->filter_pids.map) { err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr, trace->filter_pids.entries); @@ -4027,7 +4027,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) err = trace__expand_filters(trace, &evsel); if (err) goto out_delete_evlist; - err = perf_evlist__apply_filters(evlist, &evsel); + err = evlist__apply_filters(evlist, &evsel); if (err < 0) goto out_error_apply_filters; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3ea3c649ab04..725f9f58bdd1 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -975,7 +975,7 @@ void __perf_evlist__reset_sample_bit(struct evlist *evlist, __evsel__reset_sample_bit(evsel, bit); } -int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel) +int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel) { struct evsel *evsel; int err = 0; @@ -998,7 +998,7 @@ int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel) return err; } -int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter) +int evlist__set_tp_filter(struct evlist *evlist, const char *filter) { struct evsel *evsel; int err = 0; @@ -1018,7 +1018,7 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter) return err; } -int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter) +int evlist__append_tp_filter(struct evlist *evlist, const char *filter) { struct evsel *evsel; int err = 0; @@ -1064,32 +1064,32 @@ out_free: return NULL; } -int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) +int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) { char *filter = asprintf__tp_filter_pids(npids, pids); - int ret = perf_evlist__set_tp_filter(evlist, filter); + int ret = evlist__set_tp_filter(evlist, filter); free(filter); return ret; } -int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid) +int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid) { - return perf_evlist__set_tp_filter_pids(evlist, 1, &pid); + return evlist__set_tp_filter_pids(evlist, 1, &pid); } -int perf_evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) +int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) { char *filter = asprintf__tp_filter_pids(npids, pids); - int ret = perf_evlist__append_tp_filter(evlist, filter); + int ret = evlist__append_tp_filter(evlist, filter); free(filter); return ret; } -int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid) +int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid) { - return perf_evlist__append_tp_filter_pids(evlist, 1, &pid); + return evlist__append_tp_filter_pids(evlist, 1, &pid); } bool evlist__valid_sample_type(struct evlist *evlist) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index e5a4de1a8915..3b1acf70b5dd 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -141,14 +141,14 @@ void __perf_evlist__reset_sample_bit(struct evlist *evlist, #define perf_evlist__reset_sample_bit(evlist, bit) \ __perf_evlist__reset_sample_bit(evlist, PERF_SAMPLE_##bit) -int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter); -int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid); -int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); +int evlist__set_tp_filter(struct evlist *evlist, const char *filter); +int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid); +int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); -int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter); +int evlist__append_tp_filter(struct evlist *evlist, const char *filter); -int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid); -int perf_evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); +int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid); +int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); struct evsel * perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id); @@ -216,7 +216,7 @@ void perf_evlist__set_selected(struct evlist *evlist, struct evsel *evsel); int perf_evlist__create_maps(struct evlist *evlist, struct target *target); -int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel); +int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel); void __evlist__set_leader(struct list_head *list); void evlist__set_leader(struct evlist *evlist); -- cgit v1.3.1 From 08c83997ca87f9e162563a59ea43eabadc9e4231 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 09:40:10 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' sideband thread methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 4 ++-- tools/perf/builtin-top.c | 4 ++-- tools/perf/util/bpf-event.c | 2 +- tools/perf/util/evlist.h | 11 ++++------- tools/perf/util/sideband_evlist.c | 8 ++++---- 5 files changed, 13 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d53396421a02..04946bd3d090 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1548,7 +1548,7 @@ static int record__setup_sb_evlist(struct record *rec) } } #endif - if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { + if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); opts->no_bpf_event = true; } @@ -2065,7 +2065,7 @@ out_delete_session: perf_session__delete(session); if (!opts->no_bpf_event) - perf_evlist__stop_sb_thread(rec->sb_evlist); + evlist__stop_sb_thread(rec->sb_evlist); return status; } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index b7da09376c72..50dd42da66ee 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1762,7 +1762,7 @@ int cmd_top(int argc, const char **argv) } #endif - if (perf_evlist__start_sb_thread(top.sb_evlist, target)) { + if (evlist__start_sb_thread(top.sb_evlist, target)) { pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); opts->no_bpf_event = true; } @@ -1770,7 +1770,7 @@ int cmd_top(int argc, const char **argv) status = __cmd_top(&top); if (!opts->no_bpf_event) - perf_evlist__stop_sb_thread(top.sb_evlist); + evlist__stop_sb_thread(top.sb_evlist); out_delete_evlist: evlist__delete(top.evlist); diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 3742511a08d1..57d58c81a5f8 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -526,7 +526,7 @@ int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env) */ attr.wakeup_watermark = 1; - return perf_evlist__add_sb_event(evlist, &attr, bpf_event__sb_cb, env); + return evlist__add_sb_event(evlist, &attr, bpf_event__sb_cb, env); } void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 3b1acf70b5dd..736cb639df84 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -112,14 +112,11 @@ int __evlist__add_default_attrs(struct evlist *evlist, int evlist__add_dummy(struct evlist *evlist); -int perf_evlist__add_sb_event(struct evlist *evlist, - struct perf_event_attr *attr, - evsel__sb_cb_t cb, - void *data); +int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, + evsel__sb_cb_t cb, void *data); void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data); -int perf_evlist__start_sb_thread(struct evlist *evlist, - struct target *target); -void perf_evlist__stop_sb_thread(struct evlist *evlist); +int evlist__start_sb_thread(struct evlist *evlist, struct target *target); +void evlist__stop_sb_thread(struct evlist *evlist); int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler); diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c index ded9ced02797..90ed016bb348 100644 --- a/tools/perf/util/sideband_evlist.c +++ b/tools/perf/util/sideband_evlist.c @@ -12,8 +12,8 @@ #include #include -int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, - evsel__sb_cb_t cb, void *data) +int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, + evsel__sb_cb_t cb, void *data) { struct evsel *evsel; @@ -94,7 +94,7 @@ void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data) } } -int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target) +int evlist__start_sb_thread(struct evlist *evlist, struct target *target) { struct evsel *counter; @@ -138,7 +138,7 @@ out_delete_evlist: return -1; } -void perf_evlist__stop_sb_thread(struct evlist *evlist) +void evlist__stop_sb_thread(struct evlist *evlist) { if (!evlist) return; -- cgit v1.3.1 From 2a6599cd5e093b3c607a39288f14a618c03a0e24 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 09:43:07 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' sample parsing methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 2 +- tools/perf/builtin-top.c | 4 ++-- tools/perf/builtin-trace.c | 5 ++--- tools/perf/tests/code-reading.c | 4 ++-- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/parse-no-sample-id-all.c | 4 ++-- tools/perf/tests/perf-record.c | 2 +- tools/perf/tests/sw-clock.c | 2 +- tools/perf/tests/switch-tracking.c | 8 ++++---- tools/perf/util/auxtrace.c | 2 +- tools/perf/util/evlist.c | 7 ++----- tools/perf/util/evlist.h | 8 ++------ tools/perf/util/s390-cpumsf.c | 4 ++-- tools/perf/util/session.c | 7 +++---- 14 files changed, 26 insertions(+), 35 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 460945ded6dd..e1b60aad13cb 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -764,7 +764,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, return (err == -EAGAIN) ? 0 : -1; while ((event = perf_mmap__read_event(&md->core)) != NULL) { - err = perf_evlist__parse_sample_timestamp(evlist, event, ×tamp); + err = evlist__parse_sample_timestamp(evlist, event, ×tamp); if (err) { perf_mmap__consume(&md->core); pr_err("Failed to parse sample\n"); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 50dd42da66ee..fff1d5ef5c49 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -890,7 +890,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) while ((event = perf_mmap__read_event(&md->core)) != NULL) { int ret; - ret = perf_evlist__parse_sample_timestamp(evlist, event, &last_timestamp); + ret = evlist__parse_sample_timestamp(evlist, event, &last_timestamp); if (ret && ret != -1) break; @@ -1153,7 +1153,7 @@ static int deliver_event(struct ordered_events *qe, return 0; } - ret = perf_evlist__parse_sample(evlist, event, &sample); + ret = evlist__parse_sample(evlist, event, &sample); if (ret) { pr_err("Can't parse sample, err = %d\n", ret); goto next_event; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 9f6f0f873cbc..5638eae36ec8 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3700,9 +3700,8 @@ static int __trace__deliver_event(struct trace *trace, union perf_event *event) { struct evlist *evlist = trace->evlist; struct perf_sample sample; - int err; + int err = evlist__parse_sample(evlist, event, &sample); - err = perf_evlist__parse_sample(evlist, event, &sample); if (err) fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err); else @@ -3735,7 +3734,7 @@ static int trace__deliver_event(struct trace *trace, union perf_event *event) if (!trace->sort_events) return __trace__deliver_event(trace, event); - err = perf_evlist__parse_sample_timestamp(trace->evlist, event, &trace->oe.last); + err = evlist__parse_sample_timestamp(trace->evlist, event, &trace->oe.last); if (err && err != -1) return err; diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 035c9123549a..339d432b1bf1 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -378,8 +378,8 @@ static int process_sample_event(struct machine *machine, struct thread *thread; int ret; - if (perf_evlist__parse_sample(evlist, event, &sample)) { - pr_debug("perf_evlist__parse_sample failed\n"); + if (evlist__parse_sample(evlist, event, &sample)) { + pr_debug("evlist__parse_sample failed\n"); return -1; } diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 7b0dbfc0e17d..d826153adbf8 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -126,7 +126,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse goto out_delete_evlist; } - err = perf_evlist__parse_sample(evlist, event, &sample); + err = evlist__parse_sample(evlist, event, &sample); if (err) { pr_err("Can't parse sample, err = %d\n", err); goto out_delete_evlist; diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c index adf3c9c4a416..471273676701 100644 --- a/tools/perf/tests/parse-no-sample-id-all.c +++ b/tools/perf/tests/parse-no-sample-id-all.c @@ -27,8 +27,8 @@ static int process_event(struct evlist **pevlist, union perf_event *event) if (!*pevlist) return -1; - if (perf_evlist__parse_sample(*pevlist, event, &sample)) { - pr_debug("perf_evlist__parse_sample failed\n"); + if (evlist__parse_sample(*pevlist, event, &sample)) { + pr_debug("evlist__parse_sample failed\n"); return -1; } diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 8745bf5f93e6..f0b2066c7ea9 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -182,7 +182,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus if (type < PERF_RECORD_MAX) nr_events[type]++; - err = perf_evlist__parse_sample(evlist, event, &sample); + err = evlist__parse_sample(evlist, event, &sample); if (err < 0) { if (verbose > 0) perf_event__fprintf(event, NULL, stderr); diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 4b9b731977c8..a49c9e23053b 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -109,7 +109,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) if (event->header.type != PERF_RECORD_SAMPLE) goto next_event; - err = perf_evlist__parse_sample(evlist, event, &sample); + err = evlist__parse_sample(evlist, event, &sample); if (err < 0) { pr_debug("Error during parse sample\n"); goto out_delete_evlist; diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index db5e1f70053a..b8cdbe6f324f 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -128,8 +128,8 @@ static int process_sample_event(struct evlist *evlist, pid_t next_tid, prev_tid; int cpu, err; - if (perf_evlist__parse_sample(evlist, event, &sample)) { - pr_debug("perf_evlist__parse_sample failed\n"); + if (evlist__parse_sample(evlist, event, &sample)) { + pr_debug("evlist__parse_sample failed\n"); return -1; } @@ -223,8 +223,8 @@ static int add_event(struct evlist *evlist, struct list_head *events, node->event = event; list_add(&node->list, events); - if (perf_evlist__parse_sample(evlist, event, &sample)) { - pr_debug("perf_evlist__parse_sample failed\n"); + if (evlist__parse_sample(evlist, event, &sample)) { + pr_debug("evlist__parse_sample failed\n"); return -1; } diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 62e7f6c5f8b5..647afdcb2699 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1082,7 +1082,7 @@ static int auxtrace_queue_data_cb(struct perf_session *session, if (!qd->samples || event->header.type != PERF_RECORD_SAMPLE) return 0; - err = perf_evlist__parse_sample(session->evlist, event, &sample); + err = evlist__parse_sample(session->evlist, event, &sample); if (err) return err; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 725f9f58bdd1..943262aa6505 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1447,8 +1447,7 @@ int evlist__start_workload(struct evlist *evlist) return 0; } -int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event, - struct perf_sample *sample) +int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) { struct evsel *evsel = perf_evlist__event2evsel(evlist, event); @@ -1457,9 +1456,7 @@ int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event, return evsel__parse_sample(evsel, event, sample); } -int perf_evlist__parse_sample_timestamp(struct evlist *evlist, - union perf_event *event, - u64 *timestamp) +int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp) { struct evsel *evsel = perf_evlist__event2evsel(evlist, event); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 736cb639df84..c59a08cf9f25 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -224,12 +224,8 @@ u64 evlist__combined_branch_type(struct evlist *evlist); bool evlist__sample_id_all(struct evlist *evlist); u16 perf_evlist__id_hdr_size(struct evlist *evlist); -int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event, - struct perf_sample *sample); - -int perf_evlist__parse_sample_timestamp(struct evlist *evlist, - union perf_event *event, - u64 *timestamp); +int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample); +int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp); bool evlist__valid_sample_type(struct evlist *evlist); bool evlist__valid_sample_id_all(struct evlist *evlist); diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index f8861998e5bd..959e9890bccc 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -96,9 +96,9 @@ * | than PERF_RECORD_USER_TYPE_START) are handled by * | perf_session__process_user_event(see below) * | - Those generated by the kernel are handled by - * | perf_evlist__parse_sample_timestamp() + * | evlist__parse_sample_timestamp() * | - * perf_evlist__parse_sample_timestamp() + * evlist__parse_sample_timestamp() * | Extract time stamp from sample data. * | * perf_session__queue_event() diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 5cc722b6fe7c..61e4b3cc7313 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1523,9 +1523,8 @@ static int perf_session__deliver_event(struct perf_session *session, u64 file_offset) { struct perf_sample sample; - int ret; + int ret = evlist__parse_sample(session->evlist, event, &sample); - ret = perf_evlist__parse_sample(session->evlist, event, &sample); if (ret) { pr_err("Can't parse sample, err = %d\n", ret); return ret; @@ -1697,7 +1696,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, out_parse_sample: if (sample && event->header.type < PERF_RECORD_USER_TYPE_START && - perf_evlist__parse_sample(session->evlist, event, sample)) + evlist__parse_sample(session->evlist, event, sample)) return -1; *event_ptr = event; @@ -1754,7 +1753,7 @@ static s64 perf_session__process_event(struct perf_session *session, if (tool->ordered_events) { u64 timestamp = -1ULL; - ret = perf_evlist__parse_sample_timestamp(evlist, event, ×tamp); + ret = evlist__parse_sample_timestamp(evlist, event, ×tamp); if (ret && ret != -1) return ret; -- cgit v1.3.1 From b02736f776d5f50bb13ff85eb34efaed0c3f5ffa Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 09:48:07 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' 'find' methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kmem.c | 7 ++----- tools/perf/builtin-sched.c | 6 ++---- tools/perf/builtin-trace.c | 14 +++++--------- tools/perf/util/bpf-loader.c | 3 +-- tools/perf/util/evlist.c | 14 ++++---------- tools/perf/util/evlist.h | 11 +++-------- tools/perf/util/evswitch.c | 4 ++-- tools/perf/util/header.c | 5 ++--- 8 files changed, 21 insertions(+), 43 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index a50dae2c4ae9..0062445e8ead 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1960,18 +1960,15 @@ int cmd_kmem(int argc, const char **argv) ret = -1; if (kmem_slab) { - if (!perf_evlist__find_tracepoint_by_name(session->evlist, - "kmem:kmalloc")) { + if (!evlist__find_tracepoint_by_name(session->evlist, "kmem:kmalloc")) { pr_err(errmsg, "slab", "slab"); goto out_delete; } } if (kmem_page) { - struct evsel *evsel; + struct evsel *evsel = evlist__find_tracepoint_by_name(session->evlist, "kmem:mm_page_alloc"); - evsel = perf_evlist__find_tracepoint_by_name(session->evlist, - "kmem:mm_page_alloc"); if (evsel == NULL) { pr_err(errmsg, "page", "page"); goto out_delete; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 0e16f9d5a947..69c769b04a61 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -3036,8 +3036,7 @@ static int perf_sched__timehist(struct perf_sched *sched) setup_pager(); /* prefer sched_waking if it is captured */ - if (perf_evlist__find_tracepoint_by_name(session->evlist, - "sched:sched_waking")) + if (evlist__find_tracepoint_by_name(session->evlist, "sched:sched_waking")) handlers[1].handler = timehist_sched_wakeup_ignore; /* setup per-evsel handlers */ @@ -3045,8 +3044,7 @@ static int perf_sched__timehist(struct perf_sched *sched) goto out; /* sched_switch event at a minimum needs to exist */ - if (!perf_evlist__find_tracepoint_by_name(session->evlist, - "sched:sched_switch")) { + if (!evlist__find_tracepoint_by_name(session->evlist, "sched:sched_switch")) { pr_err("No sched_switch events found. Have you run 'perf sched record'?\n"); goto out; } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 5638eae36ec8..6e47b19a1dcb 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -4227,12 +4227,10 @@ static int trace__replay(struct trace *trace) if (err) goto out; - evsel = perf_evlist__find_tracepoint_by_name(session->evlist, - "raw_syscalls:sys_enter"); + evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_enter"); /* older kernels have syscalls tp versus raw_syscalls */ if (evsel == NULL) - evsel = perf_evlist__find_tracepoint_by_name(session->evlist, - "syscalls:sys_enter"); + evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_enter"); if (evsel && (evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 || @@ -4241,11 +4239,9 @@ static int trace__replay(struct trace *trace) goto out; } - evsel = perf_evlist__find_tracepoint_by_name(session->evlist, - "raw_syscalls:sys_exit"); + evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_exit"); if (evsel == NULL) - evsel = perf_evlist__find_tracepoint_by_name(session->evlist, - "syscalls:sys_exit"); + evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_exit"); if (evsel && (evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 || perf_evsel__init_sc_tp_uint_field(evsel, ret))) { @@ -4905,7 +4901,7 @@ int cmd_trace(int argc, const char **argv) if (evsel) { trace.syscalls.events.augmented = evsel; - evsel = perf_evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter"); + evsel = evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter"); if (evsel == NULL) { pr_err("ERROR: raw_syscalls:sys_enter not found in the augmented BPF object\n"); goto out; diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 0374adcb223c..9087f1bffd3d 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -1058,12 +1058,11 @@ __bpf_map__config_event(struct bpf_map *map, struct parse_events_term *term, struct evlist *evlist) { - struct evsel *evsel; const struct bpf_map_def *def; struct bpf_map_op *op; const char *map_name = bpf_map__name(map); + struct evsel *evsel = evlist__find_evsel_by_str(evlist, term->val.str); - evsel = perf_evlist__find_evsel_by_str(evlist, term->val.str); if (!evsel) { pr_debug("Event (for '%s') '%s' doesn't exist\n", map_name, term->val.str); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 943262aa6505..e6b2715ad31d 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -191,13 +191,12 @@ void perf_evlist__splice_list_tail(struct evlist *evlist, int __evlist__set_tracepoints_handlers(struct evlist *evlist, const struct evsel_str_handler *assocs, size_t nr_assocs) { - struct evsel *evsel; size_t i; int err; for (i = 0; i < nr_assocs; i++) { // Adding a handler for an event not in this evlist, just ignore it. - evsel = perf_evlist__find_tracepoint_by_name(evlist, assocs[i].name); + struct evsel *evsel = evlist__find_tracepoint_by_name(evlist, assocs[i].name); if (evsel == NULL) continue; @@ -294,8 +293,7 @@ int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *a return evlist__add_attrs(evlist, attrs, nr_attrs); } -struct evsel * -perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id) +struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id) { struct evsel *evsel; @@ -308,9 +306,7 @@ perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id) return NULL; } -struct evsel * -perf_evlist__find_tracepoint_by_name(struct evlist *evlist, - const char *name) +struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name) { struct evsel *evsel; @@ -1594,9 +1590,7 @@ void perf_evlist__set_tracking_event(struct evlist *evlist, tracking_evsel->tracking = true; } -struct evsel * -perf_evlist__find_evsel_by_str(struct evlist *evlist, - const char *str) +struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str) { struct evsel *evsel; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index c59a08cf9f25..aa9b3f9431e7 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -147,12 +147,8 @@ int evlist__append_tp_filter(struct evlist *evlist, const char *filter); int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid); int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); -struct evsel * -perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id); - -struct evsel * -perf_evlist__find_tracepoint_by_name(struct evlist *evlist, - const char *name); +struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id); +struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name); int evlist__add_pollfd(struct evlist *evlist, int fd); int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask); @@ -338,8 +334,7 @@ void evlist__cpu_iter_start(struct evlist *evlist); bool evsel__cpu_iter_skip(struct evsel *ev, int cpu); bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu); -struct evsel * -perf_evlist__find_evsel_by_str(struct evlist *evlist, const char *str); +struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str); struct evsel *perf_evlist__event2evsel(struct evlist *evlist, union perf_event *event); diff --git a/tools/perf/util/evswitch.c b/tools/perf/util/evswitch.c index 3ba72f743d3c..40cb56a9347d 100644 --- a/tools/perf/util/evswitch.c +++ b/tools/perf/util/evswitch.c @@ -41,7 +41,7 @@ static int evswitch__fprintf_enoent(FILE *fp, const char *evtype, const char *ev int evswitch__init(struct evswitch *evswitch, struct evlist *evlist, FILE *fp) { if (evswitch->on_name) { - evswitch->on = perf_evlist__find_evsel_by_str(evlist, evswitch->on_name); + evswitch->on = evlist__find_evsel_by_str(evlist, evswitch->on_name); if (evswitch->on == NULL) { evswitch__fprintf_enoent(fp, "on", evswitch->on_name); return -ENOENT; @@ -50,7 +50,7 @@ int evswitch__init(struct evswitch *evswitch, struct evlist *evlist, FILE *fp) } if (evswitch->off_name) { - evswitch->off = perf_evlist__find_evsel_by_str(evlist, evswitch->off_name); + evswitch->off = evlist__find_evsel_by_str(evlist, evswitch->off_name); if (evswitch->off == NULL) { evswitch__fprintf_enoent(fp, "off", evswitch->off_name); return -ENOENT; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b65c8f7ce36a..64a3b83b3090 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2263,8 +2263,7 @@ static int process_total_mem(struct feat_fd *ff, void *data __maybe_unused) return 0; } -static struct evsel * -perf_evlist__find_by_index(struct evlist *evlist, int idx) +static struct evsel *evlist__find_by_index(struct evlist *evlist, int idx) { struct evsel *evsel; @@ -2285,7 +2284,7 @@ perf_evlist__set_event_name(struct evlist *evlist, if (!event->name) return; - evsel = perf_evlist__find_by_index(evlist, event->idx); + evsel = evlist__find_by_index(evlist, event->idx); if (!evsel) return; -- cgit v1.3.1 From fd643db5a8797dde0fe8d6f2fd01f36971d43fe0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 09:54:08 -0300 Subject: perf evlist: Ditch unused set/reset sample_bit methods Not used anymore, ditch them. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 18 ------------------ tools/perf/util/evlist.h | 11 ----------- 2 files changed, 29 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e6b2715ad31d..86172ee394d0 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -953,24 +953,6 @@ out_delete_threads: return -1; } -void __perf_evlist__set_sample_bit(struct evlist *evlist, - enum perf_event_sample_format bit) -{ - struct evsel *evsel; - - evlist__for_each_entry(evlist, evsel) - __evsel__set_sample_bit(evsel, bit); -} - -void __perf_evlist__reset_sample_bit(struct evlist *evlist, - enum perf_event_sample_format bit) -{ - struct evsel *evsel; - - evlist__for_each_entry(evlist, evsel) - __evsel__reset_sample_bit(evsel, bit); -} - int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel) { struct evsel *evsel; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index aa9b3f9431e7..a1288b35717c 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -127,17 +127,6 @@ int __evlist__set_tracepoints_handlers(struct evlist *evlist, #define evlist__set_tracepoints_handlers(evlist, array) \ __evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array)) -void __perf_evlist__set_sample_bit(struct evlist *evlist, - enum perf_event_sample_format bit); -void __perf_evlist__reset_sample_bit(struct evlist *evlist, - enum perf_event_sample_format bit); - -#define perf_evlist__set_sample_bit(evlist, bit) \ - __perf_evlist__set_sample_bit(evlist, PERF_SAMPLE_##bit) - -#define perf_evlist__reset_sample_bit(evlist, bit) \ - __perf_evlist__reset_sample_bit(evlist, PERF_SAMPLE_##bit) - int evlist__set_tp_filter(struct evlist *evlist, const char *filter); int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid); int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); -- cgit v1.3.1 From 3ccf8a7b66b6bff69a7be62f2d5a2a61328ebe91 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 14:17:57 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' sample id lookup methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/builtin-trace.c | 2 +- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/util/auxtrace.c | 4 ++-- tools/perf/util/evlist.c | 23 ++++++++++------------- tools/perf/util/evlist.h | 10 ++++------ tools/perf/util/evsel.c | 2 +- tools/perf/util/header.c | 2 +- tools/perf/util/intel-pt.c | 3 +-- tools/perf/util/python.c | 2 +- tools/perf/util/s390-cpumsf.c | 2 +- tools/perf/util/s390-sample-raw.c | 2 +- tools/perf/util/session.c | 6 +++--- tools/perf/util/sideband_evlist.c | 2 +- tools/perf/util/stat.c | 2 +- tools/perf/util/synthetic-events.c | 2 +- 18 files changed, 33 insertions(+), 39 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 983c101965ab..1c322c129185 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2224,7 +2224,7 @@ static int print_event_with_time(struct perf_tool *tool, { struct perf_script *script = container_of(tool, struct perf_script, tool); struct perf_session *session = script->session; - struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); + struct evsel *evsel = evlist__id2evsel(session->evlist, sample->id); struct thread *thread = NULL; if (evsel && !evsel->core.attr.sample_id_all) { diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index fff1d5ef5c49..88bd1ac5c8cf 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1159,7 +1159,7 @@ static int deliver_event(struct ordered_events *qe, goto next_event; } - evsel = perf_evlist__id2evsel(session->evlist, sample.id); + evsel = evlist__id2evsel(session->evlist, sample.id); assert(evsel != NULL); if (event->header.type == PERF_RECORD_SAMPLE) { diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 6e47b19a1dcb..a463ce8fb8e0 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3105,7 +3105,7 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st return; } - evsel = perf_evlist__id2evsel(trace->evlist, sample->id); + evsel = evlist__id2evsel(trace->evlist, sample->id); if (evsel == NULL) { fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id); return; diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index d826153adbf8..c01d7e12d241 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -133,7 +133,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse } err = -1; - evsel = perf_evlist__id2evsel(evlist, sample.id); + evsel = evlist__id2evsel(evlist, sample.id); if (evsel == NULL) { pr_debug("event with id %" PRIu64 " doesn't map to an evsel\n", sample.id); diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index b8cdbe6f324f..3c0bc3f1fd0e 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -133,7 +133,7 @@ static int process_sample_event(struct evlist *evlist, return -1; } - evsel = perf_evlist__id2evsel(evlist, sample.id); + evsel = evlist__id2evsel(evlist, sample.id); if (evsel == switch_tracking->switch_evsel) { next_tid = evsel__intval(evsel, &sample, "next_pid"); prev_tid = evsel__intval(evsel, &sample, "prev_pid"); diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 647afdcb2699..b538b9093567 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1017,7 +1017,7 @@ struct auxtrace_queue *auxtrace_queues__sample_queue(struct auxtrace_queues *que if (!id) return NULL; - sid = perf_evlist__id2sid(session->evlist, id); + sid = evlist__id2sid(session->evlist, id); if (!sid) return NULL; @@ -1047,7 +1047,7 @@ int auxtrace_queues__add_sample(struct auxtrace_queues *queues, if (!id) return -EINVAL; - sid = perf_evlist__id2sid(session->evlist, id); + sid = evlist__id2sid(session->evlist, id); if (!sid) return -ENOENT; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 86172ee394d0..51775ff2979f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -509,7 +509,7 @@ int evlist__poll(struct evlist *evlist, int timeout) return perf_evlist__poll(&evlist->core, timeout); } -struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id) +struct perf_sample_id *evlist__id2sid(struct evlist *evlist, u64 id) { struct hlist_head *head; struct perf_sample_id *sid; @@ -525,14 +525,14 @@ struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id) return NULL; } -struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id) +struct evsel *evlist__id2evsel(struct evlist *evlist, u64 id) { struct perf_sample_id *sid; if (evlist->core.nr_entries == 1 || !id) return evlist__first(evlist); - sid = perf_evlist__id2sid(evlist, id); + sid = evlist__id2sid(evlist, id); if (sid) return container_of(sid->evsel, struct evsel, core); @@ -542,23 +542,21 @@ struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id) return NULL; } -struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist, - u64 id) +struct evsel *evlist__id2evsel_strict(struct evlist *evlist, u64 id) { struct perf_sample_id *sid; if (!id) return NULL; - sid = perf_evlist__id2sid(evlist, id); + sid = evlist__id2sid(evlist, id); if (sid) return container_of(sid->evsel, struct evsel, core); return NULL; } -static int perf_evlist__event2id(struct evlist *evlist, - union perf_event *event, u64 *id) +static int evlist__event2id(struct evlist *evlist, union perf_event *event, u64 *id) { const __u64 *array = event->sample.array; ssize_t n; @@ -578,8 +576,7 @@ static int perf_evlist__event2id(struct evlist *evlist, return 0; } -struct evsel *perf_evlist__event2evsel(struct evlist *evlist, - union perf_event *event) +struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event) { struct evsel *first = evlist__first(evlist); struct hlist_head *head; @@ -594,7 +591,7 @@ struct evsel *perf_evlist__event2evsel(struct evlist *evlist, event->header.type != PERF_RECORD_SAMPLE) return first; - if (perf_evlist__event2id(evlist, event, &id)) + if (evlist__event2id(evlist, event, &id)) return NULL; /* Synthesized events have an id of zero */ @@ -1427,7 +1424,7 @@ int evlist__start_workload(struct evlist *evlist) int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) { - struct evsel *evsel = perf_evlist__event2evsel(evlist, event); + struct evsel *evsel = evlist__event2evsel(evlist, event); if (!evsel) return -EFAULT; @@ -1436,7 +1433,7 @@ int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp) { - struct evsel *evsel = perf_evlist__event2evsel(evlist, event); + struct evsel *evsel = evlist__event2evsel(evlist, event); if (!evsel) return -EFAULT; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a1288b35717c..90f1127fecf6 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -144,11 +144,10 @@ int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask); int evlist__poll(struct evlist *evlist, int timeout); -struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id); -struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist, - u64 id); +struct evsel *evlist__id2evsel(struct evlist *evlist, u64 id); +struct evsel *evlist__id2evsel_strict(struct evlist *evlist, u64 id); -struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id); +struct perf_sample_id *evlist__id2sid(struct evlist *evlist, u64 id); void evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state); @@ -325,8 +324,7 @@ bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu); struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str); -struct evsel *perf_evlist__event2evsel(struct evlist *evlist, - union perf_event *event); +struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event); bool perf_evlist__exclude_kernel(struct evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 9005cc974f43..3dd0eae9810d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1469,7 +1469,7 @@ static int evsel__process_group_data(struct evsel *leader, int cpu, int thread, for (i = 1; i < nr; i++) { struct evsel *counter; - counter = perf_evlist__id2evsel(leader->evlist, v[i].id); + counter = evlist__id2evsel(leader->evlist, v[i].id); if (!counter) return -EINVAL; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 64a3b83b3090..be219051119c 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -4030,7 +4030,7 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, evlist = *pevlist; - evsel = perf_evlist__id2evsel(evlist, ev->id); + evsel = evlist__id2evsel(evlist, ev->id); if (evsel == NULL) return -EINVAL; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 3a0348caec7d..60214de42f31 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -2520,11 +2520,10 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid, static int intel_pt_process_switch(struct intel_pt *pt, struct perf_sample *sample) { - struct evsel *evsel; pid_t tid; int cpu, ret; + struct evsel *evsel = evlist__id2evsel(pt->session->evlist, sample->id); - evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id); if (evsel != pt->switch_evsel) return 0; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 3d3b46514863..cc5ade85a33f 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -1055,7 +1055,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, if (pyevent == NULL) return PyErr_NoMemory(); - evsel = perf_evlist__event2evsel(evlist, event); + evsel = evlist__event2evsel(evlist, event); if (!evsel) { Py_INCREF(Py_None); return Py_None; diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 959e9890bccc..078a71773565 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -932,7 +932,7 @@ s390_cpumsf_process_event(struct perf_session *session, if (event->header.type == PERF_RECORD_SAMPLE && sample->raw_size) { /* Handle event with raw data */ - ev_bc000 = perf_evlist__event2evsel(session->evlist, event); + ev_bc000 = evlist__event2evsel(session->evlist, event); if (ev_bc000 && ev_bc000->core.attr.config == PERF_EVENT_CPUM_CF_DIAG) err = s390_cpumcf_dumpctr(sf, sample); diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 05b43ab4eeef..d177e6179839 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -205,7 +205,7 @@ void perf_evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event if (event->header.type != PERF_RECORD_SAMPLE) return; - ev_bc000 = perf_evlist__event2evsel(evlist, event); + ev_bc000 = evlist__event2evsel(evlist, event); if (ev_bc000 == NULL || ev_bc000->core.attr.config != PERF_EVENT_CPUM_CF_DIAG) return; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 61e4b3cc7313..ce381b3dca06 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1364,7 +1364,7 @@ static int deliver_sample_value(struct evlist *evlist, struct sample_read_value *v, struct machine *machine) { - struct perf_sample_id *sid = perf_evlist__id2sid(evlist, v->id); + struct perf_sample_id *sid = evlist__id2sid(evlist, v->id); struct evsel *evsel; if (sid) { @@ -1445,7 +1445,7 @@ static int machines__deliver_event(struct machines *machines, dump_event(evlist, event, file_offset, sample); - evsel = perf_evlist__id2evsel(evlist, sample->id); + evsel = evlist__id2evsel(evlist, sample->id); machine = machines__find_for_cpumode(machines, event, sample); @@ -2476,7 +2476,7 @@ int perf_event__process_id_index(struct perf_session *session, fprintf(stdout, " tid: %"PRI_ld64"\n", e->tid); } - sid = perf_evlist__id2sid(evlist, e->id); + sid = evlist__id2sid(evlist, e->id); if (!sid) return -ENOENT; sid->idx = e->idx; diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c index 90ed016bb348..9c04c71dbf51 100644 --- a/tools/perf/util/sideband_evlist.c +++ b/tools/perf/util/sideband_evlist.c @@ -62,7 +62,7 @@ static void *perf_evlist__poll_thread(void *arg) if (perf_mmap__read_init(&map->core)) continue; while ((event = perf_mmap__read_event(&map->core)) != NULL) { - struct evsel *evsel = perf_evlist__event2evsel(evlist, event); + struct evsel *evsel = evlist__event2evsel(evlist, event); if (evsel && evsel->side_band.cb) evsel->side_band.cb(event, evsel->side_band.data); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 8be9c3da9e56..1e125e39ff84 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -458,7 +458,7 @@ int perf_event__process_stat_event(struct perf_session *session, count.ena = st->ena; count.run = st->run; - counter = perf_evlist__id2evsel(session->evlist, st->id); + counter = evlist__id2evsel(session->evlist, st->id); if (!counter) { pr_err("Failed to resolve counter for stat event.\n"); return -EINVAL; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index d9c624377da7..297987c6960b 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1643,7 +1643,7 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_ e->id = evsel->core.id[j]; - sid = perf_evlist__id2sid(evlist, e->id); + sid = evlist__id2sid(evlist, e->id); if (!sid) { free(ev); return -ENOENT; -- cgit v1.3.1 From f4bd0b4a9b21c609ede28cee2dcd16824c0489a8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 14:23:35 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' browser methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 19 +++++++------------ tools/perf/builtin-top.c | 9 +++------ tools/perf/ui/browsers/hists.c | 25 ++++++++----------------- tools/perf/ui/gtk/gtk.h | 5 ++--- tools/perf/ui/gtk/hists.c | 6 ++---- tools/perf/util/hist.h | 23 ++++++++++------------- 6 files changed, 32 insertions(+), 55 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3c74c9c0f3c3..11f13aafde44 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -493,8 +493,7 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report return ret + fprintf(fp, "\n#\n"); } -static int perf_evlist__tui_block_hists_browse(struct evlist *evlist, - struct report *rep) +static int evlist__tui_block_hists_browse(struct evlist *evlist, struct report *rep) { struct evsel *pos; int i = 0, ret; @@ -511,9 +510,7 @@ static int perf_evlist__tui_block_hists_browse(struct evlist *evlist, return 0; } -static int perf_evlist__tty_browse_hists(struct evlist *evlist, - struct report *rep, - const char *help) +static int evlist__tty_browse_hists(struct evlist *evlist, struct report *rep, const char *help) { struct evsel *pos; int i = 0; @@ -595,7 +592,7 @@ static int report__gtk_browse_hists(struct report *rep, const char *help) int (*hist_browser)(struct evlist *evlist, const char *help, struct hist_browser_timer *timer, float min_pcnt); - hist_browser = dlsym(perf_gtk_handle, "perf_evlist__gtk_browse_hists"); + hist_browser = dlsym(perf_gtk_handle, "evlist__gtk_browse_hists"); if (hist_browser == NULL) { ui__error("GTK browser not found!\n"); @@ -622,14 +619,12 @@ static int report__browse_hists(struct report *rep) switch (use_browser) { case 1: if (rep->total_cycles_mode) { - ret = perf_evlist__tui_block_hists_browse(evlist, rep); + ret = evlist__tui_block_hists_browse(evlist, rep); break; } - ret = perf_evlist__tui_browse_hists(evlist, help, NULL, - rep->min_percent, - &session->header.env, - true, &rep->annotation_opts); + ret = evlist__tui_browse_hists(evlist, help, NULL, rep->min_percent, + &session->header.env, true, &rep->annotation_opts); /* * Usually "ret" is the last pressed key, and we only * care if the key notifies us to switch data file. @@ -641,7 +636,7 @@ static int report__browse_hists(struct report *rep) ret = report__gtk_browse_hists(rep, help); break; default: - ret = perf_evlist__tty_browse_hists(evlist, rep, help); + ret = evlist__tty_browse_hists(evlist, rep, help); break; } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 88bd1ac5c8cf..5601a35a5ba4 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -641,12 +641,9 @@ repeat: hists->uid_filter_str = top->record_opts.target.uid_str; } - ret = perf_evlist__tui_browse_hists(top->evlist, help, &hbt, - top->min_percent, - &top->session->header.env, - !top->record_opts.overwrite, - &top->annotation_opts); - + ret = evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent, + &top->session->header.env, !top->record_opts.overwrite, + &top->annotation_opts); if (ret == K_RELOAD) { top->zero = true; goto repeat; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 48814efb98e8..2f3ce505eec5 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3559,13 +3559,9 @@ static bool filter_group_entries(struct ui_browser *browser __maybe_unused, return false; } -static int __perf_evlist__tui_browse_hists(struct evlist *evlist, - int nr_entries, const char *help, - struct hist_browser_timer *hbt, - float min_pcnt, - struct perf_env *env, - bool warn_lost_event, - struct annotation_options *annotation_opts) +static int __evlist__tui_browse_hists(struct evlist *evlist, int nr_entries, const char *help, + struct hist_browser_timer *hbt, float min_pcnt, struct perf_env *env, + bool warn_lost_event, struct annotation_options *annotation_opts) { struct evsel *pos; struct evsel_menu menu = { @@ -3614,12 +3610,9 @@ static bool perf_evlist__single_entry(struct evlist *evlist) return false; } -int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help, - struct hist_browser_timer *hbt, - float min_pcnt, - struct perf_env *env, - bool warn_lost_event, - struct annotation_options *annotation_opts) +int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, + float min_pcnt, struct perf_env *env, bool warn_lost_event, + struct annotation_options *annotation_opts) { int nr_entries = evlist->core.nr_entries; @@ -3645,10 +3638,8 @@ single_entry: { goto single_entry; } - return __perf_evlist__tui_browse_hists(evlist, nr_entries, help, - hbt, min_pcnt, env, - warn_lost_event, - annotation_opts); + return __evlist__tui_browse_hists(evlist, nr_entries, help, hbt, min_pcnt, env, + warn_lost_event, annotation_opts); } static int block_hists_browser__title(struct hist_browser *browser, char *bf, diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h index a9563932fa04..a2b497f03fd6 100644 --- a/tools/perf/ui/gtk/gtk.h +++ b/tools/perf/ui/gtk/gtk.h @@ -57,9 +57,8 @@ struct evlist; struct hist_entry; struct hist_browser_timer; -int perf_evlist__gtk_browse_hists(struct evlist *evlist, const char *help, - struct hist_browser_timer *hbt, - float min_pcnt); +int evlist__gtk_browse_hists(struct evlist *evlist, const char *help, + struct hist_browser_timer *hbt, float min_pcnt); int hist_entry__gtk_annotate(struct hist_entry *he, struct evsel *evsel, struct hist_browser_timer *hbt); diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 53ef71a1b15d..c83be2d57f7e 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -590,10 +590,8 @@ static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists, gtk_container_add(GTK_CONTAINER(window), view); } -int perf_evlist__gtk_browse_hists(struct evlist *evlist, - const char *help, - struct hist_browser_timer *hbt __maybe_unused, - float min_pcnt) +int evlist__gtk_browse_hists(struct evlist *evlist, const char *help, + struct hist_browser_timer *hbt __maybe_unused, float min_pcnt) { struct evsel *pos; GtkWidget *vbox; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 96b1c13bbccc..278b49e915b0 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -464,12 +464,9 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel, struct hist_browser_timer *hbt, struct annotation_options *annotation_opts); -int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help, - struct hist_browser_timer *hbt, - float min_pcnt, - struct perf_env *env, - bool warn_lost_event, - struct annotation_options *annotation_options); +int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, + float min_pcnt, struct perf_env *env, bool warn_lost_event, + struct annotation_options *annotation_options); int script_browse(const char *script_opt, struct evsel *evsel); @@ -483,13 +480,13 @@ int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, struct annotation_options *annotation_opts); #else static inline -int perf_evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, - const char *help __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused, - float min_pcnt __maybe_unused, - struct perf_env *env __maybe_unused, - bool warn_lost_event __maybe_unused, - struct annotation_options *annotation_options __maybe_unused) +int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, + const char *help __maybe_unused, + struct hist_browser_timer *hbt __maybe_unused, + float min_pcnt __maybe_unused, + struct perf_env *env __maybe_unused, + bool warn_lost_event __maybe_unused, + struct annotation_options *annotation_options __maybe_unused) { return 0; } -- cgit v1.3.1 From e80db255525a014a78af414b346413142e9142da Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 14:39:41 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' tracking event methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 2 +- tools/perf/arch/arm64/util/arm-spe.c | 2 +- tools/perf/arch/x86/util/intel-bts.c | 2 +- tools/perf/arch/x86/util/intel-pt.c | 2 +- tools/perf/builtin-record.c | 4 ++-- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/util/evlist.c | 5 ++--- tools/perf/util/evlist.h | 5 ++--- 8 files changed, 11 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index cad7bf783413..bc0afeb903c2 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -420,7 +420,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, goto out; tracking_evsel = evlist__last(evlist); - perf_evlist__set_tracking_event(evlist, tracking_evsel); + evlist__set_tracking_event(evlist, tracking_evsel); tracking_evsel->core.attr.freq = 0; tracking_evsel->core.attr.sample_period = 1; diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index e3593063b3d1..beccf1b36654 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -130,7 +130,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, return err; tracking_evsel = evlist__last(evlist); - perf_evlist__set_tracking_event(evlist, tracking_evsel); + evlist__set_tracking_event(evlist, tracking_evsel); tracking_evsel->core.attr.freq = 0; tracking_evsel->core.attr.sample_period = 1; diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 0dc09b5809c1..66ca98df48b0 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -238,7 +238,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, tracking_evsel = evlist__last(evlist); - perf_evlist__set_tracking_event(evlist, tracking_evsel); + evlist__set_tracking_event(evlist, tracking_evsel); tracking_evsel->core.attr.freq = 0; tracking_evsel->core.attr.sample_period = 1; diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 082e5f2a415a..f1b5380a7401 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -865,7 +865,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, tracking_evsel = evlist__last(evlist); - perf_evlist__set_tracking_event(evlist, tracking_evsel); + evlist__set_tracking_event(evlist, tracking_evsel); tracking_evsel->core.attr.freq = 0; tracking_evsel->core.attr.sample_period = 1; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 04946bd3d090..2d74a410d008 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -891,13 +891,13 @@ static int record__open(struct record *rec) * event synthesis. */ if (opts->initial_delay || target__has_cpu(&opts->target)) { - pos = perf_evlist__get_tracking_event(evlist); + pos = evlist__get_tracking_event(evlist); if (!evsel__is_dummy_event(pos)) { /* Set up dummy event. */ if (evlist__add_dummy(evlist)) return -ENOMEM; pos = evlist__last(evlist); - perf_evlist__set_tracking_event(evlist, pos); + evlist__set_tracking_event(evlist, pos); } /* diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 3c0bc3f1fd0e..8bf484a0f35d 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -424,7 +424,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ tracking_evsel = evlist__last(evlist); - perf_evlist__set_tracking_event(evlist, tracking_evsel); + evlist__set_tracking_event(evlist, tracking_evsel); tracking_evsel->core.attr.freq = 0; tracking_evsel->core.attr.sample_period = 1; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 51775ff2979f..b5a53e118564 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1541,7 +1541,7 @@ void perf_evlist__to_front(struct evlist *evlist, list_splice(&move, &evlist->core.entries); } -struct evsel *perf_evlist__get_tracking_event(struct evlist *evlist) +struct evsel *evlist__get_tracking_event(struct evlist *evlist) { struct evsel *evsel; @@ -1553,8 +1553,7 @@ struct evsel *perf_evlist__get_tracking_event(struct evlist *evlist) return evlist__first(evlist); } -void perf_evlist__set_tracking_event(struct evlist *evlist, - struct evsel *tracking_evsel) +void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel) { struct evsel *evsel; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 90f1127fecf6..14bf7e2f9c0e 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -314,9 +314,8 @@ void perf_evlist__to_front(struct evlist *evlist, evlist__cpu_iter_start(evlist); \ perf_cpu_map__for_each_cpu (cpu, index, (evlist)->core.all_cpus) -struct evsel *perf_evlist__get_tracking_event(struct evlist *evlist); -void perf_evlist__set_tracking_event(struct evlist *evlist, - struct evsel *tracking_evsel); +struct evsel *evlist__get_tracking_event(struct evlist *evlist); +void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel); void evlist__cpu_iter_start(struct evlist *evlist); bool evsel__cpu_iter_skip(struct evsel *ev, int cpu); -- cgit v1.3.1 From 2a99ff822dfa4a88d54b2c4f17d33748bcedd899 Mon Sep 17 00:00:00 2001 From: Alexandre Truong Date: Fri, 27 Nov 2020 15:39:23 +0000 Subject: perf tools: Add aarch64 registers to --user-regs Previously, this command returns no help message on aarch64: -> ./perf record --user-regs=? available registers: Usage: perf record [] [] or: perf record [] -- [] With this change, the registers are listed. -> ./perf record --user-regs=? available registers: x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15 x16 x17 x18 x19 x20 x21 x22 x23 x24 x25 x26 x27 x28 x29 lr sp pc It's also now possible to record subsets of registers on aarch64: -> ./perf record --user-regs=x4,x5 ls -> ./perf report --dump-raw-trace 12801163749305260 0xc70 [0x40]: PERF_RECORD_SAMPLE(IP, 0x2): 51956/51956: 0xffffaa6571f0 period: 145785 addr: 0 ... user regs: mask 0x30 ABI 64-bit .... x4 0x000000000000006c .... x5 0x0000001001000001 ... thread: ls:51956 ...... dso: /usr/lib64/ld-2.17.so Signed-off-by: Alexandre Truong Tested-by: James Clark Acked-by: John Garry Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20201127153923.26717-1-alexandre.truong@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/perf_regs.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'tools') diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c index 2833e101a7c6..54efa12fdbea 100644 --- a/tools/perf/arch/arm64/util/perf_regs.c +++ b/tools/perf/arch/arm64/util/perf_regs.c @@ -2,5 +2,38 @@ #include "../../../util/perf_regs.h" const struct sample_reg sample_reg_masks[] = { + SMPL_REG(x0, PERF_REG_ARM64_X0), + SMPL_REG(x1, PERF_REG_ARM64_X1), + SMPL_REG(x2, PERF_REG_ARM64_X2), + SMPL_REG(x3, PERF_REG_ARM64_X3), + SMPL_REG(x4, PERF_REG_ARM64_X4), + SMPL_REG(x5, PERF_REG_ARM64_X5), + SMPL_REG(x6, PERF_REG_ARM64_X6), + SMPL_REG(x7, PERF_REG_ARM64_X7), + SMPL_REG(x8, PERF_REG_ARM64_X8), + SMPL_REG(x9, PERF_REG_ARM64_X9), + SMPL_REG(x10, PERF_REG_ARM64_X10), + SMPL_REG(x11, PERF_REG_ARM64_X11), + SMPL_REG(x12, PERF_REG_ARM64_X12), + SMPL_REG(x13, PERF_REG_ARM64_X13), + SMPL_REG(x14, PERF_REG_ARM64_X14), + SMPL_REG(x15, PERF_REG_ARM64_X15), + SMPL_REG(x16, PERF_REG_ARM64_X16), + SMPL_REG(x17, PERF_REG_ARM64_X17), + SMPL_REG(x18, PERF_REG_ARM64_X18), + SMPL_REG(x19, PERF_REG_ARM64_X19), + SMPL_REG(x20, PERF_REG_ARM64_X20), + SMPL_REG(x21, PERF_REG_ARM64_X21), + SMPL_REG(x22, PERF_REG_ARM64_X22), + SMPL_REG(x23, PERF_REG_ARM64_X23), + SMPL_REG(x24, PERF_REG_ARM64_X24), + SMPL_REG(x25, PERF_REG_ARM64_X25), + SMPL_REG(x26, PERF_REG_ARM64_X26), + SMPL_REG(x27, PERF_REG_ARM64_X27), + SMPL_REG(x28, PERF_REG_ARM64_X28), + SMPL_REG(x29, PERF_REG_ARM64_X29), + SMPL_REG(lr, PERF_REG_ARM64_LR), + SMPL_REG(sp, PERF_REG_ARM64_SP), + SMPL_REG(pc, PERF_REG_ARM64_PC), SMPL_REG_END }; -- cgit v1.3.1 From 0a7e7ec90e601d98cc5914626b78fd043598b85b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 14:44:40 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' id_pos methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 12 ++++++------ tools/perf/util/evlist.h | 2 +- tools/perf/util/record.c | 2 +- tools/perf/util/sideband_evlist.c | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index b5a53e118564..26c436abac77 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -103,13 +103,13 @@ struct evlist *perf_evlist__new_dummy(void) } /** - * perf_evlist__set_id_pos - set the positions of event ids. + * evlist__set_id_pos - set the positions of event ids. * @evlist: selected event list * * Events with compatible sample types all have the same id_pos * and is_pos. For convenience, put a copy on evlist. */ -void perf_evlist__set_id_pos(struct evlist *evlist) +void evlist__set_id_pos(struct evlist *evlist) { struct evsel *first = evlist__first(evlist); @@ -117,14 +117,14 @@ void perf_evlist__set_id_pos(struct evlist *evlist) evlist->is_pos = first->is_pos; } -static void perf_evlist__update_id_pos(struct evlist *evlist) +static void evlist__update_id_pos(struct evlist *evlist) { struct evsel *evsel; evlist__for_each_entry(evlist, evsel) evsel__calc_id_pos(evsel); - perf_evlist__set_id_pos(evlist); + evlist__set_id_pos(evlist); } static void evlist__purge(struct evlist *evlist) @@ -168,7 +168,7 @@ void evlist__add(struct evlist *evlist, struct evsel *entry) perf_evlist__add(&evlist->core, &entry->core); if (evlist->core.nr_entries == 1) - perf_evlist__set_id_pos(evlist); + evlist__set_id_pos(evlist); } void evlist__remove(struct evlist *evlist, struct evsel *evsel) @@ -1275,7 +1275,7 @@ int evlist__open(struct evlist *evlist) goto out_err; } - perf_evlist__update_id_pos(evlist); + evlist__update_id_pos(evlist); evlist__for_each_entry(evlist, evsel) { err = evsel__open(evsel, evsel->core.cpus, evsel->core.threads); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 14bf7e2f9c0e..85da0c443ae6 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -158,7 +158,7 @@ void evlist__close(struct evlist *evlist); struct callchain_param; -void perf_evlist__set_id_pos(struct evlist *evlist); +void evlist__set_id_pos(struct evlist *evlist); void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain); int record_opts__config(struct record_opts *opts); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 7330407db4ba..220b9910427b 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -144,7 +144,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, evsel__set_sample_id(evsel, use_sample_identifier); } - perf_evlist__set_id_pos(evlist); + evlist__set_id_pos(evlist); } static int get_max_rate(unsigned int *rate) diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c index 9c04c71dbf51..13f387b76456 100644 --- a/tools/perf/util/sideband_evlist.c +++ b/tools/perf/util/sideband_evlist.c @@ -110,7 +110,7 @@ int evlist__start_sb_thread(struct evlist *evlist, struct target *target) evlist__for_each_entry(evlist, counter) evsel__set_sample_id(counter, can_sample_identifier); - perf_evlist__set_id_pos(evlist); + evlist__set_id_pos(evlist); } evlist__for_each_entry(evlist, counter) { -- cgit v1.3.1 From 37b01abe2a63db1b6a69af32257cb50795c725f8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 14:47:05 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' enable event methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.c | 3 +-- tools/perf/util/evlist.c | 16 ++++++---------- tools/perf/util/evlist.h | 3 +-- 3 files changed, 8 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index b538b9093567..8c94e21db2aa 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -658,8 +658,7 @@ int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx) if (evsel->core.attr.type == itr->pmu->type) { if (evsel->disabled) return 0; - return perf_evlist__enable_event_idx(itr->evlist, evsel, - idx); + return evlist__enable_event_idx(itr->evlist, evsel, idx); } } return -EINVAL; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 26c436abac77..1c7b6d4a76be 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -448,8 +448,7 @@ void evlist__toggle_enable(struct evlist *evlist) (evlist->enabled ? evlist__disable : evlist__enable)(evlist); } -static int perf_evlist__enable_event_cpu(struct evlist *evlist, - struct evsel *evsel, int cpu) +static int evlist__enable_event_cpu(struct evlist *evlist, struct evsel *evsel, int cpu) { int thread; int nr_threads = perf_evlist__nr_threads(evlist, evsel); @@ -465,9 +464,7 @@ static int perf_evlist__enable_event_cpu(struct evlist *evlist, return 0; } -static int perf_evlist__enable_event_thread(struct evlist *evlist, - struct evsel *evsel, - int thread) +static int evlist__enable_event_thread(struct evlist *evlist, struct evsel *evsel, int thread) { int cpu; int nr_cpus = perf_cpu_map__nr(evlist->core.cpus); @@ -483,15 +480,14 @@ static int perf_evlist__enable_event_thread(struct evlist *evlist, return 0; } -int perf_evlist__enable_event_idx(struct evlist *evlist, - struct evsel *evsel, int idx) +int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx) { bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.cpus); if (per_cpu_mmaps) - return perf_evlist__enable_event_cpu(evlist, evsel, idx); - else - return perf_evlist__enable_event_thread(evlist, evsel, idx); + return evlist__enable_event_cpu(evlist, evsel, idx); + + return evlist__enable_event_thread(evlist, evsel, idx); } int evlist__add_pollfd(struct evlist *evlist, int fd) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 85da0c443ae6..f563e546b055 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -190,8 +190,7 @@ void evlist__disable(struct evlist *evlist); void evlist__enable(struct evlist *evlist); void evlist__toggle_enable(struct evlist *evlist); -int perf_evlist__enable_event_idx(struct evlist *evlist, - struct evsel *evsel, int idx); +int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx); void perf_evlist__set_selected(struct evlist *evlist, struct evsel *evsel); -- cgit v1.3.1 From 0a60b339475970213f2685d0da55a26d5f4f22f9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 14:49:05 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' pause/resume methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 1c7b6d4a76be..3ca211e533ba 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -604,7 +604,7 @@ struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event return NULL; } -static int perf_evlist__set_paused(struct evlist *evlist, bool value) +static int evlist__set_paused(struct evlist *evlist, bool value) { int i; @@ -624,14 +624,14 @@ static int perf_evlist__set_paused(struct evlist *evlist, bool value) return 0; } -static int perf_evlist__pause(struct evlist *evlist) +static int evlist__pause(struct evlist *evlist) { - return perf_evlist__set_paused(evlist, true); + return evlist__set_paused(evlist, true); } -static int perf_evlist__resume(struct evlist *evlist) +static int evlist__resume(struct evlist *evlist) { - return perf_evlist__set_paused(evlist, false); + return evlist__set_paused(evlist, false); } static void evlist__munmap_nofree(struct evlist *evlist) @@ -1621,10 +1621,10 @@ void evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state) switch (action) { case PAUSE: - perf_evlist__pause(evlist); + evlist__pause(evlist); break; case RESUME: - perf_evlist__resume(evlist); + evlist__resume(evlist); break; case NONE: default: -- cgit v1.3.1 From e414fd1a3f709984a03f0fa287e39df6a7218e22 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 14:52:44 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' evsel list methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 2 +- tools/perf/arch/arm64/util/arm-spe.c | 2 +- tools/perf/arch/x86/util/intel-bts.c | 2 +- tools/perf/arch/x86/util/intel-pt.c | 2 +- tools/perf/tests/bpf.c | 2 +- tools/perf/tests/expand-cgroup.c | 4 ++-- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/ui/browsers/hists.c | 4 ++-- tools/perf/util/cgroup.c | 4 ++-- tools/perf/util/evlist.c | 8 +++----- tools/perf/util/evlist.h | 8 +++----- tools/perf/util/parse-events.c | 2 +- tools/perf/util/sort.c | 2 +- 13 files changed, 20 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index bc0afeb903c2..bd446aba64f7 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -395,7 +395,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, * To obtain the auxtrace buffer file descriptor, the auxtrace * event must come first. */ - perf_evlist__to_front(evlist, cs_etm_evsel); + evlist__to_front(evlist, cs_etm_evsel); /* * In the case of per-cpu mmaps, we need the CPU on the diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index beccf1b36654..414c8a5584b1 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -118,7 +118,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, * To obtain the auxtrace buffer file descriptor, the auxtrace event * must come first. */ - perf_evlist__to_front(evlist, arm_spe_evsel); + evlist__to_front(evlist, arm_spe_evsel); evsel__set_sample_bit(arm_spe_evsel, CPU); evsel__set_sample_bit(arm_spe_evsel, TIME); diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 66ca98df48b0..4a76d49d25d6 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -218,7 +218,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, * To obtain the auxtrace buffer file descriptor, the auxtrace event * must come first. */ - perf_evlist__to_front(evlist, intel_bts_evsel); + evlist__to_front(evlist, intel_bts_evsel); /* * In the case of per-cpu mmaps, we need the CPU on the * AUX event. diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index f1b5380a7401..cb9c0c85dc8f 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -846,7 +846,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * To obtain the auxtrace buffer file descriptor, the auxtrace * event must come first. */ - perf_evlist__to_front(evlist, intel_pt_evsel); + evlist__to_front(evlist, intel_pt_evsel); /* * In the case of per-cpu mmaps, we need the CPU on the * AUX event. diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index d880c588a951..7ed284a00ea2 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -157,7 +157,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), goto out_delete_evlist; } - perf_evlist__splice_list_tail(evlist, &parse_state.list); + evlist__splice_list_tail(evlist, &parse_state.list); evlist->nr_groups = parse_state.nr_groups; perf_evlist__config(evlist, &opts, NULL); diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c index 4c59f3ae438f..e8187f37fe1e 100644 --- a/tools/perf/tests/expand-cgroup.c +++ b/tools/perf/tests/expand-cgroup.c @@ -26,7 +26,7 @@ static int test_expand_events(struct evlist *evlist, char **ev_name; struct evsel *evsel; - TEST_ASSERT_VAL("evlist is empty", !perf_evlist__empty(evlist)); + TEST_ASSERT_VAL("evlist is empty", !evlist__empty(evlist)); nr_events = evlist->core.nr_entries; ev_name = calloc(nr_events, sizeof(*ev_name)); @@ -161,7 +161,7 @@ static int expand_libpfm_events(void) event_str, ret); goto out; } - if (perf_evlist__empty(evlist)) { + if (evlist__empty(evlist)) { pr_debug("libpfm was not enabled\n"); goto out; } diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 8bf484a0f35d..e07fe84bb304 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -406,7 +406,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ pr_debug("cycles event already at front"); goto out_err; } - perf_evlist__to_front(evlist, cycles_evsel); + evlist__to_front(evlist, cycles_evsel); if (cycles_evsel != evlist__first(evlist)) { pr_debug("Failed to move cycles event to front"); goto out_err; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 2f3ce505eec5..766be504abca 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3593,7 +3593,7 @@ static int __evlist__tui_browse_hists(struct evlist *evlist, int nr_entries, con hbt, warn_lost_event); } -static bool perf_evlist__single_entry(struct evlist *evlist) +static bool evlist__single_entry(struct evlist *evlist) { int nr_entries = evlist->core.nr_entries; @@ -3616,7 +3616,7 @@ int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct his { int nr_entries = evlist->core.nr_entries; - if (perf_evlist__single_entry(evlist)) { + if (evlist__single_entry(evlist)) { single_entry: { struct evsel *first = evlist__first(evlist); diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 704333748549..5dff7e489921 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -371,7 +371,7 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, } /* save original events and init evlist */ - perf_evlist__splice_list_tail(orig_list, &evlist->core.entries); + evlist__splice_list_tail(orig_list, &evlist->core.entries); evlist->core.nr_entries = 0; if (metric_events) { @@ -430,7 +430,7 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, goto out_err; } - perf_evlist__splice_list_tail(evlist, &tmp_list->core.entries); + evlist__splice_list_tail(evlist, &tmp_list->core.entries); tmp_list->core.nr_entries = 0; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3ca211e533ba..28b4d347d4ba 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -177,8 +177,7 @@ void evlist__remove(struct evlist *evlist, struct evsel *evsel) perf_evlist__remove(&evlist->core, &evsel->core); } -void perf_evlist__splice_list_tail(struct evlist *evlist, - struct list_head *list) +void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list) { struct evsel *evsel, *temp; @@ -273,7 +272,7 @@ static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attr list_add_tail(&evsel->core.node, &head); } - perf_evlist__splice_list_tail(evlist, &head); + evlist__splice_list_tail(evlist, &head); return 0; @@ -1520,8 +1519,7 @@ int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size return 0; } -void perf_evlist__to_front(struct evlist *evlist, - struct evsel *move_evsel) +void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel) { struct evsel *evsel, *n; LIST_HEAD(move); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index f563e546b055..8d3fdeab2f22 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -214,10 +214,9 @@ bool evlist__valid_sample_type(struct evlist *evlist); bool evlist__valid_sample_id_all(struct evlist *evlist); bool perf_evlist__valid_read_format(struct evlist *evlist); -void perf_evlist__splice_list_tail(struct evlist *evlist, - struct list_head *list); +void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list); -static inline bool perf_evlist__empty(struct evlist *evlist) +static inline bool evlist__empty(struct evlist *evlist) { return list_empty(&evlist->core.entries); } @@ -240,8 +239,7 @@ int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size); bool perf_evlist__can_select_event(struct evlist *evlist, const char *str); -void perf_evlist__to_front(struct evlist *evlist, - struct evsel *move_evsel); +void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel); /** * __evlist__for_each_entry - iterate thru all the evsels diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 8fa87f60133f..42c84adeb2fb 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2183,7 +2183,7 @@ int __parse_events(struct evlist *evlist, const char *str, /* * Add list to the evlist even with errors to allow callers to clean up. */ - perf_evlist__splice_list_tail(evlist, &parse_state.list); + evlist__splice_list_tail(evlist, &parse_state.list); if (!ret) { struct evsel *last; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index d42339df20f8..7d87bfcffb3f 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2756,7 +2756,7 @@ static const char *get_default_sort_order(struct evlist *evlist) BUG_ON(sort__mode >= ARRAY_SIZE(default_sort_orders)); - if (evlist == NULL || perf_evlist__empty(evlist)) + if (evlist == NULL || evlist__empty(evlist)) goto out_no_evlist; evlist__for_each_entry(evlist, evsel) { -- cgit v1.3.1 From 712737241980476a277a4108e3121240a29de968 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 14:55:12 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' print methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-stat.c | 3 +-- tools/perf/util/hist.c | 2 +- tools/perf/util/hist.h | 2 +- tools/perf/util/session.c | 6 ++---- tools/perf/util/stat-display.c | 8 ++------ tools/perf/util/stat.h | 8 ++------ 8 files changed, 11 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 4940d10074c3..d6a81a6e1041 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -412,7 +412,7 @@ static int __cmd_annotate(struct perf_annotate *ann) if (dump_trace) { perf_session__fprintf_nr_events(session, stdout); - perf_evlist__fprintf_nr_events(session->evlist, stdout); + evlist__fprintf_nr_events(session->evlist, stdout); goto out; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 11f13aafde44..0a335bef64d0 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -928,7 +928,7 @@ static int __cmd_report(struct report *rep) if (dump_trace) { perf_session__fprintf_nr_events(session, stdout); - perf_evlist__fprintf_nr_events(session->evlist, stdout); + evlist__fprintf_nr_events(session->evlist, stdout); return 0; } } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 29f7d4752e84..d69378f135e6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -974,8 +974,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) if (stat_config.quiet) return; - perf_evlist__print_counters(evsel_list, &stat_config, &target, - ts, argc, argv); + evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv); } static volatile int signr = -1; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 8a793e4c9400..7feeaa07c777 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -2654,7 +2654,7 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, } } -size_t perf_evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp) +size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp) { struct evsel *pos; size_t ret = 0; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 278b49e915b0..df6c6eea0960 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -196,7 +196,7 @@ void hists__inc_nr_samples(struct hists *hists, bool filtered); size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, int max_cols, float min_pcnt, FILE *fp, bool ignore_callchains); -size_t perf_evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp); +size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp); void hists__filter_by_dso(struct hists *hists); void hists__filter_by_thread(struct hists *hists); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index ce381b3dca06..6707a01b7ef8 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1191,9 +1191,7 @@ static void stack_user__printf(struct stack_dump *dump) dump->size, dump->offset); } -static void perf_evlist__print_tstamp(struct evlist *evlist, - union perf_event *event, - struct perf_sample *sample) +static void evlist__print_tstamp(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) { u64 sample_type = __evlist__combined_sample_type(evlist); @@ -1254,7 +1252,7 @@ static void dump_event(struct evlist *evlist, union perf_event *event, evlist->trace_event_sample_raw(evlist, event, sample); if (sample) - perf_evlist__print_tstamp(evlist, event, sample); + evlist__print_tstamp(evlist, event, sample); printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset, event->header.size, perf_event__name(event->header.type)); diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index a963b5b8eb72..fee7543843a8 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1184,12 +1184,8 @@ static void print_percore(struct perf_stat_config *config, fputc('\n', output); } -void -perf_evlist__print_counters(struct evlist *evlist, - struct perf_stat_config *config, - struct target *_target, - struct timespec *ts, - int argc, const char **argv) +void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, + struct target *_target, struct timespec *ts, int argc, const char **argv) { bool metric_only = config->metric_only; int interval = config->interval; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 4cba4b106e45..9979b4b100f2 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -238,12 +238,8 @@ int create_perf_stat_counter(struct evsel *evsel, struct perf_stat_config *config, struct target *target, int cpu); -void -perf_evlist__print_counters(struct evlist *evlist, - struct perf_stat_config *config, - struct target *_target, - struct timespec *ts, - int argc, const char **argv); +void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, + struct target *_target, struct timespec *ts, int argc, const char **argv); struct metric_expr; double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st); -- cgit v1.3.1 From 7748bb7175ccad5ee29e7355134b0061d8edf3d2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 14:56:52 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' create maps methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 2 +- tools/perf/builtin-kvm.c | 2 +- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-stat.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/builtin-trace.c | 2 +- tools/perf/tests/backward-ring-buffer.c | 2 +- tools/perf/tests/bpf.c | 2 +- tools/perf/tests/event-times.c | 2 +- tools/perf/tests/openat-syscall-tp-fields.c | 4 ++-- tools/perf/tests/perf-record.c | 2 +- tools/perf/util/evlist.c | 6 +++--- tools/perf/util/evlist.h | 2 +- tools/perf/util/sideband_evlist.c | 2 +- 14 files changed, 17 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index bf4d8e9ab8f5..d49448a1060c 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -957,7 +957,7 @@ int cmd_ftrace(int argc, const char **argv) goto out_delete_filters; } - ret = perf_evlist__create_maps(ftrace.evlist, &ftrace.target); + ret = evlist__create_maps(ftrace.evlist, &ftrace.target); if (ret < 0) goto out_delete_evlist; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index e1b60aad13cb..7560d6b1d6a3 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1442,7 +1442,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, goto out; } - if (perf_evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0) + if (evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0) usage_with_options(live_usage, live_options); /* diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2d74a410d008..e956348c9081 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2792,7 +2792,7 @@ int cmd_record(int argc, const char **argv) rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; err = -ENOMEM; - if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) + if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) usage_with_options(record_usage, record_options); err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d69378f135e6..1e967c32db7c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2249,7 +2249,7 @@ int cmd_stat(int argc, const char **argv) if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide)) target.per_thread = true; - if (perf_evlist__create_maps(evsel_list, &target) < 0) { + if (evlist__create_maps(evsel_list, &target) < 0) { if (target__has_task(&target)) { pr_err("Problems finding threads of monitor\n"); parse_options_usage(stat_usage, stat_options, "p", 1); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5601a35a5ba4..51600f656771 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1694,7 +1694,7 @@ int cmd_top(int argc, const char **argv) if (target__none(target)) target->system_wide = true; - if (perf_evlist__create_maps(top.evlist, target) < 0) { + if (evlist__create_maps(top.evlist, target) < 0) { ui__error("Couldn't create thread/CPU maps: %s\n", errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf))); goto out_delete_evlist; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index a463ce8fb8e0..ebf4fe944525 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3950,7 +3950,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (trace->cgroup) evlist__set_default_cgroup(trace->evlist, trace->cgroup); - err = perf_evlist__create_maps(evlist, &trace->opts.target); + err = evlist__create_maps(evlist, &trace->opts.target); if (err < 0) { fprintf(trace->output, "Problems parsing the target to trace, check your options!\n"); goto out_delete_evlist; diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 15cea518f5ad..f00f7f34efbd 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -109,7 +109,7 @@ int test__backward_ring_buffer(struct test *test __maybe_unused, int subtest __m return TEST_FAIL; } - err = perf_evlist__create_maps(evlist, &opts.target); + err = evlist__create_maps(evlist, &opts.target); if (err < 0) { pr_debug("Not enough memory to create thread/cpu maps\n"); goto out_delete_evlist; diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 7ed284a00ea2..d4b232fe9448 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -151,7 +151,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), return TEST_FAIL; } - err = perf_evlist__create_maps(evlist, &opts.target); + err = evlist__create_maps(evlist, &opts.target); if (err < 0) { pr_debug("Not enough memory to create thread/cpu maps\n"); goto out_delete_evlist; diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index 9da2d4f58b8e..04ce4401f775 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -26,7 +26,7 @@ static int attach__enable_on_exec(struct evlist *evlist) pr_debug("attaching to spawned child, enable on exec\n"); - err = perf_evlist__create_maps(evlist, &target); + err = evlist__create_maps(evlist, &target); if (err < 0) { pr_debug("Not enough memory to create thread/cpu maps\n"); return err; diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 1f5f5e79ae25..78367b17f1bb 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -54,9 +54,9 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest evlist__add(evlist, evsel); - err = perf_evlist__create_maps(evlist, &opts.target); + err = evlist__create_maps(evlist, &opts.target); if (err < 0) { - pr_debug("%s: perf_evlist__create_maps\n", __func__); + pr_debug("%s: evlist__create_maps\n", __func__); goto out_delete_evlist; } diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index f0b2066c7ea9..01c9282c7a3b 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -84,7 +84,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus * evlist__prepare_workload we'll fill in the only thread * we're monitoring, the one forked there. */ - err = perf_evlist__create_maps(evlist, &opts.target); + err = evlist__create_maps(evlist, &opts.target); if (err < 0) { pr_debug("Not enough memory to create thread/cpu maps\n"); goto out_delete_evlist; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 28b4d347d4ba..e68e5a40b49a 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -892,7 +892,7 @@ int evlist__mmap(struct evlist *evlist, unsigned int pages) return evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0); } -int perf_evlist__create_maps(struct evlist *evlist, struct target *target) +int evlist__create_maps(struct evlist *evlist, struct target *target) { bool all_threads = (target->per_thread && target->system_wide); struct perf_cpu_map *cpus; @@ -1223,7 +1223,7 @@ void evlist__close(struct evlist *evlist) } } -static int perf_evlist__create_syswide_maps(struct evlist *evlist) +static int evlist__create_syswide_maps(struct evlist *evlist) { struct perf_cpu_map *cpus; struct perf_thread_map *threads; @@ -1265,7 +1265,7 @@ int evlist__open(struct evlist *evlist) * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL */ if (evlist->core.threads == NULL && evlist->core.cpus == NULL) { - err = perf_evlist__create_syswide_maps(evlist); + err = evlist__create_syswide_maps(evlist); if (err < 0) goto out_err; } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 8d3fdeab2f22..3b3bf3d28204 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -195,7 +195,7 @@ int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx void perf_evlist__set_selected(struct evlist *evlist, struct evsel *evsel); -int perf_evlist__create_maps(struct evlist *evlist, struct target *target); +int evlist__create_maps(struct evlist *evlist, struct target *target); int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel); void __evlist__set_leader(struct list_head *list); diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c index 13f387b76456..748371ac22be 100644 --- a/tools/perf/util/sideband_evlist.c +++ b/tools/perf/util/sideband_evlist.c @@ -101,7 +101,7 @@ int evlist__start_sb_thread(struct evlist *evlist, struct target *target) if (!evlist) return 0; - if (perf_evlist__create_maps(evlist, target)) + if (evlist__create_maps(evlist, target)) goto out_delete_evlist; if (evlist->core.nr_entries > 1) { -- cgit v1.3.1 From 64b4778b863b6fa84e36e043fb34bde6b847fa96 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 14:58:32 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' event group methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-stat.c | 2 +- tools/perf/util/auxtrace.c | 6 ++---- tools/perf/util/evlist.c | 6 ++---- tools/perf/util/evlist.h | 7 +++---- 7 files changed, 11 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index d6a81a6e1041..a23ba6bb99b6 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -598,7 +598,7 @@ int cmd_annotate(int argc, const char **argv) HEADER_BRANCH_STACK); if (annotate.group_set) - perf_evlist__force_leader(annotate.session->evlist); + evlist__force_leader(annotate.session->evlist); ret = symbol__annotation_init(); if (ret < 0) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e956348c9081..412717406dc9 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -923,7 +923,7 @@ try_again: if ((errno == EINVAL || errno == EBADF) && pos->leader != pos && pos->weak_group) { - pos = perf_evlist__reset_weak_group(evlist, pos, true); + pos = evlist__reset_weak_group(evlist, pos, true); goto try_again; } rc = -errno; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0a335bef64d0..e78614ab7a16 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -211,7 +211,7 @@ static void setup_forced_leader(struct report *report, struct evlist *evlist) { if (report->group_set) - perf_evlist__force_leader(evlist); + evlist__force_leader(evlist); } static int process_feature_event(struct perf_session *session, diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 1e967c32db7c..89c32692f40c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -759,7 +759,7 @@ try_again: if ((errno == EINVAL || errno == EBADF) && counter->leader != counter && counter->weak_group) { - perf_evlist__reset_weak_group(evsel_list, counter, false); + evlist__reset_weak_group(evsel_list, counter, false); assert(counter->reset_group); second_pass = true; continue; diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 8c94e21db2aa..a60878498139 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -62,9 +62,7 @@ * Make a group from 'leader' to 'last', requiring that the events were not * already grouped to a different leader. */ -static int perf_evlist__regroup(struct evlist *evlist, - struct evsel *leader, - struct evsel *last) +static int evlist__regroup(struct evlist *evlist, struct evsel *leader, struct evsel *last) { struct evsel *evsel; bool grp; @@ -775,7 +773,7 @@ no_opt: evsel->core.attr.aux_sample_size = term->val.aux_sample_size; /* If possible, group with the AUX event */ if (aux_evsel && evsel->core.attr.aux_sample_size) - perf_evlist__regroup(evlist, aux_evsel, evsel); + evlist__regroup(evlist, aux_evsel, evsel); } } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e68e5a40b49a..869195b6f21c 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1650,7 +1650,7 @@ bool perf_evlist__exclude_kernel(struct evlist *evlist) * the group display. Set the artificial group and set the leader's * forced_leader flag to notify the display code. */ -void perf_evlist__force_leader(struct evlist *evlist) +void evlist__force_leader(struct evlist *evlist) { if (!evlist->nr_groups) { struct evsel *leader = evlist__first(evlist); @@ -1660,9 +1660,7 @@ void perf_evlist__force_leader(struct evlist *evlist) } } -struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list, - struct evsel *evsel, - bool close) +struct evsel *evlist__reset_weak_group(struct evlist *evsel_list, struct evsel *evsel, bool close) { struct evsel *c2, *leader; bool is_open = true; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 3b3bf3d28204..d09970e7c838 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -324,11 +324,10 @@ struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event bool perf_evlist__exclude_kernel(struct evlist *evlist); -void perf_evlist__force_leader(struct evlist *evlist); +void evlist__force_leader(struct evlist *evlist); + +struct evsel *evlist__reset_weak_group(struct evlist *evlist, struct evsel *evsel, bool close); -struct evsel *perf_evlist__reset_weak_group(struct evlist *evlist, - struct evsel *evsel, - bool close); #define EVLIST_CTL_CMD_ENABLE_TAG "enable" #define EVLIST_CTL_CMD_DISABLE_TAG "disable" #define EVLIST_CTL_CMD_ACK_TAG "ack\n" -- cgit v1.3.1 From 900c8ead5b0b21d73236ffbc4bc2f47a506d8297 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 15:01:08 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' event selection methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/intel-pt.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/ui/browsers/hists.c | 2 +- tools/perf/util/evlist.c | 3 +-- tools/perf/util/evlist.h | 5 ++--- tools/perf/util/record.c | 2 +- 6 files changed, 7 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index cb9c0c85dc8f..a6420c647959 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -416,7 +416,7 @@ static int intel_pt_track_switches(struct evlist *evlist) struct evsel *evsel; int err; - if (!perf_evlist__can_select_event(evlist, sched_switch)) + if (!evlist__can_select_event(evlist, sched_switch)) return -EPERM; err = parse_events(evlist, sched_switch, NULL); diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index e07fe84bb304..a4cf5ee22c28 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -380,7 +380,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ cycles_evsel = evlist__last(evlist); /* Third event */ - if (!perf_evlist__can_select_event(evlist, sched_switch)) { + if (!evlist__can_select_event(evlist, sched_switch)) { pr_debug("No sched_switch\n"); err = 0; goto out; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 766be504abca..3b9818ee9546 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3494,7 +3494,7 @@ static int perf_evsel_menu__run(struct evsel_menu *menu, continue; pos = menu->selection; browse_hists: - perf_evlist__set_selected(evlist, pos); + evlist__set_selected(evlist, pos); /* * Give the calling tool a chance to populate the non * default evsel resorted hists tree. diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 869195b6f21c..a11364d94720 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1183,8 +1183,7 @@ bool evlist__sample_id_all(struct evlist *evlist) return first->core.attr.sample_id_all; } -void perf_evlist__set_selected(struct evlist *evlist, - struct evsel *evsel) +void evlist__set_selected(struct evlist *evlist, struct evsel *evsel) { evlist->selected = evsel; } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index d09970e7c838..e858a351b014 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -192,8 +192,7 @@ void evlist__toggle_enable(struct evlist *evlist); int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx); -void perf_evlist__set_selected(struct evlist *evlist, - struct evsel *evsel); +void evlist__set_selected(struct evlist *evlist, struct evsel *evsel); int evlist__create_maps(struct evlist *evlist, struct target *target); int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel); @@ -238,7 +237,7 @@ static inline struct evsel *evlist__last(struct evlist *evlist) int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size); int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size); -bool perf_evlist__can_select_event(struct evlist *evlist, const char *str); +bool evlist__can_select_event(struct evlist *evlist, const char *str); void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel); /** diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 220b9910427b..f72c8e05e15c 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -217,7 +217,7 @@ int record_opts__config(struct record_opts *opts) return record_opts__config_freq(opts); } -bool perf_evlist__can_select_event(struct evlist *evlist, const char *str) +bool evlist__can_select_event(struct evlist *evlist, const char *str) { struct evlist *temp_evlist; struct evsel *evsel; -- cgit v1.3.1 From 606e2c29334556797e1639115bd198aedb331f07 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 15:04:05 -0300 Subject: perf evlist: Use the right prefix for alternative 'struct evlist' constructors perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/intel-cqm.c | 2 +- tools/perf/tests/bpf.c | 2 +- tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/event_update.c | 3 +-- tools/perf/tests/expand-cgroup.c | 3 +-- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/openat-syscall-tp-fields.c | 2 +- tools/perf/tests/perf-record.c | 4 ++-- tools/perf/tests/task-exit.c | 4 ++-- tools/perf/tests/topology.c | 2 +- tools/perf/util/evlist.c | 4 ++-- tools/perf/util/evlist.h | 4 ++-- 12 files changed, 16 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c index 3ec562a2aaba..27dd8cf9e060 100644 --- a/tools/perf/arch/x86/tests/intel-cqm.c +++ b/tools/perf/arch/x86/tests/intel-cqm.c @@ -52,7 +52,7 @@ int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subt evlist = evlist__new(); if (!evlist) { - pr_debug("perf_evlist__new failed\n"); + pr_debug("evlist__new failed\n"); return TEST_FAIL; } diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index d4b232fe9448..22d7f567c41c 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -144,7 +144,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), pid[sizeof(pid) - 1] = '\0'; opts.target.tid = opts.target.pid = pid; - /* Instead of perf_evlist__new_default, don't add default events */ + /* Instead of evlist__new_default, don't add default events */ evlist = evlist__new(); if (!evlist) { pr_debug("Not enough memory to create evlist\n"); diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 339d432b1bf1..0c494aa90a8d 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -637,7 +637,7 @@ static int do_test_code_reading(bool try_kcore) evlist = evlist__new(); if (!evlist) { - pr_debug("perf_evlist__new failed\n"); + pr_debug("evlist__new failed\n"); goto out_put; } diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index bdcf032f8516..656218179222 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -85,11 +85,10 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unused) { - struct evlist *evlist; struct evsel *evsel; struct event_name tmp; + struct evlist *evlist = evlist__new_default(); - evlist = perf_evlist__new_default(); TEST_ASSERT_VAL("failed to get evlist", evlist); evsel = evlist__first(evlist); diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c index e8187f37fe1e..0e46aeb843ce 100644 --- a/tools/perf/tests/expand-cgroup.c +++ b/tools/perf/tests/expand-cgroup.c @@ -100,10 +100,9 @@ out: for (i = 0; i < nr_events; i++) static int expand_default_events(void) { int ret; - struct evlist *evlist; struct rblist metric_events; + struct evlist *evlist = evlist__new_default(); - evlist = perf_evlist__new_default(); TEST_ASSERT_VAL("failed to get evlist", evlist); rblist__init(&metric_events); diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index c01d7e12d241..57093aeacc6f 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -69,7 +69,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse evlist = evlist__new(); if (evlist == NULL) { - pr_debug("perf_evlist__new\n"); + pr_debug("evlist__new\n"); goto out_free_cpus; } diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 78367b17f1bb..5e4af2f0f14a 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -42,7 +42,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest char sbuf[STRERR_BUFSIZE]; if (evlist == NULL) { - pr_debug("%s: perf_evlist__new\n", __func__); + pr_debug("%s: evlist__new\n", __func__); goto out; } diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 01c9282c7a3b..61d1ff30253c 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -53,7 +53,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus }; cpu_set_t cpu_mask; size_t cpu_mask_size = sizeof(cpu_mask); - struct evlist *evlist = perf_evlist__new_dummy(); + struct evlist *evlist = evlist__new_dummy(); struct evsel *evsel; struct perf_sample sample; const char *cmd = "sleep"; @@ -71,7 +71,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus char sbuf[STRERR_BUFSIZE]; if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */ - evlist = perf_evlist__new_default(); + evlist = evlist__new_default(); if (evlist == NULL) { pr_debug("Not enough memory to create evlist\n"); diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 452d51048cc1..bbf94e4aa145 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -58,9 +58,9 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused signal(SIGCHLD, sig_handler); - evlist = perf_evlist__new_default(); + evlist = evlist__new_default(); if (evlist == NULL) { - pr_debug("perf_evlist__new_default\n"); + pr_debug("evlist__new_default\n"); return -1; } diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 22daf2bdf5fa..165feedc7863 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -40,7 +40,7 @@ static int session_write_header(char *path) session = perf_session__new(&data, false, NULL); TEST_ASSERT_VAL("can't get session", !IS_ERR(session)); - session->evlist = perf_evlist__new_default(); + session->evlist = evlist__new_default(); TEST_ASSERT_VAL("can't get evlist", session->evlist); perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index a11364d94720..70b5b6e506c8 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -78,7 +78,7 @@ struct evlist *evlist__new(void) return evlist; } -struct evlist *perf_evlist__new_default(void) +struct evlist *evlist__new_default(void) { struct evlist *evlist = evlist__new(); @@ -90,7 +90,7 @@ struct evlist *perf_evlist__new_default(void) return evlist; } -struct evlist *perf_evlist__new_dummy(void) +struct evlist *evlist__new_dummy(void) { struct evlist *evlist = evlist__new(); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index e858a351b014..9e73d4ae16d9 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -87,8 +87,8 @@ struct evsel_str_handler { }; struct evlist *evlist__new(void); -struct evlist *perf_evlist__new_default(void); -struct evlist *perf_evlist__new_dummy(void); +struct evlist *evlist__new_default(void); +struct evlist *evlist__new_dummy(void); void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus, struct perf_thread_map *threads); void evlist__exit(struct evlist *evlist); -- cgit v1.3.1 From 78e1bc25786656c490befc6d44d265f263cb8861 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 15:07:49 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' event attribute config methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 2 +- tools/perf/builtin-record.c | 6 +++--- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-top.c | 4 ++-- tools/perf/builtin-trace.c | 2 +- tools/perf/tests/backward-ring-buffer.c | 2 +- tools/perf/tests/bpf.c | 2 +- tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/keep-tracking.c | 2 +- tools/perf/tests/parse-events.c | 10 ++++------ tools/perf/tests/perf-record.c | 2 +- tools/perf/tests/perf-time-to-tsc.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/util/evlist.c | 4 ++-- tools/perf/util/evlist.h | 7 +++---- tools/perf/util/record.c | 3 +-- tools/perf/util/session.c | 2 +- 17 files changed, 26 insertions(+), 30 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 7560d6b1d6a3..616125160b58 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1022,7 +1022,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm) struct evlist *evlist = kvm->evlist; char sbuf[STRERR_BUFSIZE]; - perf_evlist__config(evlist, &kvm->opts, NULL); + evlist__config(evlist, &kvm->opts, NULL); /* * Note: exclude_{guest,host} do not apply here. diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 412717406dc9..2a732414ae99 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -910,7 +910,7 @@ static int record__open(struct record *rec) pos->immediate = 1; } - perf_evlist__config(evlist, opts, &callchain_param); + evlist__config(evlist, opts, &callchain_param); evlist__for_each_entry(evlist, pos) { try_again: @@ -935,7 +935,7 @@ try_again: pos->supported = true; } - if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) { + if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) { pr_warning( "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" @@ -1444,7 +1444,7 @@ static int record__synthesize(struct record *rec, bool tail) goto out; } - if (!perf_evlist__exclude_kernel(rec->evlist)) { + if (!evlist__exclude_kernel(rec->evlist)) { err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, machine); WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e78614ab7a16..5efbd0602f17 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -563,7 +563,7 @@ static void report__warn_kptr_restrict(const struct report *rep) struct map *kernel_map = machine__kernel_map(&rep->session->machines.host); struct kmap *kernel_kmap = kernel_map ? map__kmap(kernel_map) : NULL; - if (perf_evlist__exclude_kernel(rep->session->evlist)) + if (evlist__exclude_kernel(rep->session->evlist)) return; if (kernel_map == NULL || diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 51600f656771..f65aa1c22c2a 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -779,7 +779,7 @@ static void perf_event__process_sample(struct perf_tool *tool, if (!machine->kptr_restrict_warned && symbol_conf.kptr_restrict && al.cpumode == PERF_RECORD_MISC_KERNEL) { - if (!perf_evlist__exclude_kernel(top->session->evlist)) { + if (!evlist__exclude_kernel(top->session->evlist)) { ui__warning( "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n" "Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" @@ -1022,7 +1022,7 @@ static int perf_top__start_counters(struct perf_top *top) goto out_err; } - perf_evlist__config(evlist, opts, &callchain_param); + evlist__config(evlist, opts, &callchain_param); evlist__for_each_entry(evlist, counter) { try_again: diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ebf4fe944525..fa7d57fc53da 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3962,7 +3962,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) goto out_delete_evlist; } - perf_evlist__config(evlist, &trace->opts, &callchain_param); + evlist__config(evlist, &trace->opts, &callchain_param); signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index f00f7f34efbd..b4b9a9488d51 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -127,7 +127,7 @@ int test__backward_ring_buffer(struct test *test __maybe_unused, int subtest __m goto out_delete_evlist; } - perf_evlist__config(evlist, &opts, NULL); + evlist__config(evlist, &opts, NULL); err = evlist__open(evlist); if (err < 0) { diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 22d7f567c41c..f57e075b0ed2 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -160,7 +160,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), evlist__splice_list_tail(evlist, &parse_state.list); evlist->nr_groups = parse_state.nr_groups; - perf_evlist__config(evlist, &opts, NULL); + evlist__config(evlist, &opts, NULL); err = evlist__open(evlist); if (err < 0) { diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 0c494aa90a8d..7c098d49c77e 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -651,7 +651,7 @@ static int do_test_code_reading(bool try_kcore) goto out_put; } - perf_evlist__config(evlist, &opts, NULL); + evlist__config(evlist, &opts, NULL); evsel = evlist__first(evlist); diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 50a0c9fcde7d..e6f1b2a38e03 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -92,7 +92,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un CHECK__(parse_events(evlist, "dummy:u", NULL)); CHECK__(parse_events(evlist, "cycles:u", NULL)); - perf_evlist__config(evlist, &opts, NULL); + evlist__config(evlist, &opts, NULL); evsel = evlist__first(evlist); diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 611512f22b34..a7f6661e6112 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -115,7 +115,7 @@ static int test__checkevent_symbolic_name_config(struct evlist *evlist) TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); /* - * The period value gets configured within perf_evlist__config, + * The period value gets configured within evlist__config, * while this test executes only parse events method. */ TEST_ASSERT_VAL("wrong period", @@ -443,7 +443,7 @@ static int test__checkevent_pmu(struct evlist *evlist) TEST_ASSERT_VAL("wrong config1", 1 == evsel->core.attr.config1); TEST_ASSERT_VAL("wrong config2", 3 == evsel->core.attr.config2); /* - * The period value gets configured within perf_evlist__config, + * The period value gets configured within evlist__config, * while this test executes only parse events method. */ TEST_ASSERT_VAL("wrong period", 0 == evsel->core.attr.sample_period); @@ -520,8 +520,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist) TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config); /* - * The period, time and callgraph value gets configured - * within perf_evlist__config, + * The period, time and callgraph value gets configured within evlist__config, * while this test executes only parse events method. */ TEST_ASSERT_VAL("wrong period", 0 == evsel->core.attr.sample_period); @@ -533,8 +532,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist) TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 2 == evsel->core.attr.config); /* - * The period, time and callgraph value gets configured - * within perf_evlist__config, + * The period, time and callgraph value gets configured within evlist__config, * while this test executes only parse events method. */ TEST_ASSERT_VAL("wrong period", 0 == evsel->core.attr.sample_period); diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 61d1ff30253c..0df471bf1590 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -109,7 +109,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus evsel__set_sample_bit(evsel, CPU); evsel__set_sample_bit(evsel, TID); evsel__set_sample_bit(evsel, TIME); - perf_evlist__config(evlist, &opts, NULL); + evlist__config(evlist, &opts, NULL); err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask); if (err < 0) { diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index a9560e0f6360..7cff02664d0e 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -80,7 +80,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe CHECK__(parse_events(evlist, "cycles:u", NULL)); - perf_evlist__config(evlist, &opts, NULL); + evlist__config(evlist, &opts, NULL); evsel = evlist__first(evlist); diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index a4cf5ee22c28..15a2ab765d89 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -432,7 +432,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ evsel__set_sample_bit(tracking_evsel, TIME); /* Config events */ - perf_evlist__config(evlist, &opts, NULL); + evlist__config(evlist, &opts, NULL); /* Check moved event is still at the front */ if (cycles_evsel != evlist__first(evlist)) { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 70b5b6e506c8..7ca92a5d3206 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1110,7 +1110,7 @@ u64 evlist__combined_branch_type(struct evlist *evlist) return branch_type; } -bool perf_evlist__valid_read_format(struct evlist *evlist) +bool evlist__valid_read_format(struct evlist *evlist) { struct evsel *first = evlist__first(evlist), *pos = first; u64 read_format = first->core.attr.read_format; @@ -1632,7 +1632,7 @@ state_err: return; } -bool perf_evlist__exclude_kernel(struct evlist *evlist) +bool evlist__exclude_kernel(struct evlist *evlist) { struct evsel *evsel; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 9e73d4ae16d9..ebc4550870a1 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -159,8 +159,7 @@ void evlist__close(struct evlist *evlist); struct callchain_param; void evlist__set_id_pos(struct evlist *evlist); -void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, - struct callchain_param *callchain); +void evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain); int record_opts__config(struct record_opts *opts); int evlist__prepare_workload(struct evlist *evlist, struct target *target, @@ -211,7 +210,7 @@ int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *even bool evlist__valid_sample_type(struct evlist *evlist); bool evlist__valid_sample_id_all(struct evlist *evlist); -bool perf_evlist__valid_read_format(struct evlist *evlist); +bool evlist__valid_read_format(struct evlist *evlist); void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list); @@ -321,7 +320,7 @@ struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str); struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event); -bool perf_evlist__exclude_kernel(struct evlist *evlist); +bool evlist__exclude_kernel(struct evlist *evlist); void evlist__force_leader(struct evlist *evlist); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index f72c8e05e15c..e70c9dd04567 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -89,8 +89,7 @@ static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *ev leader->core.attr.sample_type; } -void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, - struct callchain_param *callchain) +void evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain) { struct evsel *evsel; bool use_sample_identifier = false; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 6707a01b7ef8..bf10576d257a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -125,7 +125,7 @@ static int perf_session__open(struct perf_session *session) return -1; } - if (!perf_evlist__valid_read_format(session->evlist)) { + if (!evlist__valid_read_format(session->evlist)) { pr_err("non matching read_format\n"); return -1; } -- cgit v1.3.1 From 25f84702f3590ce6caa3e5bb98e001692f3a2b9e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 15:09:45 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' mmap pages parsing method perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 3 +-- tools/perf/builtin-record.c | 4 ++-- tools/perf/builtin-top.c | 3 +-- tools/perf/builtin-trace.c | 3 +-- tools/perf/util/evlist.c | 7 +++---- tools/perf/util/evlist.h | 6 ++---- 6 files changed, 10 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 616125160b58..1105c9e40a80 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1349,8 +1349,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid", "record events on existing process id"), OPT_CALLBACK('m', "mmap-pages", &kvm->opts.mmap_pages, "pages", - "number of mmap data pages", - perf_evlist__parse_mmap_pages), + "number of mmap data pages", evlist__parse_mmap_pages), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_BOOLEAN('a', "all-cpus", &kvm->opts.target.system_wide, diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2a732414ae99..92039fbf6337 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2221,7 +2221,7 @@ static int record__parse_mmap_pages(const struct option *opt, *p = '\0'; if (*s) { - ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s); + ret = __evlist__parse_mmap_pages(&mmap_pages, s); if (ret) goto out_free; opts->mmap_pages = mmap_pages; @@ -2232,7 +2232,7 @@ static int record__parse_mmap_pages(const struct option *opt, goto out_free; } - ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1); + ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1); if (ret) goto out_free; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index f65aa1c22c2a..3673c04d16b6 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1466,8 +1466,7 @@ int cmd_top(int argc, const char **argv) OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols, "hide kernel symbols"), OPT_CALLBACK('m', "mmap-pages", &opts->mmap_pages, "pages", - "number of mmap data pages", - perf_evlist__parse_mmap_pages), + "number of mmap data pages", evlist__parse_mmap_pages), OPT_INTEGER('r', "realtime", &top.realtime_prio, "collect data with this RT SCHED_FIFO priority"), OPT_INTEGER('d', "delay", &top.delay_secs, diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index fa7d57fc53da..85b6a46e85b6 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -4763,8 +4763,7 @@ int cmd_trace(int argc, const char **argv) OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, "child tasks do not inherit counters"), OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages", - "number of mmap data pages", - perf_evlist__parse_mmap_pages), + "number of mmap data pages", evlist__parse_mmap_pages), OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user", "user to profile"), OPT_CALLBACK(0, "duration", &trace, "float", diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 7ca92a5d3206..59264a7a858a 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -815,7 +815,7 @@ static long parse_pages_arg(const char *str, unsigned long min, return pages; } -int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str) +int __evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str) { unsigned long max = UINT_MAX; long pages; @@ -833,10 +833,9 @@ int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str) return 0; } -int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, - int unset __maybe_unused) +int evlist__parse_mmap_pages(const struct option *opt, const char *str, int unset __maybe_unused) { - return __perf_evlist__parse_mmap_pages(opt->value, str); + return __evlist__parse_mmap_pages(opt->value, str); } /** diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index ebc4550870a1..8a5b515193b3 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -169,10 +169,8 @@ int evlist__start_workload(struct evlist *evlist); struct option; -int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str); -int perf_evlist__parse_mmap_pages(const struct option *opt, - const char *str, - int unset); +int __evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str); +int evlist__parse_mmap_pages(const struct option *opt, const char *str, int unset); unsigned long perf_event_mlock_kb_in_pages(void); -- cgit v1.3.1 From 44d2a5573665ab5dfb72572e43184388d15d695e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 15:11:10 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' raw samples methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/s390-sample-raw.c | 3 +-- tools/perf/util/sample-raw.c | 4 ++-- tools/perf/util/sample-raw.h | 7 ++----- tools/perf/util/session.c | 2 +- 4 files changed, 6 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index d177e6179839..cfcf8d534d76 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -197,8 +197,7 @@ static void s390_cpumcfdg_dump(struct perf_sample *sample) * its raw data. * The function is only invoked when the dump flag -D is set. */ -void perf_evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, - struct perf_sample *sample) +void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) { struct evsel *ev_bc000; diff --git a/tools/perf/util/sample-raw.c b/tools/perf/util/sample-raw.c index e84bbe0e441a..cde5cd3ce49b 100644 --- a/tools/perf/util/sample-raw.c +++ b/tools/perf/util/sample-raw.c @@ -9,10 +9,10 @@ * Check platform the perf data file was created on and perform platform * specific interpretation. */ -void perf_evlist__init_trace_event_sample_raw(struct evlist *evlist) +void evlist__init_trace_event_sample_raw(struct evlist *evlist) { const char *arch_pf = perf_env__arch(evlist->env); if (arch_pf && !strcmp("s390", arch_pf)) - evlist->trace_event_sample_raw = perf_evlist__s390_sample_raw; + evlist->trace_event_sample_raw = evlist__s390_sample_raw; } diff --git a/tools/perf/util/sample-raw.h b/tools/perf/util/sample-raw.h index afe1491a117e..4be84a5510cf 100644 --- a/tools/perf/util/sample-raw.h +++ b/tools/perf/util/sample-raw.h @@ -6,9 +6,6 @@ struct evlist; union perf_event; struct perf_sample; -void perf_evlist__s390_sample_raw(struct evlist *evlist, - union perf_event *event, - struct perf_sample *sample); - -void perf_evlist__init_trace_event_sample_raw(struct evlist *evlist); +void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample); +void evlist__init_trace_event_sample_raw(struct evlist *evlist); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index bf10576d257a..83de32090008 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -221,7 +221,7 @@ struct perf_session *perf_session__new(struct perf_data *data, perf_session__set_comm_exec(session); } - perf_evlist__init_trace_event_sample_raw(session->evlist); + evlist__init_trace_event_sample_raw(session->evlist); /* Open the directory data. */ if (data->is_dir) { -- cgit v1.3.1 From 1420ba2f6250270c4143d96af86f654f9f4d9997 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 15:13:12 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' header methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- tools/perf/util/evlist.h | 2 +- tools/perf/util/header.c | 15 +++++---------- tools/perf/util/session.c | 2 +- 4 files changed, 8 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 59264a7a858a..23eef3cedc2f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1131,7 +1131,7 @@ bool evlist__valid_read_format(struct evlist *evlist) return true; } -u16 perf_evlist__id_hdr_size(struct evlist *evlist) +u16 evlist__id_hdr_size(struct evlist *evlist) { struct evsel *first = evlist__first(evlist); struct perf_sample *data; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 8a5b515193b3..9b0c795736bb 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -201,7 +201,7 @@ u64 __evlist__combined_sample_type(struct evlist *evlist); u64 evlist__combined_sample_type(struct evlist *evlist); u64 evlist__combined_branch_type(struct evlist *evlist); bool evlist__sample_id_all(struct evlist *evlist); -u16 perf_evlist__id_hdr_size(struct evlist *evlist); +u16 evlist__id_hdr_size(struct evlist *evlist); int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample); int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index be219051119c..062383e225a3 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2275,9 +2275,7 @@ static struct evsel *evlist__find_by_index(struct evlist *evlist, int idx) return NULL; } -static void -perf_evlist__set_event_name(struct evlist *evlist, - struct evsel *event) +static void evlist__set_event_name(struct evlist *evlist, struct evsel *event) { struct evsel *evsel; @@ -2312,7 +2310,7 @@ process_event_desc(struct feat_fd *ff, void *data __maybe_unused) } for (evsel = events; evsel->core.attr.size; evsel++) - perf_evlist__set_event_name(session->evlist, evsel); + evlist__set_event_name(session->evlist, evsel); if (!session->data->is_pipe) free_event_desc(events); @@ -3765,8 +3763,7 @@ static int evsel__prepare_tracepoint_event(struct evsel *evsel, struct tep_handl return 0; } -static int perf_evlist__prepare_tracepoint_events(struct evlist *evlist, - struct tep_handle *pevent) +static int evlist__prepare_tracepoint_events(struct evlist *evlist, struct tep_handle *pevent) { struct evsel *pos; @@ -3881,8 +3878,7 @@ int perf_session__read_header(struct perf_session *session) perf_header__process_sections(header, fd, &session->tevent, perf_file_section__process); - if (perf_evlist__prepare_tracepoint_events(session->evlist, - session->tevent.pevent)) + if (evlist__prepare_tracepoint_events(session->evlist, session->tevent.pevent)) goto out_delete_evlist; return 0; @@ -4103,8 +4099,7 @@ int perf_event__process_tracing_data(struct perf_session *session, return -1; } - perf_evlist__prepare_tracepoint_events(session->evlist, - session->tevent.pevent); + evlist__prepare_tracepoint_events(session->evlist, session->tevent.pevent); return size_read + padding; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 83de32090008..69799e747bf0 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -135,7 +135,7 @@ static int perf_session__open(struct perf_session *session) void perf_session__set_id_hdr_size(struct perf_session *session) { - u16 id_hdr_size = perf_evlist__id_hdr_size(session->evlist); + u16 id_hdr_size = evlist__id_hdr_size(session->evlist); machines__set_id_hdr_size(&session->machines, id_hdr_size); } -- cgit v1.3.1 From 515ea461c26e19ebca4351266480306979a113fc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 15:16:29 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' deliver event method perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 69799e747bf0..3b3c50b12791 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1407,13 +1407,9 @@ static int deliver_sample_group(struct evlist *evlist, return ret; } -static int - perf_evlist__deliver_sample(struct evlist *evlist, - struct perf_tool *tool, - union perf_event *event, - struct perf_sample *sample, - struct evsel *evsel, - struct machine *machine) +static int evlist__deliver_sample(struct evlist *evlist, struct perf_tool *tool, + union perf_event *event, struct perf_sample *sample, + struct evsel *evsel, struct machine *machine) { /* We know evsel != NULL. */ u64 sample_type = evsel->core.attr.sample_type; @@ -1458,7 +1454,7 @@ static int machines__deliver_event(struct machines *machines, ++evlist->stats.nr_unprocessable_samples; return 0; } - return perf_evlist__deliver_sample(evlist, tool, event, sample, evsel, machine); + return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine); case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); case PERF_RECORD_MMAP2: -- cgit v1.3.1 From f63c2f5a8b0eb4a7a8d5d19c8e0ccbbd0ee41d14 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 15:17:20 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' nr_threads method perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 23eef3cedc2f..493819173a8e 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -330,8 +330,7 @@ int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, return 0; } -static int perf_evlist__nr_threads(struct evlist *evlist, - struct evsel *evsel) +static int evlist__nr_threads(struct evlist *evlist, struct evsel *evsel) { if (evsel->core.system_wide) return 1; @@ -450,7 +449,7 @@ void evlist__toggle_enable(struct evlist *evlist) static int evlist__enable_event_cpu(struct evlist *evlist, struct evsel *evsel, int cpu) { int thread; - int nr_threads = perf_evlist__nr_threads(evlist, evsel); + int nr_threads = evlist__nr_threads(evlist, evsel); if (!evsel->core.fd) return -EINVAL; -- cgit v1.3.1 From b979a2f13b1b98c26b8f94d9401cd5255f75f978 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 15:18:48 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' diff methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index cefc71506409..8f6c784ce629 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -494,7 +494,7 @@ static struct evsel *evsel_match(struct evsel *evsel, return NULL; } -static void perf_evlist__collapse_resort(struct evlist *evlist) +static void evlist__collapse_resort(struct evlist *evlist) { struct evsel *evsel; @@ -1214,7 +1214,7 @@ static int __cmd_diff(void) goto out_delete; } - perf_evlist__collapse_resort(d->session->evlist); + evlist__collapse_resort(d->session->evlist); if (pdiff.ptime_range) zfree(&pdiff.ptime_range); -- cgit v1.3.1 From db0ea13cc741e7c93f26bf5b3d313f48d00f15a4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Nov 2020 15:19:40 -0300 Subject: perf evlist: Use the right prefix for 'struct evlist' record methods perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/, go on completing this split. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 92039fbf6337..d832c108a1ca 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1349,8 +1349,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, static void snapshot_sig_handler(int sig); static void alarm_sig_handler(int sig); -static const struct perf_event_mmap_page * -perf_evlist__pick_pc(struct evlist *evlist) +static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist) { if (evlist) { if (evlist->mmap && evlist->mmap[0].core.base) @@ -1363,9 +1362,7 @@ perf_evlist__pick_pc(struct evlist *evlist) static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) { - const struct perf_event_mmap_page *pc; - - pc = perf_evlist__pick_pc(rec->evlist); + const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist); if (pc) return pc; return NULL; -- cgit v1.3.1 From f3ed003e64fe7faecbe4c34bd2a1f5571a23f05a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 26 Oct 2020 18:59:27 +0200 Subject: kunit: Introduce get_file_path() helper Helper allows to derive file names depending on --build_dir argument. Signed-off-by: Andy Shevchenko Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_kernel.py | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 2e3cc0fac726..57c1724b7e5d 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -23,6 +23,11 @@ DEFAULT_KUNITCONFIG_PATH = 'arch/um/configs/kunit_defconfig' BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config' OUTFILE_PATH = 'test.log' +def get_file_path(build_dir, default): + if build_dir: + default = os.path.join(build_dir, default) + return default + class ConfigError(Exception): """Represents an error trying to configure the Linux kernel.""" @@ -97,9 +102,7 @@ class LinuxSourceTreeOperations(object): def linux_bin(self, params, timeout, build_dir): """Runs the Linux UML binary. Must be named 'linux'.""" - linux_bin = './linux' - if build_dir: - linux_bin = os.path.join(build_dir, 'linux') + linux_bin = get_file_path(build_dir, 'linux') outfile = get_outfile_path(build_dir) with open(outfile, 'w') as output: process = subprocess.Popen([linux_bin] + params, @@ -108,22 +111,13 @@ class LinuxSourceTreeOperations(object): process.wait(timeout) def get_kconfig_path(build_dir): - kconfig_path = KCONFIG_PATH - if build_dir: - kconfig_path = os.path.join(build_dir, KCONFIG_PATH) - return kconfig_path + return get_file_path(build_dir, KCONFIG_PATH) def get_kunitconfig_path(build_dir): - kunitconfig_path = KUNITCONFIG_PATH - if build_dir: - kunitconfig_path = os.path.join(build_dir, KUNITCONFIG_PATH) - return kunitconfig_path + return get_file_path(build_dir, KUNITCONFIG_PATH) def get_outfile_path(build_dir): - outfile_path = OUTFILE_PATH - if build_dir: - outfile_path = os.path.join(build_dir, OUTFILE_PATH) - return outfile_path + return get_file_path(build_dir, OUTFILE_PATH) class LinuxSourceTree(object): """Represents a Linux kernel source tree with KUnit tests.""" -- cgit v1.3.1 From 271e0c9dce1b02a825b3cc1a7aa1fab7c381d44b Mon Sep 17 00:00:00 2001 From: Libo Chen Date: Fri, 20 Nov 2020 18:12:43 -0800 Subject: ktest.pl: Fix incorrect reboot for grub2bls This issue was first noticed when I was testing different kernels on Oracle Linux 8 which as Fedora 30+ adopts BLS as default. Even though a kernel entry was added successfully and the index of that kernel entry was retrieved correctly, ktest still wouldn't reboot the system into user-specified kernel. The bug was spotted in subroutine reboot_to where the if-statement never checks for REBOOT_TYPE "grub2bls", therefore the desired entry will not be set for the next boot. Add a check for "grub2bls" so that $grub_reboot $grub_number can be run before a reboot if REBOOT_TYPE is "grub2bls" then we can boot to the correct kernel. Link: https://lkml.kernel.org/r/20201121021243.1532477-1-libo.chen@oracle.com Cc: stable@vger.kernel.org Fixes: ac2466456eaa ("ktest: introduce grub2bls REBOOT_TYPE option") Signed-off-by: Libo Chen Signed-off-by: Steven Rostedt (VMware) --- tools/testing/ktest/ktest.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index cb16d2aac51c..54188ee16c48 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -2040,7 +2040,7 @@ sub reboot_to { if ($reboot_type eq "grub") { run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch)'"; - } elsif ($reboot_type eq "grub2") { + } elsif (($reboot_type eq "grub2") or ($reboot_type eq "grub2bls")) { run_ssh "$grub_reboot $grub_number"; } elsif ($reboot_type eq "syslinux") { run_ssh "$syslinux --once \\\"$syslinux_label\\\" $syslinux_path"; -- cgit v1.3.1 From 8cd6bc0359deebd8500e6de95899a8a78d3ec4ba Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 30 Nov 2020 16:32:55 -0500 Subject: ktest.pl: If size of log is too big to email, email error message If the size of the error log is too big to send via email, and the sending fails, it wont email any result. This can be confusing for the user who is waiting for an email on the completion of the tests. If it fails to send email, then try again without the log file stating that it failed to send an email. Obviously this will not be of use if the sending of email failed for some other reasons, but it will at least give the user some information when it fails for the most common reason. Cc: stable@vger.kernel.org Fixes: c2d84ddb338c8 ("ktest.pl: Add MAIL_COMMAND option to define how to send email") Signed-off-by: Steven Rostedt (VMware) --- tools/testing/ktest/ktest.pl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 54188ee16c48..54f7d008e840 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -4253,7 +4253,12 @@ sub do_send_mail { $mail_command =~ s/\$SUBJECT/$subject/g; $mail_command =~ s/\$MESSAGE/$message/g; - run_command $mail_command; + my $ret = run_command $mail_command; + if (!$ret && defined($file)) { + # try again without the file + $message .= "\n\n*** FAILED TO SEND LOG ***\n\n"; + do_send_email($subject, $message); + } } sub send_email { -- cgit v1.3.1 From 170f4869e66275f498ae4736106fb54c0fdcd036 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 30 Nov 2020 16:38:41 -0500 Subject: ktest.pl: Fix the logic for truncating the size of the log file for email The logic for truncating the log file for emailing based on the MAIL_MAX_SIZE option is confusing and incorrect. Simplify it and have the tail of the log file truncated to the max size specified in the config. Cc: stable@vger.kernel.org Fixes: 855d8abd2e8ff ("ktest.pl: Change the logic to control the size of the log file emailed") Signed-off-by: Steven Rostedt (VMware) --- tools/testing/ktest/ktest.pl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 54f7d008e840..4e2450964517 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1499,17 +1499,16 @@ sub dodie { my $log_file; if (defined($opt{"LOG_FILE"})) { - my $whence = 0; # beginning of file - my $pos = $test_log_start; + my $whence = 2; # End of file + my $log_size = tell LOG; + my $size = $log_size - $test_log_start; if (defined($mail_max_size)) { - my $log_size = tell LOG; - $log_size -= $test_log_start; - if ($log_size > $mail_max_size) { - $whence = 2; # end of file - $pos = - $mail_max_size; + if ($size > $mail_max_size) { + $size = $mail_max_size; } } + my $pos = - $size; $log_file = "$tmpdir/log"; open (L, "$opt{LOG_FILE}") or die "Can't open $opt{LOG_FILE} to read)"; open (O, "> $tmpdir/log") or die "Can't open $tmpdir/log\n"; -- cgit v1.3.1 From 854055c0cf30d732b3514ce7956976f60496b1a1 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Thu, 26 Nov 2020 18:49:46 +0000 Subject: selftests/bpf: Fix flavored variants of test_ima Flavored variants of test_progs (e.g. test_progs-no_alu32) change their working directory to the corresponding subdirectory (e.g. no_alu32). Since the setup script required by test_ima (ima_setup.sh) is not mentioned in the dependencies, it does not get copied to these subdirectories and causes flavored variants of test_ima to fail. Adding the script to TRUNNER_EXTRA_FILES ensures that the file is also copied to the subdirectories for the flavored variants of test_progs. Fixes: 34b82d3ac105 ("bpf: Add a selftest for bpf_ima_inode_hash") Reported-by: Yonghong Song Suggested-by: Yonghong Song Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201126184946.1708213-1-kpsingh@chromium.org --- tools/testing/selftests/bpf/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 3d5940cd110d..894192c319fb 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -389,6 +389,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ network_helpers.c testing_helpers.c \ btf_helpers.c flow_dissector_load.h TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ + ima_setup.sh \ $(wildcard progs/btf_dump_test_case_*.c) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -- cgit v1.3.1 From e86843580d1bb1ce12544bca3115cf11d51603ff Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 20 Nov 2020 11:29:13 +0900 Subject: tools/bootconfig: Store size and checksum in footer as le32 Store the size and the checksum fields in the footer as le32 instead of u32. This will allow us to apply bootconfig to the cross build initrd without caring the endianness. Link: https://lkml.kernel.org/r/160583935332.547349.5897811300636587426.stgit@devnote2 Reported-by: Steven Rostedt Suggested-by: Linus Torvalds Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 4a445b6304bb..7362bef1a368 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -183,9 +184,11 @@ static int load_xbc_from_initrd(int fd, char **buf) if (read(fd, &size, sizeof(u32)) < 0) return pr_errno("Failed to read size", -errno); + size = le32toh(size); if (read(fd, &csum, sizeof(u32)) < 0) return pr_errno("Failed to read checksum", -errno); + csum = le32toh(csum); /* Wrong size error */ if (stat.st_size < size + 8 + BOOTCONFIG_MAGIC_LEN) { @@ -407,10 +410,10 @@ static int apply_xbc(const char *path, const char *xbc_path) /* Add a footer */ p = data + size; - *(u32 *)p = size; + *(u32 *)p = htole32(size); p += sizeof(u32); - *(u32 *)p = csum; + *(u32 *)p = htole32(csum); p += sizeof(u32); memcpy(p, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); -- cgit v1.3.1 From 0c7a7e1a8ff3fb6d01467b43c62760e6bf0afab3 Mon Sep 17 00:00:00 2001 From: David Gow Date: Mon, 9 Nov 2020 23:29:36 -0800 Subject: kunit: kunit_tool: Correctly parse diagnostic messages Currently, kunit_tool expects all diagnostic lines in test results to contain ": " somewhere, as both the subtest header and the crash report do. Fix this to accept any line starting with (minus indent) "# " as being a valid diagnostic line. This matches what the TAP spec[1] and the draft KTAP spec[2] are expecting. [1]: http://testanything.org/tap-specification.html [2]: https://lore.kernel.org/linux-kselftest/CY4PR13MB1175B804E31E502221BC8163FD830@CY4PR13MB1175.namprd13.prod.outlook.com/T/ Signed-off-by: David Gow Acked-by: Marco Elver Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_parser.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index bbfe1b4e4c1c..6614ec4d0898 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -135,8 +135,8 @@ def parse_ok_not_ok_test_case(lines: List[str], test_case: TestCase) -> bool: else: return False -SUBTEST_DIAGNOSTIC = re.compile(r'^[\s]+# .*?: (.*)$') -DIAGNOSTIC_CRASH_MESSAGE = 'kunit test case crashed!' +SUBTEST_DIAGNOSTIC = re.compile(r'^[\s]+# (.*)$') +DIAGNOSTIC_CRASH_MESSAGE = re.compile(r'^[\s]+# .*?: kunit test case crashed!$') def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool: save_non_diagnositic(lines, test_case) @@ -146,7 +146,8 @@ def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool: match = SUBTEST_DIAGNOSTIC.match(line) if match: test_case.log.append(lines.pop(0)) - if match.group(1) == DIAGNOSTIC_CRASH_MESSAGE: + crash_match = DIAGNOSTIC_CRASH_MESSAGE.match(line) + if crash_match: test_case.status = TestStatus.TEST_CRASHED return True else: -- cgit v1.3.1 From 008cb2ec4354fa1c4a166eca8e5eec15112847b3 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Sun, 29 Nov 2020 14:54:07 +0200 Subject: selftests: forwarding: Add QinQ veto testing Test that each veto that was added in the previous patch, is indeed vetoed. $ ./q_in_q_veto.sh TEST: create 802.1ad vlan upper on top of a front panel [ OK ] TEST: create 802.1ad vlan upper on top of a bridge port [ OK ] TEST: create 802.1ad vlan upper on top of a lag [ OK ] TEST: create 802.1ad vlan upper on top 802.1q bridge [ OK ] TEST: create 802.1ad vlan upper on top 802.1ad bridge [ OK ] TEST: create 802.1q vlan upper on top 802.1ad bridge [ OK ] TEST: create vlan upper on top of front panel enslaved to 802.1ad bridge [ OK ] TEST: create vlan upper on top of lag enslaved to 802.1ad bridge [ OK ] TEST: enslave front panel with vlan upper to 802.1ad bridge [ OK ] TEST: enslave lag with vlan upper to 802.1ad bridge [ OK ] TEST: IP address addition to 802.1ad bridge [ OK ] TEST: switch bridge protocol [ OK ] Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/mlxsw/q_in_q_veto.sh | 296 +++++++++++++++++++++ 1 file changed, 296 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh new file mode 100755 index 000000000000..7edaed8eb86a --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh @@ -0,0 +1,296 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + create_8021ad_vlan_upper_on_top_front_panel_port + create_8021ad_vlan_upper_on_top_bridge_port + create_8021ad_vlan_upper_on_top_lag + create_8021ad_vlan_upper_on_top_bridge + create_8021ad_vlan_upper_on_top_8021ad_bridge + create_vlan_upper_on_top_8021ad_bridge + create_vlan_upper_on_top_front_panel_enslaved_to_8021ad_bridge + create_vlan_upper_on_top_lag_enslaved_to_8021ad_bridge + enslave_front_panel_with_vlan_upper_to_8021ad_bridge + enslave_lag_with_vlan_upper_to_8021ad_bridge + add_ip_address_to_8021ad_bridge + switch_bridge_protocol_from_8021q_to_8021ad +" +NUM_NETIFS=2 +source $lib_dir/lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up + + sleep 10 +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +create_vlan_upper_on_top_of_bridge() +{ + RET=0 + + local bridge_proto=$1; shift + local netdev_proto=$1; shift + + ip link add dev br0 type bridge vlan_filtering 1 \ + vlan_protocol $bridge_proto vlan_default_pvid 0 mcast_snooping 0 + + ip link set dev br0 up + ip link set dev $swp1 master br0 + + ip link add name br0.100 link br0 type vlan \ + protocol $netdev_proto id 100 2>/dev/null + check_fail $? "$netdev_proto vlan upper creation on top of an $bridge_proto bridge not rejected" + + ip link add name br0.100 link br0 type vlan \ + protocol $netdev_proto id 100 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "$netdev_proto vlan upper creation on top of an $bridge_proto bridge rejected without extack" + + log_test "create $netdev_proto vlan upper on top $bridge_proto bridge" + + ip link del dev br0 +} + +create_8021ad_vlan_upper_on_top_front_panel_port() +{ + RET=0 + + ip link add name $swp1.100 link $swp1 type vlan \ + protocol 802.1ad id 100 2>/dev/null + check_fail $? "802.1ad vlan upper creation on top of a front panel not rejected" + + ip link add name $swp1.100 link $swp1 type vlan \ + protocol 802.1ad id 100 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "802.1ad vlan upper creation on top of a front panel rejected without extack" + + log_test "create 802.1ad vlan upper on top of a front panel" +} + +create_8021ad_vlan_upper_on_top_bridge_port() +{ + RET=0 + + ip link add dev br0 type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + + ip link set dev $swp1 master br0 + ip link set dev br0 up + + ip link add name $swp1.100 link $swp1 type vlan \ + protocol 802.1ad id 100 2>/dev/null + check_fail $? "802.1ad vlan upper creation on top of a bridge port not rejected" + + ip link add name $swp1.100 link $swp1 type vlan \ + protocol 802.1ad id 100 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "802.1ad vlan upper creation on top of a bridge port rejected without extack" + + log_test "create 802.1ad vlan upper on top of a bridge port" + + ip link del dev br0 +} + +create_8021ad_vlan_upper_on_top_lag() +{ + RET=0 + + ip link add name bond1 type bond mode 802.3ad + ip link set dev $swp1 down + ip link set dev $swp1 master bond1 + + ip link add name bond1.100 link bond1 type vlan \ + protocol 802.1ad id 100 2>/dev/null + check_fail $? "802.1ad vlan upper creation on top of a lag not rejected" + + ip link add name bond1.100 link bond1 type vlan \ + protocol 802.1ad id 100 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "802.1ad vlan upper creation on top of a lag rejected without extack" + + log_test "create 802.1ad vlan upper on top of a lag" + + ip link del dev bond1 +} + +create_8021ad_vlan_upper_on_top_bridge() +{ + RET=0 + + create_vlan_upper_on_top_of_bridge "802.1q" "802.1ad" +} + +create_8021ad_vlan_upper_on_top_8021ad_bridge() +{ + RET=0 + + create_vlan_upper_on_top_of_bridge "802.1ad" "802.1ad" +} + +create_vlan_upper_on_top_8021ad_bridge() +{ + RET=0 + + create_vlan_upper_on_top_of_bridge "802.1ad" "802.1q" +} + +create_vlan_upper_on_top_front_panel_enslaved_to_8021ad_bridge() +{ + RET=0 + + ip link add dev br0 type bridge vlan_filtering 1 \ + vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 up + + ip link set dev $swp1 master br0 + + ip link add name $swp1.100 link $swp1 type vlan id 100 2>/dev/null + check_fail $? "vlan upper creation on top of front panel enslaved to 802.1ad bridge not rejected" + + ip link add name $swp1.100 link $swp1 type vlan id 100 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "vlan upper creation on top of front panel enslaved to 802.1ad bridge rejected without extack" + + log_test "create vlan upper on top of front panel enslaved to 802.1ad bridge" + + ip link del dev br0 +} + +create_vlan_upper_on_top_lag_enslaved_to_8021ad_bridge() +{ + RET=0 + + ip link add dev br0 type bridge vlan_filtering 1 \ + vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 up + + ip link add name bond1 type bond mode 802.3ad + ip link set dev $swp1 down + ip link set dev $swp1 master bond1 + ip link set dev bond1 master br0 + + ip link add name bond1.100 link bond1 type vlan id 100 2>/dev/null + check_fail $? "vlan upper creation on top of lag enslaved to 802.1ad bridge not rejected" + + ip link add name bond1.100 link bond1 type vlan id 100 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "vlan upper creation on top of lag enslaved to 802.1ad bridge rejected without extack" + + log_test "create vlan upper on top of lag enslaved to 802.1ad bridge" + + ip link del dev bond1 + ip link del dev br0 +} + +enslave_front_panel_with_vlan_upper_to_8021ad_bridge() +{ + RET=0 + + ip link add dev br0 type bridge vlan_filtering 1 \ + vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 up + + ip link add name $swp1.100 link $swp1 type vlan id 100 + + ip link set dev $swp1 master br0 2>/dev/null + check_fail $? "front panel with vlan upper enslavemnt to 802.1ad bridge not rejected" + + ip link set dev $swp1 master br0 2>&1 >/dev/null | grep -q mlxsw_spectrum + check_err $? "front panel with vlan upper enslavemnt to 802.1ad bridge rejected without extack" + + log_test "enslave front panel with vlan upper to 802.1ad bridge" + + ip link del dev $swp1.100 + ip link del dev br0 +} + +enslave_lag_with_vlan_upper_to_8021ad_bridge() +{ + RET=0 + + ip link add dev br0 type bridge vlan_filtering 1 \ + vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 up + + ip link add name bond1 type bond mode 802.3ad + ip link set dev $swp1 down + ip link set dev $swp1 master bond1 + ip link add name bond1.100 link bond1 type vlan id 100 + + ip link set dev bond1 master br0 2>/dev/null + check_fail $? "lag with vlan upper enslavemnt to 802.1ad bridge not rejected" + + ip link set dev bond1 master br0 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "lag with vlan upper enslavemnt to 802.1ad bridge rejected without extack" + + log_test "enslave lag with vlan upper to 802.1ad bridge" + + ip link del dev bond1 + ip link del dev br0 +} + + +add_ip_address_to_8021ad_bridge() +{ + RET=0 + + ip link add dev br0 type bridge vlan_filtering 1 \ + vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + + ip link set dev br0 up + ip link set dev $swp1 master br0 + + ip addr add dev br0 192.0.2.17/28 2>/dev/null + check_fail $? "IP address addition to 802.1ad bridge not rejected" + + ip addr add dev br0 192.0.2.17/28 2>&1 >/dev/null | grep -q mlxsw_spectrum + check_err $? "IP address addition to 802.1ad bridge rejected without extack" + + log_test "IP address addition to 802.1ad bridge" + + ip link del dev br0 +} + +switch_bridge_protocol_from_8021q_to_8021ad() +{ + RET=0 + + ip link add dev br0 type bridge vlan_filtering 1 \ + vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + + ip link set dev br0 up + ip link set dev $swp1 master br0 + + ip link set dev br0 type bridge vlan_protocol 802.1q 2>/dev/null + check_fail $? "switching bridge protocol from 802.1q to 802.1ad not rejected" + + log_test "switch bridge protocol" + + ip link del dev br0 +} + + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS -- cgit v1.3.1 From f6a8250ea1e42ad1f4f3bab01c851ec5fd48f0e7 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 30 Nov 2020 14:33:35 -0800 Subject: libbpf: Fix ring_buffer__poll() to return number of consumed samples Fix ring_buffer__poll() to return the number of non-discarded records consumed, just like its documentation states. It's also consistent with ring_buffer__consume() return. Fix up selftests with wrong expected results. Fixes: bf99c936f947 ("libbpf: Add BPF ring buffer support") Fixes: cb1c9ddd5525 ("selftests/bpf: Add BPF ringbuf selftests") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201130223336.904192-1-andrii@kernel.org --- tools/lib/bpf/ringbuf.c | 2 +- tools/testing/selftests/bpf/prog_tests/ringbuf.c | 2 +- tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 5c6522c89af1..98537ff2679e 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -278,7 +278,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms) err = ringbuf_process_ring(ring); if (err < 0) return err; - res += cnt; + res += err; } return cnt < 0 ? -errno : res; } diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index c1650548433c..1a48c6f7f54e 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -217,7 +217,7 @@ void test_ringbuf(void) if (CHECK(err, "join_bg", "err %d\n", err)) goto cleanup; - if (CHECK(bg_ret != 1, "bg_ret", "epoll_wait result: %ld", bg_ret)) + if (CHECK(bg_ret <= 0, "bg_ret", "epoll_wait result: %ld", bg_ret)) goto cleanup; /* 3 rounds, 2 samples each */ diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c index 78e450609803..d37161e59bb2 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c @@ -81,7 +81,7 @@ void test_ringbuf_multi(void) /* poll for samples, should get 2 ringbufs back */ err = ring_buffer__poll(ringbuf, -1); - if (CHECK(err != 4, "poll_res", "expected 4 records, got %d\n", err)) + if (CHECK(err != 2, "poll_res", "expected 2 records, got %d\n", err)) goto cleanup; /* expect extra polling to return nothing */ -- cgit v1.3.1 From 156c9b70dbfb83eeeff39e9202eb5f8bb6d0fd04 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 30 Nov 2020 14:33:36 -0800 Subject: selftests/bpf: Drain ringbuf samples at the end of test Avoid occasional test failures due to the last sample being delayed to another ring_buffer__poll() call. Instead, drain samples completely with ring_buffer__consume(). This is supposed to fix a rare and non-deterministic test failure in libbpf CI. Fixes: cb1c9ddd5525 ("selftests/bpf: Add BPF ringbuf selftests") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201130223336.904192-2-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/ringbuf.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index 1a48c6f7f54e..fddbc5db5d6a 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -220,6 +220,12 @@ void test_ringbuf(void) if (CHECK(bg_ret <= 0, "bg_ret", "epoll_wait result: %ld", bg_ret)) goto cleanup; + /* due to timing variations, there could still be non-notified + * samples, so consume them here to collect all the samples + */ + err = ring_buffer__consume(ringbuf); + CHECK(err < 0, "rb_consume", "failed: %d\b", err); + /* 3 rounds, 2 samples each */ cnt = atomic_xchg(&sample_cnt, 0); CHECK(cnt != 6, "cnt", "exp %d samples, got %d\n", 6, cnt); -- cgit v1.3.1 From 1967939462641d8b36bcb3fcf06d48e66cd67a4f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 29 Nov 2020 04:33:35 +0900 Subject: Compiler Attributes: remove CONFIG_ENABLE_MUST_CHECK Revert commit cebc04ba9aeb ("add CONFIG_ENABLE_MUST_CHECK"). A lot of warn_unused_result warnings existed in 2006, but until now they have been fixed thanks to people doing allmodconfig tests. Our goal is to always enable __must_check where appropriate, so this CONFIG option is no longer needed. I see a lot of defconfig (arch/*/configs/*_defconfig) files having: # CONFIG_ENABLE_MUST_CHECK is not set I did not touch them for now since it would be a big churn. If arch maintainers want to clean them up, please go ahead. While I was here, I also moved __must_check to compiler_attributes.h from compiler_types.h Signed-off-by: Masahiro Yamada Acked-by: Jason A. Donenfeld Acked-by: Nathan Chancellor Reviewed-by: Nick Desaulniers [Moved addition in compiler_attributes.h to keep it sorted] Signed-off-by: Miguel Ojeda --- include/linux/compiler_attributes.h | 6 ++++++ include/linux/compiler_types.h | 6 ------ lib/Kconfig.debug | 8 -------- tools/testing/selftests/wireguard/qemu/debug.config | 1 - 4 files changed, 6 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index b2a3f4f641a7..ea5e04e75845 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -272,6 +272,12 @@ */ #define __used __attribute__((__used__)) +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-warn_005funused_005fresult-function-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#nodiscard-warn-unused-result + */ +#define __must_check __attribute__((__warn_unused_result__)) + /* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-weak-function-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-weak-variable-attribute diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index ac3fa37a84f9..7ef20d1a6c28 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -110,12 +110,6 @@ struct ftrace_likely_data { unsigned long constant; }; -#ifdef CONFIG_ENABLE_MUST_CHECK -#define __must_check __attribute__((__warn_unused_result__)) -#else -#define __must_check -#endif - #if defined(CC_USING_HOTPATCH) #define notrace __attribute__((hotpatch(0, 0))) #elif defined(CC_USING_PATCHABLE_FUNCTION_ENTRY) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c789b39ed527..cb8ef4fd0d02 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -286,14 +286,6 @@ config GDB_SCRIPTS endif # DEBUG_INFO -config ENABLE_MUST_CHECK - bool "Enable __must_check logic" - default y - help - Enable the __must_check logic in the kernel build. Disable this to - suppress the "warning: ignoring return value of 'foo', declared with - attribute warn_unused_result" messages. - config FRAME_WARN int "Warn for stack frames larger than" range 0 8192 diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config index b50c2085c1ac..fe07d97df9fa 100644 --- a/tools/testing/selftests/wireguard/qemu/debug.config +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -1,5 +1,4 @@ CONFIG_LOCALVERSION="-debug" -CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_POINTER=y CONFIG_STACK_VALIDATION=y CONFIG_DEBUG_KERNEL=y -- cgit v1.3.1 From 179ef035992e89646e17138b18b130bb874b86bb Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Fri, 27 Nov 2020 14:32:36 -0500 Subject: selftests: Add kselftest for syscall user dispatch Implement functionality tests for syscall user dispatch. In order to make the test portable, refrain from open coding syscall dispatchers and calculating glibc memory ranges. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Acked-by: Kees Cook Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20201127193238.821364-6-krisman@collabora.com --- tools/testing/selftests/Makefile | 1 + .../selftests/syscall_user_dispatch/.gitignore | 3 + .../selftests/syscall_user_dispatch/Makefile | 9 + .../testing/selftests/syscall_user_dispatch/config | 1 + .../selftests/syscall_user_dispatch/sud_test.c | 310 +++++++++++++++++++++ 5 files changed, 324 insertions(+) create mode 100644 tools/testing/selftests/syscall_user_dispatch/.gitignore create mode 100644 tools/testing/selftests/syscall_user_dispatch/Makefile create mode 100644 tools/testing/selftests/syscall_user_dispatch/config create mode 100644 tools/testing/selftests/syscall_user_dispatch/sud_test.c (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index d9c283503159..96d5682fb9d8 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -56,6 +56,7 @@ TARGETS += sparc64 TARGETS += splice TARGETS += static_keys TARGETS += sync +TARGETS += syscall_user_dispatch TARGETS += sysctl TARGETS += tc-testing TARGETS += timens diff --git a/tools/testing/selftests/syscall_user_dispatch/.gitignore b/tools/testing/selftests/syscall_user_dispatch/.gitignore new file mode 100644 index 000000000000..f539615ad5da --- /dev/null +++ b/tools/testing/selftests/syscall_user_dispatch/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +sud_test +sud_benchmark diff --git a/tools/testing/selftests/syscall_user_dispatch/Makefile b/tools/testing/selftests/syscall_user_dispatch/Makefile new file mode 100644 index 000000000000..8e15fa42bcda --- /dev/null +++ b/tools/testing/selftests/syscall_user_dispatch/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +top_srcdir = ../../../.. +INSTALL_HDR_PATH = $(top_srcdir)/usr +LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ + +CFLAGS += -Wall -I$(LINUX_HDR_PATH) + +TEST_GEN_PROGS := sud_test +include ../lib.mk diff --git a/tools/testing/selftests/syscall_user_dispatch/config b/tools/testing/selftests/syscall_user_dispatch/config new file mode 100644 index 000000000000..039e303e59d7 --- /dev/null +++ b/tools/testing/selftests/syscall_user_dispatch/config @@ -0,0 +1 @@ +CONFIG_GENERIC_ENTRY=y diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_test.c b/tools/testing/selftests/syscall_user_dispatch/sud_test.c new file mode 100644 index 000000000000..6498b050ef89 --- /dev/null +++ b/tools/testing/selftests/syscall_user_dispatch/sud_test.c @@ -0,0 +1,310 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020 Collabora Ltd. + * + * Test code for syscall user dispatch + */ + +#define _GNU_SOURCE +#include +#include +#include +#include + +#include +#include "../kselftest_harness.h" + +#ifndef PR_SET_SYSCALL_USER_DISPATCH +# define PR_SET_SYSCALL_USER_DISPATCH 59 +# define PR_SYS_DISPATCH_OFF 0 +# define PR_SYS_DISPATCH_ON 1 +#endif + +#ifndef SYS_USER_DISPATCH +# define SYS_USER_DISPATCH 2 +#endif + +#ifdef __NR_syscalls +# define MAGIC_SYSCALL_1 (__NR_syscalls + 1) /* Bad Linux syscall number */ +#else +# define MAGIC_SYSCALL_1 (0xff00) /* Bad Linux syscall number */ +#endif + +#define SYSCALL_DISPATCH_ON(x) ((x) = 1) +#define SYSCALL_DISPATCH_OFF(x) ((x) = 0) + +/* Test Summary: + * + * - dispatch_trigger_sigsys: Verify if PR_SET_SYSCALL_USER_DISPATCH is + * able to trigger SIGSYS on a syscall. + * + * - bad_selector: Test that a bad selector value triggers SIGSYS with + * si_errno EINVAL. + * + * - bad_prctl_param: Test that the API correctly rejects invalid + * parameters on prctl + * + * - dispatch_and_return: Test that a syscall is selectively dispatched + * to userspace depending on the value of selector. + * + * - disable_dispatch: Test that the PR_SYS_DISPATCH_OFF correctly + * disables the dispatcher + * + * - direct_dispatch_range: Test that a syscall within the allowed range + * can bypass the dispatcher. + */ + +TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS) +{ + char sel = 0; + struct sysinfo info; + int ret; + + ret = sysinfo(&info); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &sel); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH"); + } + + SYSCALL_DISPATCH_ON(sel); + + sysinfo(&info); + + EXPECT_FALSE(true) { + TH_LOG("Unreachable!"); + } +} + +TEST(bad_prctl_param) +{ + char sel = 0; + int op; + + /* Invalid op */ + op = -1; + prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0, 0, &sel); + ASSERT_EQ(EINVAL, errno); + + /* PR_SYS_DISPATCH_OFF */ + op = PR_SYS_DISPATCH_OFF; + + /* offset != 0 */ + prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x1, 0x0, 0); + EXPECT_EQ(EINVAL, errno); + + /* len != 0 */ + prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0xff, 0); + EXPECT_EQ(EINVAL, errno); + + /* sel != NULL */ + prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, &sel); + EXPECT_EQ(EINVAL, errno); + + /* Valid parameter */ + errno = 0; + prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, 0x0); + EXPECT_EQ(0, errno); + + /* PR_SYS_DISPATCH_ON */ + op = PR_SYS_DISPATCH_ON; + + /* Dispatcher region is bad (offset > 0 && len == 0) */ + prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x1, 0x0, &sel); + EXPECT_EQ(EINVAL, errno); + prctl(PR_SET_SYSCALL_USER_DISPATCH, op, -1L, 0x0, &sel); + EXPECT_EQ(EINVAL, errno); + + /* Invalid selector */ + prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x1, (void *) -1); + ASSERT_EQ(EFAULT, errno); + + /* + * Dispatcher range overflows unsigned long + */ + prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 1, -1L, &sel); + ASSERT_EQ(EINVAL, errno) { + TH_LOG("Should reject bad syscall range"); + } + + /* + * Allowed range overflows usigned long + */ + prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, -1L, 0x1, &sel); + ASSERT_EQ(EINVAL, errno) { + TH_LOG("Should reject bad syscall range"); + } +} + +/* + * Use global selector for handle_sigsys tests, to avoid passing + * selector to signal handler + */ +char glob_sel; +int nr_syscalls_emulated; +int si_code; +int si_errno; + +static void handle_sigsys(int sig, siginfo_t *info, void *ucontext) +{ + si_code = info->si_code; + si_errno = info->si_errno; + + if (info->si_syscall == MAGIC_SYSCALL_1) + nr_syscalls_emulated++; + + /* In preparation for sigreturn. */ + SYSCALL_DISPATCH_OFF(glob_sel); +} + +TEST(dispatch_and_return) +{ + long ret; + struct sigaction act; + sigset_t mask; + + glob_sel = 0; + nr_syscalls_emulated = 0; + si_code = 0; + si_errno = 0; + + memset(&act, 0, sizeof(act)); + sigemptyset(&mask); + + act.sa_sigaction = handle_sigsys; + act.sa_flags = SA_SIGINFO; + act.sa_mask = mask; + + ret = sigaction(SIGSYS, &act, NULL); + ASSERT_EQ(0, ret); + + /* Make sure selector is good prior to prctl. */ + SYSCALL_DISPATCH_OFF(glob_sel); + + ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &glob_sel); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH"); + } + + /* MAGIC_SYSCALL_1 doesn't exist. */ + SYSCALL_DISPATCH_OFF(glob_sel); + ret = syscall(MAGIC_SYSCALL_1); + EXPECT_EQ(-1, ret) { + TH_LOG("Dispatch triggered unexpectedly"); + } + + /* MAGIC_SYSCALL_1 should be emulated. */ + nr_syscalls_emulated = 0; + SYSCALL_DISPATCH_ON(glob_sel); + + ret = syscall(MAGIC_SYSCALL_1); + EXPECT_EQ(MAGIC_SYSCALL_1, ret) { + TH_LOG("Failed to intercept syscall"); + } + EXPECT_EQ(1, nr_syscalls_emulated) { + TH_LOG("Failed to emulate syscall"); + } + ASSERT_EQ(SYS_USER_DISPATCH, si_code) { + TH_LOG("Bad si_code in SIGSYS"); + } + ASSERT_EQ(0, si_errno) { + TH_LOG("Bad si_errno in SIGSYS"); + } +} + +TEST_SIGNAL(bad_selector, SIGSYS) +{ + long ret; + struct sigaction act; + sigset_t mask; + struct sysinfo info; + + glob_sel = 0; + nr_syscalls_emulated = 0; + si_code = 0; + si_errno = 0; + + memset(&act, 0, sizeof(act)); + sigemptyset(&mask); + + act.sa_sigaction = handle_sigsys; + act.sa_flags = SA_SIGINFO; + act.sa_mask = mask; + + ret = sigaction(SIGSYS, &act, NULL); + ASSERT_EQ(0, ret); + + /* Make sure selector is good prior to prctl. */ + SYSCALL_DISPATCH_OFF(glob_sel); + + ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &glob_sel); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH"); + } + + glob_sel = -1; + + sysinfo(&info); + + /* Even though it is ready to catch SIGSYS, the signal is + * supposed to be uncatchable. + */ + + EXPECT_FALSE(true) { + TH_LOG("Unreachable!"); + } +} + +TEST(disable_dispatch) +{ + int ret; + struct sysinfo info; + char sel = 0; + + ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &sel); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH"); + } + + /* MAGIC_SYSCALL_1 doesn't exist. */ + SYSCALL_DISPATCH_OFF(glob_sel); + + ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_OFF, 0, 0, 0); + EXPECT_EQ(0, ret) { + TH_LOG("Failed to unset syscall user dispatch"); + } + + /* Shouldn't have any effect... */ + SYSCALL_DISPATCH_ON(glob_sel); + + ret = syscall(__NR_sysinfo, &info); + EXPECT_EQ(0, ret) { + TH_LOG("Dispatch triggered unexpectedly"); + } +} + +TEST(direct_dispatch_range) +{ + int ret = 0; + struct sysinfo info; + char sel = 0; + + /* + * Instead of calculating libc addresses; allow the entire + * memory map and lock the selector. + */ + ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, -1L, &sel); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH"); + } + + SYSCALL_DISPATCH_ON(sel); + + ret = sysinfo(&info); + ASSERT_EQ(0, ret) { + TH_LOG("Dispatch triggered unexpectedly"); + } +} + +TEST_HARNESS_MAIN -- cgit v1.3.1 From d87ae0fa21c26db2d7c66f22dee9c27ecda48ce2 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Fri, 27 Nov 2020 14:32:37 -0500 Subject: selftests: Add benchmark for syscall user dispatch This is the patch I'm using to evaluate the impact syscall user dispatch has on native syscall (syscalls not redirected to userspace) when enabled for the process and submiting syscalls though the unblocked dispatch selector. It works by running a step to define a baseline of the cost of executing sysinfo, then enabling SUD, and rerunning that step. On my test machine, an AMD Ryzen 5 1500X, I have the following results with the latest version of syscall user dispatch patches. root@olga:~# syscall_user_dispatch/sud_benchmark Calibrating test set to last ~5 seconds... test iterations = 37500000 Avg syscall time 134ns. Caught sys_ff00 trapped_call_count 1, native_call_count 0. Avg syscall time 147ns. Interception overhead: 9.7% (+13ns). Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Reviewed-by: Andy Lutomirski Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20201127193238.821364-7-krisman@collabora.com --- .../selftests/syscall_user_dispatch/Makefile | 2 +- .../syscall_user_dispatch/sud_benchmark.c | 200 +++++++++++++++++++++ 2 files changed, 201 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c (limited to 'tools') diff --git a/tools/testing/selftests/syscall_user_dispatch/Makefile b/tools/testing/selftests/syscall_user_dispatch/Makefile index 8e15fa42bcda..03c120270953 100644 --- a/tools/testing/selftests/syscall_user_dispatch/Makefile +++ b/tools/testing/selftests/syscall_user_dispatch/Makefile @@ -5,5 +5,5 @@ LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ CFLAGS += -Wall -I$(LINUX_HDR_PATH) -TEST_GEN_PROGS := sud_test +TEST_GEN_PROGS := sud_test sud_benchmark include ../lib.mk diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c new file mode 100644 index 000000000000..6689f1183dbf --- /dev/null +++ b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020 Collabora Ltd. + * + * Benchmark and test syscall user dispatch + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef PR_SET_SYSCALL_USER_DISPATCH +# define PR_SET_SYSCALL_USER_DISPATCH 59 +# define PR_SYS_DISPATCH_OFF 0 +# define PR_SYS_DISPATCH_ON 1 +#endif + +#ifdef __NR_syscalls +# define MAGIC_SYSCALL_1 (__NR_syscalls + 1) /* Bad Linux syscall number */ +#else +# define MAGIC_SYSCALL_1 (0xff00) /* Bad Linux syscall number */ +#endif + +/* + * To test returning from a sigsys with selector blocked, the test + * requires some per-architecture support (i.e. knowledge about the + * signal trampoline address). On i386, we know it is on the vdso, and + * a small trampoline is open-coded for x86_64. Other architectures + * that have a trampoline in the vdso will support TEST_BLOCKED_RETURN + * out of the box, but don't enable them until they support syscall user + * dispatch. + */ +#if defined(__x86_64__) || defined(__i386__) +#define TEST_BLOCKED_RETURN +#endif + +#ifdef __x86_64__ +void* (syscall_dispatcher_start)(void); +void* (syscall_dispatcher_end)(void); +#else +unsigned long syscall_dispatcher_start = 0; +unsigned long syscall_dispatcher_end = 0; +#endif + +unsigned long trapped_call_count = 0; +unsigned long native_call_count = 0; + +char selector; +#define SYSCALL_BLOCK (selector = PR_SYS_DISPATCH_ON) +#define SYSCALL_UNBLOCK (selector = PR_SYS_DISPATCH_OFF) + +#define CALIBRATION_STEP 100000 +#define CALIBRATE_TO_SECS 5 +int factor; + +static double one_sysinfo_step(void) +{ + struct timespec t1, t2; + int i; + struct sysinfo info; + + clock_gettime(CLOCK_MONOTONIC, &t1); + for (i = 0; i < CALIBRATION_STEP; i++) + sysinfo(&info); + clock_gettime(CLOCK_MONOTONIC, &t2); + return (t2.tv_sec - t1.tv_sec) + 1.0e-9 * (t2.tv_nsec - t1.tv_nsec); +} + +static void calibrate_set(void) +{ + double elapsed = 0; + + printf("Calibrating test set to last ~%d seconds...\n", CALIBRATE_TO_SECS); + + while (elapsed < 1) { + elapsed += one_sysinfo_step(); + factor += CALIBRATE_TO_SECS; + } + + printf("test iterations = %d\n", CALIBRATION_STEP * factor); +} + +static double perf_syscall(void) +{ + unsigned int i; + double partial = 0; + + for (i = 0; i < factor; ++i) + partial += one_sysinfo_step()/(CALIBRATION_STEP*factor); + return partial; +} + +static void handle_sigsys(int sig, siginfo_t *info, void *ucontext) +{ + char buf[1024]; + int len; + + SYSCALL_UNBLOCK; + + /* printf and friends are not signal-safe. */ + len = snprintf(buf, 1024, "Caught sys_%x\n", info->si_syscall); + write(1, buf, len); + + if (info->si_syscall == MAGIC_SYSCALL_1) + trapped_call_count++; + else + native_call_count++; + +#ifdef TEST_BLOCKED_RETURN + SYSCALL_BLOCK; +#endif + +#ifdef __x86_64__ + __asm__ volatile("movq $0xf, %rax"); + __asm__ volatile("leaveq"); + __asm__ volatile("add $0x8, %rsp"); + __asm__ volatile("syscall_dispatcher_start:"); + __asm__ volatile("syscall"); + __asm__ volatile("nop"); /* Landing pad within dispatcher area */ + __asm__ volatile("syscall_dispatcher_end:"); +#endif + +} + +int main(void) +{ + struct sigaction act; + double time1, time2; + int ret; + sigset_t mask; + + memset(&act, 0, sizeof(act)); + sigemptyset(&mask); + + act.sa_sigaction = handle_sigsys; + act.sa_flags = SA_SIGINFO; + act.sa_mask = mask; + + calibrate_set(); + + time1 = perf_syscall(); + printf("Avg syscall time %.0lfns.\n", time1 * 1.0e9); + + ret = sigaction(SIGSYS, &act, NULL); + if (ret) { + perror("Error sigaction:"); + exit(-1); + } + + fprintf(stderr, "Enabling syscall trapping.\n"); + + if (prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, + syscall_dispatcher_start, + (syscall_dispatcher_end - syscall_dispatcher_start + 1), + &selector)) { + perror("prctl failed\n"); + exit(-1); + } + + SYSCALL_BLOCK; + syscall(MAGIC_SYSCALL_1); + +#ifdef TEST_BLOCKED_RETURN + if (selector == PR_SYS_DISPATCH_OFF) { + fprintf(stderr, "Failed to return with selector blocked.\n"); + exit(-1); + } +#endif + + SYSCALL_UNBLOCK; + + if (!trapped_call_count) { + fprintf(stderr, "syscall trapping does not work.\n"); + exit(-1); + } + + time2 = perf_syscall(); + + if (native_call_count) { + perror("syscall trapping intercepted more syscalls than expected\n"); + exit(-1); + } + + printf("trapped_call_count %lu, native_call_count %lu.\n", + trapped_call_count, native_call_count); + printf("Avg syscall time %.0lfns.\n", time2 * 1.0e9); + printf("Interception overhead: %.1lf%% (+%.0lfns).\n", + 100.0 * (time2 / time1 - 1.0), 1.0e9 * (time2 - time1)); + return 0; + +} -- cgit v1.3.1 From 2c07343abd8932200a45ff7b10950e71081e9e77 Mon Sep 17 00:00:00 2001 From: Mickaël Salaün Date: Wed, 2 Dec 2020 17:26:43 +0100 Subject: selftests/seccomp: Update kernel config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit seccomp_bpf.c uses unshare(CLONE_NEWPID), which requires CONFIG_PID_NS to be set. Cc: Kees Cook Cc: Shuah Khan Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace") Signed-off-by: Mickaël Salaün Acked-by: Tycho Andersen Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201202162643.249276-1-mic@digikod.net --- tools/testing/selftests/seccomp/config | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/config b/tools/testing/selftests/seccomp/config index 64c19d8eba79..ad431a5178fb 100644 --- a/tools/testing/selftests/seccomp/config +++ b/tools/testing/selftests/seccomp/config @@ -1,3 +1,4 @@ +CONFIG_PID_NS=y CONFIG_SECCOMP=y CONFIG_SECCOMP_FILTER=y CONFIG_USER_NS=y -- cgit v1.3.1 From a999696c547f1a8ef2ddbb9b0e77abc3f6db4ff1 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 2 Dec 2020 09:25:14 -0800 Subject: selftests/bpf: Rewrite test_sock_addr bind bpf into C I'm planning to extend it in the next patches. It's much easier to work with C than BPF assembly. Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201202172516.3483656-2-sdf@google.com --- tools/testing/selftests/bpf/progs/bind4_prog.c | 71 +++++++++ tools/testing/selftests/bpf/progs/bind6_prog.c | 88 +++++++++++ tools/testing/selftests/bpf/test_sock_addr.c | 196 ++----------------------- 3 files changed, 171 insertions(+), 184 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/bind4_prog.c create mode 100644 tools/testing/selftests/bpf/progs/bind6_prog.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c new file mode 100644 index 000000000000..0951302a984a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bind4_prog.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define SERV4_IP 0xc0a801feU /* 192.168.1.254 */ +#define SERV4_PORT 4040 +#define SERV4_REWRITE_IP 0x7f000001U /* 127.0.0.1 */ +#define SERV4_REWRITE_PORT 4444 + +SEC("cgroup/bind4") +int bind_v4_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock *sk; + __u32 user_ip4; + __u16 user_port; + + sk = ctx->sk; + if (!sk) + return 0; + + if (sk->family != AF_INET) + return 0; + + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) + return 0; + + if (ctx->user_ip4 != bpf_htonl(SERV4_IP) || + ctx->user_port != bpf_htons(SERV4_PORT)) + return 0; + + // u8 narrow loads: + user_ip4 = 0; + user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[0] << 0; + user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[1] << 8; + user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[2] << 16; + user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[3] << 24; + if (ctx->user_ip4 != user_ip4) + return 0; + + user_port = 0; + user_port |= ((volatile __u8 *)&ctx->user_port)[0] << 0; + user_port |= ((volatile __u8 *)&ctx->user_port)[1] << 8; + if (ctx->user_port != user_port) + return 0; + + // u16 narrow loads: + user_ip4 = 0; + user_ip4 |= ((volatile __u16 *)&ctx->user_ip4)[0] << 0; + user_ip4 |= ((volatile __u16 *)&ctx->user_ip4)[1] << 16; + if (ctx->user_ip4 != user_ip4) + return 0; + + ctx->user_ip4 = bpf_htonl(SERV4_REWRITE_IP); + ctx->user_port = bpf_htons(SERV4_REWRITE_PORT); + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c new file mode 100644 index 000000000000..16da1cf85418 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bind6_prog.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define SERV6_IP_0 0xfaceb00c /* face:b00c:1234:5678::abcd */ +#define SERV6_IP_1 0x12345678 +#define SERV6_IP_2 0x00000000 +#define SERV6_IP_3 0x0000abcd +#define SERV6_PORT 6060 +#define SERV6_REWRITE_IP_0 0x00000000 +#define SERV6_REWRITE_IP_1 0x00000000 +#define SERV6_REWRITE_IP_2 0x00000000 +#define SERV6_REWRITE_IP_3 0x00000001 +#define SERV6_REWRITE_PORT 6666 + +SEC("cgroup/bind6") +int bind_v6_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock *sk; + __u32 user_ip6; + __u16 user_port; + int i; + + sk = ctx->sk; + if (!sk) + return 0; + + if (sk->family != AF_INET6) + return 0; + + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) + return 0; + + if (ctx->user_ip6[0] != bpf_htonl(SERV6_IP_0) || + ctx->user_ip6[1] != bpf_htonl(SERV6_IP_1) || + ctx->user_ip6[2] != bpf_htonl(SERV6_IP_2) || + ctx->user_ip6[3] != bpf_htonl(SERV6_IP_3) || + ctx->user_port != bpf_htons(SERV6_PORT)) + return 0; + + // u8 narrow loads: + for (i = 0; i < 4; i++) { + user_ip6 = 0; + user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[0] << 0; + user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[1] << 8; + user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[2] << 16; + user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[3] << 24; + if (ctx->user_ip6[i] != user_ip6) + return 0; + } + + user_port = 0; + user_port |= ((volatile __u8 *)&ctx->user_port)[0] << 0; + user_port |= ((volatile __u8 *)&ctx->user_port)[1] << 8; + if (ctx->user_port != user_port) + return 0; + + // u16 narrow loads: + for (i = 0; i < 4; i++) { + user_ip6 = 0; + user_ip6 |= ((volatile __u16 *)&ctx->user_ip6[i])[0] << 0; + user_ip6 |= ((volatile __u16 *)&ctx->user_ip6[i])[1] << 16; + if (ctx->user_ip6[i] != user_ip6) + return 0; + } + + ctx->user_ip6[0] = bpf_htonl(SERV6_REWRITE_IP_0); + ctx->user_ip6[1] = bpf_htonl(SERV6_REWRITE_IP_1); + ctx->user_ip6[2] = bpf_htonl(SERV6_REWRITE_IP_2); + ctx->user_ip6[3] = bpf_htonl(SERV6_REWRITE_IP_3); + ctx->user_port = bpf_htons(SERV6_REWRITE_PORT); + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index b8c72c1d9cf7..dcb83ab02919 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -31,6 +31,8 @@ #define CONNECT6_PROG_PATH "./connect6_prog.o" #define SENDMSG4_PROG_PATH "./sendmsg4_prog.o" #define SENDMSG6_PROG_PATH "./sendmsg6_prog.o" +#define BIND4_PROG_PATH "./bind4_prog.o" +#define BIND6_PROG_PATH "./bind6_prog.o" #define SERV4_IP "192.168.1.254" #define SERV4_REWRITE_IP "127.0.0.1" @@ -660,190 +662,6 @@ static int load_insns(const struct sock_addr_test *test, return ret; } -/* [1] These testing programs try to read different context fields, including - * narrow loads of different sizes from user_ip4 and user_ip6, and write to - * those allowed to be overridden. - * - * [2] BPF_LD_IMM64 & BPF_JMP_REG are used below whenever there is a need to - * compare a register with unsigned 32bit integer. BPF_JMP_IMM can't be used - * in such cases since it accepts only _signed_ 32bit integer as IMM - * argument. Also note that BPF_LD_IMM64 contains 2 instructions what matters - * to count jumps properly. - */ - -static int bind4_prog_load(const struct sock_addr_test *test) -{ - union { - uint8_t u4_addr8[4]; - uint16_t u4_addr16[2]; - uint32_t u4_addr32; - } ip4, port; - struct sockaddr_in addr4_rw; - - if (inet_pton(AF_INET, SERV4_IP, (void *)&ip4) != 1) { - log_err("Invalid IPv4: %s", SERV4_IP); - return -1; - } - - port.u4_addr32 = htons(SERV4_PORT); - - if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT, - (struct sockaddr *)&addr4_rw, sizeof(addr4_rw)) == -1) - return -1; - - /* See [1]. */ - struct bpf_insn insns[] = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - - /* if (sk.family == AF_INET && */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, family)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 32), - - /* (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, type)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1), - BPF_JMP_A(1), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 28), - - /* 1st_byte_of_user_ip4 == expected && */ - BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, user_ip4)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 26), - - /* 2nd_byte_of_user_ip4 == expected && */ - BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, user_ip4) + 1), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[1], 24), - - /* 3rd_byte_of_user_ip4 == expected && */ - BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, user_ip4) + 2), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[2], 22), - - /* 4th_byte_of_user_ip4 == expected && */ - BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, user_ip4) + 3), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[3], 20), - - /* 1st_half_of_user_ip4 == expected && */ - BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, user_ip4)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 18), - - /* 2nd_half_of_user_ip4 == expected && */ - BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, user_ip4) + 2), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[1], 16), - - /* whole_user_ip4 == expected && */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, user_ip4)), - BPF_LD_IMM64(BPF_REG_8, ip4.u4_addr32), /* See [2]. */ - BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 12), - - /* 1st_byte_of_user_port == expected && */ - BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, user_port)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, port.u4_addr8[0], 10), - - /* 1st_half_of_user_port == expected && */ - BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, user_port)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, port.u4_addr16[0], 8), - - /* user_port == expected) { */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, user_port)), - BPF_LD_IMM64(BPF_REG_8, port.u4_addr32), /* See [2]. */ - BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 4), - - /* user_ip4 = addr4_rw.sin_addr */ - BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_addr.s_addr), - BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, - offsetof(struct bpf_sock_addr, user_ip4)), - - /* user_port = addr4_rw.sin_port */ - BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_port), - BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, - offsetof(struct bpf_sock_addr, user_port)), - /* } */ - - /* return 1 */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }; - - return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn)); -} - -static int bind6_prog_load(const struct sock_addr_test *test) -{ - struct sockaddr_in6 addr6_rw; - struct in6_addr ip6; - - if (inet_pton(AF_INET6, SERV6_IP, (void *)&ip6) != 1) { - log_err("Invalid IPv6: %s", SERV6_IP); - return -1; - } - - if (mk_sockaddr(AF_INET6, SERV6_REWRITE_IP, SERV6_REWRITE_PORT, - (struct sockaddr *)&addr6_rw, sizeof(addr6_rw)) == -1) - return -1; - - /* See [1]. */ - struct bpf_insn insns[] = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - - /* if (sk.family == AF_INET6 && */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, family)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18), - - /* 5th_byte_of_user_ip6 == expected && */ - BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, user_ip6[1])), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr[4], 16), - - /* 3rd_half_of_user_ip6 == expected && */ - BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, user_ip6[1])), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr16[2], 14), - - /* last_word_of_user_ip6 == expected) { */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, user_ip6[3])), - BPF_LD_IMM64(BPF_REG_8, ip6.s6_addr32[3]), /* See [2]. */ - BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 10), - - -#define STORE_IPV6_WORD(N) \ - BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_addr.s6_addr32[N]), \ - BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, \ - offsetof(struct bpf_sock_addr, user_ip6[N])) - - /* user_ip6 = addr6_rw.sin6_addr */ - STORE_IPV6_WORD(0), - STORE_IPV6_WORD(1), - STORE_IPV6_WORD(2), - STORE_IPV6_WORD(3), - - /* user_port = addr6_rw.sin6_port */ - BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_port), - BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, - offsetof(struct bpf_sock_addr, user_port)), - - /* } */ - - /* return 1 */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }; - - return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn)); -} - static int load_path(const struct sock_addr_test *test, const char *path) { struct bpf_prog_load_attr attr; @@ -865,6 +683,16 @@ static int load_path(const struct sock_addr_test *test, const char *path) return prog_fd; } +static int bind4_prog_load(const struct sock_addr_test *test) +{ + return load_path(test, BIND4_PROG_PATH); +} + +static int bind6_prog_load(const struct sock_addr_test *test) +{ + return load_path(test, BIND6_PROG_PATH); +} + static int connect4_prog_load(const struct sock_addr_test *test) { return load_path(test, CONNECT4_PROG_PATH); -- cgit v1.3.1 From a540c81a2bcb95227c3e24a4478956824858a6b0 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 2 Dec 2020 09:25:16 -0800 Subject: selftests/bpf: Extend bind{4,6} programs with a call to bpf_setsockopt To make sure it doesn't trigger sock_owned_by_me splat. Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201202172516.3483656-4-sdf@google.com --- tools/testing/selftests/bpf/progs/bind4_prog.c | 31 ++++++++++++++++++++++++++ tools/testing/selftests/bpf/progs/bind6_prog.c | 31 ++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c index 0951302a984a..c6520f21f5f5 100644 --- a/tools/testing/selftests/bpf/progs/bind4_prog.c +++ b/tools/testing/selftests/bpf/progs/bind4_prog.c @@ -19,6 +19,33 @@ #define SERV4_REWRITE_IP 0x7f000001U /* 127.0.0.1 */ #define SERV4_REWRITE_PORT 4444 +#ifndef IFNAMSIZ +#define IFNAMSIZ 16 +#endif + +static __inline int bind_to_device(struct bpf_sock_addr *ctx) +{ + char veth1[IFNAMSIZ] = "test_sock_addr1"; + char veth2[IFNAMSIZ] = "test_sock_addr2"; + char missing[IFNAMSIZ] = "nonexistent_dev"; + char del_bind[IFNAMSIZ] = ""; + + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, + &veth1, sizeof(veth1))) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, + &veth2, sizeof(veth2))) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, + &missing, sizeof(missing)) != -ENODEV) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, + &del_bind, sizeof(del_bind))) + return 1; + + return 0; +} + SEC("cgroup/bind4") int bind_v4_prog(struct bpf_sock_addr *ctx) { @@ -62,6 +89,10 @@ int bind_v4_prog(struct bpf_sock_addr *ctx) if (ctx->user_ip4 != user_ip4) return 0; + /* Bind to device and unbind it. */ + if (bind_to_device(ctx)) + return 0; + ctx->user_ip4 = bpf_htonl(SERV4_REWRITE_IP); ctx->user_port = bpf_htons(SERV4_REWRITE_PORT); diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c index 16da1cf85418..4358e44dcf47 100644 --- a/tools/testing/selftests/bpf/progs/bind6_prog.c +++ b/tools/testing/selftests/bpf/progs/bind6_prog.c @@ -25,6 +25,33 @@ #define SERV6_REWRITE_IP_3 0x00000001 #define SERV6_REWRITE_PORT 6666 +#ifndef IFNAMSIZ +#define IFNAMSIZ 16 +#endif + +static __inline int bind_to_device(struct bpf_sock_addr *ctx) +{ + char veth1[IFNAMSIZ] = "test_sock_addr1"; + char veth2[IFNAMSIZ] = "test_sock_addr2"; + char missing[IFNAMSIZ] = "nonexistent_dev"; + char del_bind[IFNAMSIZ] = ""; + + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, + &veth1, sizeof(veth1))) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, + &veth2, sizeof(veth2))) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, + &missing, sizeof(missing)) != -ENODEV) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, + &del_bind, sizeof(del_bind))) + return 1; + + return 0; +} + SEC("cgroup/bind6") int bind_v6_prog(struct bpf_sock_addr *ctx) { @@ -76,6 +103,10 @@ int bind_v6_prog(struct bpf_sock_addr *ctx) return 0; } + /* Bind to device and unbind it. */ + if (bind_to_device(ctx)) + return 0; + ctx->user_ip6[0] = bpf_htonl(SERV6_REWRITE_IP_0); ctx->user_ip6[1] = bpf_htonl(SERV6_REWRITE_IP_1); ctx->user_ip6[2] = bpf_htonl(SERV6_REWRITE_IP_2); -- cgit v1.3.1 From 80ee81e0403c48f4eb342f7c8d40477c89b8836a Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 1 Dec 2020 13:58:58 -0800 Subject: bpf: Eliminate rlimit-based memory accounting infra for bpf maps Remove rlimit-based accounting infrastructure code, which is not used anymore. To provide a backward compatibility, use an approximation of the bpf map memory footprint as a "memlock" value, available to a user via map info. The approximation is based on the maximal number of elements and key and value sizes. Signed-off-by: Roman Gushchin Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201201215900.3569844-33-guro@fb.com --- include/linux/bpf.h | 12 --- kernel/bpf/syscall.c | 96 ++++------------------ .../testing/selftests/bpf/progs/bpf_iter_bpf_map.c | 2 +- tools/testing/selftests/bpf/progs/map_ptr_kern.c | 7 -- 4 files changed, 17 insertions(+), 100 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e1f2c95c15ec..61331a148cde 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -138,11 +138,6 @@ struct bpf_map_ops { const struct bpf_iter_seq_info *iter_seq_info; }; -struct bpf_map_memory { - u32 pages; - struct user_struct *user; -}; - struct bpf_map { /* The first two cachelines with read-mostly members of which some * are also accessed in fast-path (e.g. ops, max_entries). @@ -163,7 +158,6 @@ struct bpf_map { u32 btf_key_type_id; u32 btf_value_type_id; struct btf *btf; - struct bpf_map_memory memory; #ifdef CONFIG_MEMCG_KMEM struct mem_cgroup *memcg; #endif @@ -1224,12 +1218,6 @@ void bpf_map_inc_with_uref(struct bpf_map *map); struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); -int bpf_map_charge_memlock(struct bpf_map *map, u32 pages); -void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages); -int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size); -void bpf_map_charge_finish(struct bpf_map_memory *mem); -void bpf_map_charge_move(struct bpf_map_memory *dst, - struct bpf_map_memory *src); void *bpf_map_area_alloc(u64 size, int numa_node); void *bpf_map_area_mmapable_alloc(u64 size, int numa_node); void bpf_map_area_free(void *base); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index dff3a5f62d7a..29096d96d989 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -128,7 +128,7 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) return map; } -static u32 bpf_map_value_size(struct bpf_map *map) +static u32 bpf_map_value_size(const struct bpf_map *map) { if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || @@ -346,77 +346,6 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) map->numa_node = bpf_map_attr_numa_node(attr); } -static int bpf_charge_memlock(struct user_struct *user, u32 pages) -{ - unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (atomic_long_add_return(pages, &user->locked_vm) > memlock_limit) { - atomic_long_sub(pages, &user->locked_vm); - return -EPERM; - } - return 0; -} - -static void bpf_uncharge_memlock(struct user_struct *user, u32 pages) -{ - if (user) - atomic_long_sub(pages, &user->locked_vm); -} - -int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size) -{ - u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT; - struct user_struct *user; - int ret; - - if (size >= U32_MAX - PAGE_SIZE) - return -E2BIG; - - user = get_current_user(); - ret = bpf_charge_memlock(user, pages); - if (ret) { - free_uid(user); - return ret; - } - - mem->pages = pages; - mem->user = user; - - return 0; -} - -void bpf_map_charge_finish(struct bpf_map_memory *mem) -{ - bpf_uncharge_memlock(mem->user, mem->pages); - free_uid(mem->user); -} - -void bpf_map_charge_move(struct bpf_map_memory *dst, - struct bpf_map_memory *src) -{ - *dst = *src; - - /* Make sure src will not be used for the redundant uncharging. */ - memset(src, 0, sizeof(struct bpf_map_memory)); -} - -int bpf_map_charge_memlock(struct bpf_map *map, u32 pages) -{ - int ret; - - ret = bpf_charge_memlock(map->memory.user, pages); - if (ret) - return ret; - map->memory.pages += pages; - return ret; -} - -void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages) -{ - bpf_uncharge_memlock(map->memory.user, pages); - map->memory.pages -= pages; -} - static int bpf_map_alloc_id(struct bpf_map *map) { int id; @@ -524,14 +453,11 @@ static void bpf_map_release_memcg(struct bpf_map *map) static void bpf_map_free_deferred(struct work_struct *work) { struct bpf_map *map = container_of(work, struct bpf_map, work); - struct bpf_map_memory mem; - bpf_map_charge_move(&mem, &map->memory); security_bpf_map_free(map); bpf_map_release_memcg(map); /* implementation dependent freeing */ map->ops->map_free(map); - bpf_map_charge_finish(&mem); } static void bpf_map_put_uref(struct bpf_map *map) @@ -592,6 +518,19 @@ static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f) } #ifdef CONFIG_PROC_FS +/* Provides an approximation of the map's memory footprint. + * Used only to provide a backward compatibility and display + * a reasonable "memlock" info. + */ +static unsigned long bpf_map_memory_footprint(const struct bpf_map *map) +{ + unsigned long size; + + size = round_up(map->key_size + bpf_map_value_size(map), 8); + + return round_up(map->max_entries * size, PAGE_SIZE); +} + static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) { const struct bpf_map *map = filp->private_data; @@ -610,7 +549,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) "value_size:\t%u\n" "max_entries:\t%u\n" "map_flags:\t%#x\n" - "memlock:\t%llu\n" + "memlock:\t%lu\n" "map_id:\t%u\n" "frozen:\t%u\n", map->map_type, @@ -618,7 +557,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) map->value_size, map->max_entries, map->map_flags, - map->memory.pages * 1ULL << PAGE_SHIFT, + bpf_map_memory_footprint(map), map->id, READ_ONCE(map->frozen)); if (type) { @@ -861,7 +800,6 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, static int map_create(union bpf_attr *attr) { int numa_node = bpf_map_attr_numa_node(attr); - struct bpf_map_memory mem; struct bpf_map *map; int f_flags; int err; @@ -960,9 +898,7 @@ free_map_sec: security_bpf_map_free(map); free_map: btf_put(map->btf); - bpf_map_charge_move(&mem, &map->memory); map->ops->map_free(map); - bpf_map_charge_finish(&mem); return err; } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c index 08651b23edba..b83b5d2e17dc 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c @@ -23,6 +23,6 @@ int dump_bpf_map(struct bpf_iter__bpf_map *ctx) BPF_SEQ_PRINTF(seq, "%8u %8ld %8ld %10lu\n", map->id, map->refcnt.counter, map->usercnt.counter, - map->memory.user->locked_vm.counter); + 0LLU); return 0; } diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c index c325405751e2..d8850bc6a9f1 100644 --- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -26,17 +26,12 @@ __u32 g_line = 0; return 0; \ }) -struct bpf_map_memory { - __u32 pages; -} __attribute__((preserve_access_index)); - struct bpf_map { enum bpf_map_type map_type; __u32 key_size; __u32 value_size; __u32 max_entries; __u32 id; - struct bpf_map_memory memory; } __attribute__((preserve_access_index)); static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size, @@ -47,7 +42,6 @@ static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size, VERIFY(map->value_size == value_size); VERIFY(map->max_entries == max_entries); VERIFY(map->id > 0); - VERIFY(map->memory.pages > 0); return 1; } @@ -60,7 +54,6 @@ static inline int check_bpf_map_ptr(struct bpf_map *indirect, VERIFY(indirect->value_size == direct->value_size); VERIFY(indirect->max_entries == direct->max_entries); VERIFY(indirect->id == direct->id); - VERIFY(indirect->memory.pages == direct->memory.pages); return 1; } -- cgit v1.3.1 From 4e62d55d77bbdb33d821f5e16306caab38d42267 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Wed, 28 Oct 2020 10:50:44 +1100 Subject: selftests: openat2: add RESOLVE_ conflict test Now that we reject conflicting RESOLVE_ flags, add a selftest to avoid regressions. Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/r/20201027235044.5240-3-cyphar@cyphar.com Signed-off-by: Christian Brauner --- tools/testing/selftests/openat2/openat2_test.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c index b386367c606b..381d874cce99 100644 --- a/tools/testing/selftests/openat2/openat2_test.c +++ b/tools/testing/selftests/openat2/openat2_test.c @@ -155,7 +155,7 @@ struct flag_test { int err; }; -#define NUM_OPENAT2_FLAG_TESTS 23 +#define NUM_OPENAT2_FLAG_TESTS 24 void test_openat2_flags(void) { @@ -210,6 +210,12 @@ void test_openat2_flags(void) .how.flags = O_TMPFILE | O_RDWR, .how.mode = 0x0000A00000000000ULL, .err = -EINVAL }, + /* ->resolve flags must not conflict. */ + { .name = "incompatible resolve flags (BENEATH | IN_ROOT)", + .how.flags = O_RDONLY, + .how.resolve = RESOLVE_BENEATH | RESOLVE_IN_ROOT, + .err = -EINVAL }, + /* ->resolve must only contain RESOLVE_* flags. */ { .name = "invalid how.resolve and O_RDONLY", .how.flags = O_RDONLY, -- cgit v1.3.1 From f0812f6ca8299e864fe0f41bd7ffdaae3ce7630e Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Wed, 2 Dec 2020 01:44:27 +1100 Subject: selftests/powerpc: update .gitignore I did an in-place build of the self-tests and found that it left the tree dirty. Add missed test binaries to .gitignore Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201201144427.1228745-1-dja@axtens.net --- tools/testing/selftests/powerpc/nx-gzip/.gitignore | 3 +++ tools/testing/selftests/powerpc/security/.gitignore | 1 + tools/testing/selftests/powerpc/signal/.gitignore | 1 + tools/testing/selftests/powerpc/syscalls/.gitignore | 1 + 4 files changed, 6 insertions(+) create mode 100644 tools/testing/selftests/powerpc/nx-gzip/.gitignore (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/nx-gzip/.gitignore b/tools/testing/selftests/powerpc/nx-gzip/.gitignore new file mode 100644 index 000000000000..886d522d52df --- /dev/null +++ b/tools/testing/selftests/powerpc/nx-gzip/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +gunz_test +gzfht_test diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore index 4257a1f156bb..93614b125ded 100644 --- a/tools/testing/selftests/powerpc/security/.gitignore +++ b/tools/testing/selftests/powerpc/security/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only rfi_flush entry_flush +spectre_v2 diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore index 405b5364044c..ce3375cd8e73 100644 --- a/tools/testing/selftests/powerpc/signal/.gitignore +++ b/tools/testing/selftests/powerpc/signal/.gitignore @@ -3,3 +3,4 @@ signal signal_tm sigfuz sigreturn_vdso +sig_sc_double_restart diff --git a/tools/testing/selftests/powerpc/syscalls/.gitignore b/tools/testing/selftests/powerpc/syscalls/.gitignore index b00cab225476..a1e19ccdef84 100644 --- a/tools/testing/selftests/powerpc/syscalls/.gitignore +++ b/tools/testing/selftests/powerpc/syscalls/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only ipc_unmuxed +rtas_filter -- cgit v1.3.1 From c9344769e2b46ba28b947bec7a8a8f0a091ecd57 Mon Sep 17 00:00:00 2001 From: Harish Date: Tue, 1 Dec 2020 14:54:03 +0530 Subject: selftests/powerpc: Fix uninitialized variable warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch fixes uninitialized variable warning in bad_accesses test which causes the selftests build to fail in older distibutions bad_accesses.c: In function ‘bad_access’: bad_accesses.c:52:9: error: ‘x’ may be used uninitialized in this function [-Werror=maybe-uninitialized] printf("Bad - no SEGV! (%c)\n", x); ^ cc1: all warnings being treated as errors Signed-off-by: Harish Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201201092403.238182-1-harish@linux.ibm.com --- tools/testing/selftests/powerpc/mm/bad_accesses.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c index fd747b2ffcfc..65d2148b05dc 100644 --- a/tools/testing/selftests/powerpc/mm/bad_accesses.c +++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c @@ -38,7 +38,7 @@ static void segv_handler(int n, siginfo_t *info, void *ctxt_v) int bad_access(char *p, bool write) { - char x; + char x = 0; fault_code = 0; fault_addr = 0; -- cgit v1.3.1 From 3ba150fb21207e4a7f4b600eb2dbbe83f94571fe Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Mon, 30 Nov 2020 14:00:57 +0530 Subject: lkdtm/powerpc: Add SLB multihit test To check machine check handling, add support to inject slb multihit errors. Co-developed-by: Mahesh Salgaonkar Signed-off-by: Mahesh Salgaonkar Signed-off-by: Ganesh Goudar [mpe: Use CONFIG_PPC_BOOK3S_64 to fix compile errors reported by lkp@intel.com] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201130083057.135610-1-ganeshgr@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 28 +++++- arch/powerpc/mm/book3s64/hash_utils.c | 1 + arch/powerpc/mm/book3s64/slb.c | 27 ------ drivers/misc/lkdtm/Makefile | 1 + drivers/misc/lkdtm/core.c | 3 + drivers/misc/lkdtm/lkdtm.h | 3 + drivers/misc/lkdtm/powerpc.c | 120 ++++++++++++++++++++++++++ tools/testing/selftests/lkdtm/tests.txt | 1 + 8 files changed, 156 insertions(+), 28 deletions(-) create mode 100644 drivers/misc/lkdtm/powerpc.c (limited to 'tools') diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 9192cb05a6ab..066b1d34c7bc 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -843,6 +843,32 @@ static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) unsigned htab_shift_for_mem_size(unsigned long mem_size); -#endif /* __ASSEMBLY__ */ +enum slb_index { + LINEAR_INDEX = 0, /* Kernel linear map (0xc000000000000000) */ + KSTACK_INDEX = 1, /* Kernel stack map */ +}; +#define slb_esid_mask(ssize) \ + (((ssize) == MMU_SEGSIZE_256M) ? ESID_MASK : ESID_MASK_1T) + +static inline unsigned long mk_esid_data(unsigned long ea, int ssize, + enum slb_index index) +{ + return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index; +} + +static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize, + unsigned long flags) +{ + return (vsid << slb_vsid_shift(ssize)) | flags | + ((unsigned long)ssize << SLB_VSID_SSIZE_SHIFT); +} + +static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, + unsigned long flags) +{ + return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); +} + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_ */ diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index e0fe1a43e7b8..73b06adb6eeb 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -112,6 +112,7 @@ int mmu_linear_psize = MMU_PAGE_4K; EXPORT_SYMBOL_GPL(mmu_linear_psize); int mmu_virtual_psize = MMU_PAGE_4K; int mmu_vmalloc_psize = MMU_PAGE_4K; +EXPORT_SYMBOL_GPL(mmu_vmalloc_psize); #ifdef CONFIG_SPARSEMEM_VMEMMAP int mmu_vmemmap_psize = MMU_PAGE_4K; #endif diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 6d720c1c08a4..584567970c11 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -28,35 +28,8 @@ #include "internal.h" -enum slb_index { - LINEAR_INDEX = 0, /* Kernel linear map (0xc000000000000000) */ - KSTACK_INDEX = 1, /* Kernel stack map */ -}; - static long slb_allocate_user(struct mm_struct *mm, unsigned long ea); -#define slb_esid_mask(ssize) \ - (((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T) - -static inline unsigned long mk_esid_data(unsigned long ea, int ssize, - enum slb_index index) -{ - return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index; -} - -static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize, - unsigned long flags) -{ - return (vsid << slb_vsid_shift(ssize)) | flags | - ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT); -} - -static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, - unsigned long flags) -{ - return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); -} - bool stress_slb_enabled __initdata; static int __init parse_stress_slb(char *p) diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile index c70b3822013f..5a92c74eca92 100644 --- a/drivers/misc/lkdtm/Makefile +++ b/drivers/misc/lkdtm/Makefile @@ -10,6 +10,7 @@ lkdtm-$(CONFIG_LKDTM) += rodata_objcopy.o lkdtm-$(CONFIG_LKDTM) += usercopy.o lkdtm-$(CONFIG_LKDTM) += stackleak.o lkdtm-$(CONFIG_LKDTM) += cfi.o +lkdtm-$(CONFIG_PPC_BOOK3S_64) += powerpc.o KASAN_SANITIZE_stackleak.o := n KCOV_INSTRUMENT_rodata.o := n diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c index 97803f213d9d..1f612c76a61b 100644 --- a/drivers/misc/lkdtm/core.c +++ b/drivers/misc/lkdtm/core.c @@ -176,6 +176,9 @@ static const struct crashtype crashtypes[] = { #ifdef CONFIG_X86_32 CRASHTYPE(DOUBLE_FAULT), #endif +#ifdef CONFIG_PPC_BOOK3S_64 + CRASHTYPE(PPC_SLB_MULTIHIT), +#endif }; diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h index 6dec4c9b442f..79ec05c18dd1 100644 --- a/drivers/misc/lkdtm/lkdtm.h +++ b/drivers/misc/lkdtm/lkdtm.h @@ -102,4 +102,7 @@ void lkdtm_STACKLEAK_ERASING(void); /* cfi.c */ void lkdtm_CFI_FORWARD_PROTO(void); +/* powerpc.c */ +void lkdtm_PPC_SLB_MULTIHIT(void); + #endif diff --git a/drivers/misc/lkdtm/powerpc.c b/drivers/misc/lkdtm/powerpc.c new file mode 100644 index 000000000000..077c9f9ed8d0 --- /dev/null +++ b/drivers/misc/lkdtm/powerpc.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "lkdtm.h" +#include +#include +#include + +/* Inserts new slb entries */ +static void insert_slb_entry(unsigned long p, int ssize, int page_size) +{ + unsigned long flags; + + flags = SLB_VSID_KERNEL | mmu_psize_defs[page_size].sllp; + preempt_disable(); + + asm volatile("slbmte %0,%1" : + : "r" (mk_vsid_data(p, ssize, flags)), + "r" (mk_esid_data(p, ssize, SLB_NUM_BOLTED)) + : "memory"); + + asm volatile("slbmte %0,%1" : + : "r" (mk_vsid_data(p, ssize, flags)), + "r" (mk_esid_data(p, ssize, SLB_NUM_BOLTED + 1)) + : "memory"); + preempt_enable(); +} + +/* Inject slb multihit on vmalloc-ed address i.e 0xD00... */ +static int inject_vmalloc_slb_multihit(void) +{ + char *p; + + p = vmalloc(PAGE_SIZE); + if (!p) + return -ENOMEM; + + insert_slb_entry((unsigned long)p, MMU_SEGSIZE_1T, mmu_vmalloc_psize); + /* + * This triggers exception, If handled correctly we must recover + * from this error. + */ + p[0] = '!'; + vfree(p); + return 0; +} + +/* Inject slb multihit on kmalloc-ed address i.e 0xC00... */ +static int inject_kmalloc_slb_multihit(void) +{ + char *p; + + p = kmalloc(2048, GFP_KERNEL); + if (!p) + return -ENOMEM; + + insert_slb_entry((unsigned long)p, MMU_SEGSIZE_1T, mmu_linear_psize); + /* + * This triggers exception, If handled correctly we must recover + * from this error. + */ + p[0] = '!'; + kfree(p); + return 0; +} + +/* + * Few initial SLB entries are bolted. Add a test to inject + * multihit in bolted entry 0. + */ +static void insert_dup_slb_entry_0(void) +{ + unsigned long test_address = PAGE_OFFSET, *test_ptr; + unsigned long esid, vsid; + unsigned long i = 0; + + test_ptr = (unsigned long *)test_address; + preempt_disable(); + + asm volatile("slbmfee %0,%1" : "=r" (esid) : "r" (i)); + asm volatile("slbmfev %0,%1" : "=r" (vsid) : "r" (i)); + + /* for i !=0 we would need to mask out the old entry number */ + asm volatile("slbmte %0,%1" : + : "r" (vsid), + "r" (esid | SLB_NUM_BOLTED) + : "memory"); + + asm volatile("slbmfee %0,%1" : "=r" (esid) : "r" (i)); + asm volatile("slbmfev %0,%1" : "=r" (vsid) : "r" (i)); + + /* for i !=0 we would need to mask out the old entry number */ + asm volatile("slbmte %0,%1" : + : "r" (vsid), + "r" (esid | (SLB_NUM_BOLTED + 1)) + : "memory"); + + pr_info("%s accessing test address 0x%lx: 0x%lx\n", + __func__, test_address, *test_ptr); + + preempt_enable(); +} + +void lkdtm_PPC_SLB_MULTIHIT(void) +{ + if (!radix_enabled()) { + pr_info("Injecting SLB multihit errors\n"); + /* + * These need not be separate tests, And they do pretty + * much same thing. In any case we must recover from the + * errors introduced by these functions, machine would not + * survive these tests in case of failure to handle. + */ + inject_vmalloc_slb_multihit(); + inject_kmalloc_slb_multihit(); + insert_dup_slb_entry_0(); + pr_info("Recovered from SLB multihit errors\n"); + } else { + pr_err("XFAIL: This test is for ppc64 and with hash mode MMU only\n"); + } +} diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt index 74a8d329a72c..18e4599863c0 100644 --- a/tools/testing/selftests/lkdtm/tests.txt +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -68,3 +68,4 @@ USERCOPY_STACK_BEYOND USERCOPY_KERNEL STACKLEAK_ERASING OK: the rest of the thread stack is properly erased CFI_FORWARD_PROTO +PPC_SLB_MULTIHIT Recovered -- cgit v1.3.1 From d4bff72c8401e6f56194ecf455db70ebc22929e2 Mon Sep 17 00:00:00 2001 From: Thomas Karlsson Date: Wed, 2 Dec 2020 19:49:58 +0100 Subject: macvlan: Support for high multicast packet rate Background: Broadcast and multicast packages are enqueued for later processing. This queue was previously hardcoded to 1000. This proved insufficient for handling very high packet rates. This resulted in packet drops for multicast. While at the same time unicast worked fine. The change: This patch make the queue length adjustable to accommodate for environments with very high multicast packet rate. But still keeps the default value of 1000 unless specified. The queue length is specified as a request per macvlan using the IFLA_MACVLAN_BC_QUEUE_LEN parameter. The actual used queue length will then be the maximum of any macvlan connected to the same port. The actual used queue length for the port can be retrieved (read only) by the IFLA_MACVLAN_BC_QUEUE_LEN_USED parameter for verification. This will be followed up by a patch to iproute2 in order to adjust the parameter from userspace. Signed-off-by: Thomas Karlsson Link: https://lore.kernel.org/r/dd4673b2-7eab-edda-6815-85c67ce87f63@paneda.se Signed-off-by: Jakub Kicinski --- drivers/net/macvlan.c | 40 ++++++++++++++++++++++++++++++++++++-- include/linux/if_macvlan.h | 1 + include/uapi/linux/if_link.h | 2 ++ tools/include/uapi/linux/if_link.h | 2 ++ 4 files changed, 43 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d9b6c44a5911..fb51329f8964 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -35,7 +35,7 @@ #define MACVLAN_HASH_BITS 8 #define MACVLAN_HASH_SIZE (1<cb[0])) static void macvlan_port_destroy(struct net_device *dev); +static void update_port_bc_queue_len(struct macvlan_port *port); static inline bool macvlan_passthru(const struct macvlan_port *port) { @@ -354,7 +356,7 @@ static void macvlan_broadcast_enqueue(struct macvlan_port *port, MACVLAN_SKB_CB(nskb)->src = src; spin_lock(&port->bc_queue.lock); - if (skb_queue_len(&port->bc_queue) < MACVLAN_BC_QUEUE_LEN) { + if (skb_queue_len(&port->bc_queue) < port->bc_queue_len_used) { if (src) dev_hold(src->dev); __skb_queue_tail(&port->bc_queue, nskb); @@ -1218,6 +1220,7 @@ static int macvlan_port_create(struct net_device *dev) for (i = 0; i < MACVLAN_HASH_SIZE; i++) INIT_HLIST_HEAD(&port->vlan_source_hash[i]); + port->bc_queue_len_used = 0; skb_queue_head_init(&port->bc_queue); INIT_WORK(&port->bc_work, macvlan_process_broadcast); @@ -1486,6 +1489,10 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, goto destroy_macvlan_port; } + vlan->bc_queue_len_req = MACVLAN_DEFAULT_BC_QUEUE_LEN; + if (data && data[IFLA_MACVLAN_BC_QUEUE_LEN]) + vlan->bc_queue_len_req = nla_get_u32(data[IFLA_MACVLAN_BC_QUEUE_LEN]); + err = register_netdevice(dev); if (err < 0) goto destroy_macvlan_port; @@ -1496,6 +1503,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, goto unregister_netdev; list_add_tail_rcu(&vlan->list, &port->vlans); + update_port_bc_queue_len(vlan->port); netif_stacked_transfer_operstate(lowerdev, dev); linkwatch_fire_event(dev); @@ -1529,6 +1537,7 @@ void macvlan_dellink(struct net_device *dev, struct list_head *head) if (vlan->mode == MACVLAN_MODE_SOURCE) macvlan_flush_sources(vlan->port, vlan); list_del_rcu(&vlan->list); + update_port_bc_queue_len(vlan->port); unregister_netdevice_queue(dev, head); netdev_upper_dev_unlink(vlan->lowerdev, dev); } @@ -1572,6 +1581,12 @@ static int macvlan_changelink(struct net_device *dev, } vlan->flags = flags; } + + if (data && data[IFLA_MACVLAN_BC_QUEUE_LEN]) { + vlan->bc_queue_len_req = nla_get_u32(data[IFLA_MACVLAN_BC_QUEUE_LEN]); + update_port_bc_queue_len(vlan->port); + } + if (set_mode) vlan->mode = mode; if (data && data[IFLA_MACVLAN_MACADDR_MODE]) { @@ -1602,6 +1617,8 @@ static size_t macvlan_get_size(const struct net_device *dev) + nla_total_size(2) /* IFLA_MACVLAN_FLAGS */ + nla_total_size(4) /* IFLA_MACVLAN_MACADDR_COUNT */ + macvlan_get_size_mac(vlan) /* IFLA_MACVLAN_MACADDR */ + + nla_total_size(4) /* IFLA_MACVLAN_BC_QUEUE_LEN */ + + nla_total_size(4) /* IFLA_MACVLAN_BC_QUEUE_LEN_USED */ ); } @@ -1625,6 +1642,7 @@ static int macvlan_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); + struct macvlan_port *port = vlan->port; int i; struct nlattr *nest; @@ -1645,6 +1663,10 @@ static int macvlan_fill_info(struct sk_buff *skb, } nla_nest_end(skb, nest); } + if (nla_put_u32(skb, IFLA_MACVLAN_BC_QUEUE_LEN, vlan->bc_queue_len_req)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_MACVLAN_BC_QUEUE_LEN_USED, port->bc_queue_len_used)) + goto nla_put_failure; return 0; nla_put_failure: @@ -1658,6 +1680,8 @@ static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = { [IFLA_MACVLAN_MACADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [IFLA_MACVLAN_MACADDR_DATA] = { .type = NLA_NESTED }, [IFLA_MACVLAN_MACADDR_COUNT] = { .type = NLA_U32 }, + [IFLA_MACVLAN_BC_QUEUE_LEN] = { .type = NLA_U32 }, + [IFLA_MACVLAN_BC_QUEUE_LEN_USED] = { .type = NLA_REJECT }, }; int macvlan_link_register(struct rtnl_link_ops *ops) @@ -1688,6 +1712,18 @@ static struct rtnl_link_ops macvlan_link_ops = { .priv_size = sizeof(struct macvlan_dev), }; +static void update_port_bc_queue_len(struct macvlan_port *port) +{ + u32 max_bc_queue_len_req = 0; + struct macvlan_dev *vlan; + + list_for_each_entry(vlan, &port->vlans, list) { + if (vlan->bc_queue_len_req > max_bc_queue_len_req) + max_bc_queue_len_req = vlan->bc_queue_len_req; + } + port->bc_queue_len_used = max_bc_queue_len_req; +} + static int macvlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index a367ead4bf4b..96556c64c95d 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -30,6 +30,7 @@ struct macvlan_dev { enum macvlan_mode mode; u16 flags; unsigned int macaddr_count; + u32 bc_queue_len_req; #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *netpoll; #endif diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index c4b23f06f69e..874cc12a34d9 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -588,6 +588,8 @@ enum { IFLA_MACVLAN_MACADDR, IFLA_MACVLAN_MACADDR_DATA, IFLA_MACVLAN_MACADDR_COUNT, + IFLA_MACVLAN_BC_QUEUE_LEN, + IFLA_MACVLAN_BC_QUEUE_LEN_USED, __IFLA_MACVLAN_MAX, }; diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 781e482dc499..d208b2af697f 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -409,6 +409,8 @@ enum { IFLA_MACVLAN_MACADDR, IFLA_MACVLAN_MACADDR_DATA, IFLA_MACVLAN_MACADDR_COUNT, + IFLA_MACVLAN_BC_QUEUE_LEN, + IFLA_MACVLAN_BC_QUEUE_LEN_USED, __IFLA_MACVLAN_MAX, }; -- cgit v1.3.1 From 0c55f867f0c96dff93d4e0b5973975d65afb26d8 Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Wed, 2 Dec 2020 21:35:36 +0100 Subject: selftests: kvm/set_memory_region_test: Fix race in move region test The current memory region move test correctly handles the situation that the second (realigning) memslot move operation would temporarily trigger MMIO until it completes, however it does not handle the case in which the first (misaligning) move operation does this, too. This results in false test assertions in case it does so. Fix this by handling temporary MMIO from the first memslot move operation in the test guest code, too. Fixes: 8a0639fe9201 ("KVM: sefltests: Add explicit synchronization to move mem region test") Signed-off-by: Maciej S. Szmigiero Message-Id: <0fdddb94bb0e31b7da129a809a308d91c10c0b5e.1606941224.git.maciej.szmigiero@oracle.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/set_memory_region_test.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index b3ece55a2da6..6f441dd9f33c 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -156,14 +156,23 @@ static void guest_code_move_memory_region(void) GUEST_SYNC(0); /* - * Spin until the memory region is moved to a misaligned address. This - * may or may not trigger MMIO, as the window where the memslot is - * invalid is quite small. + * Spin until the memory region starts getting moved to a + * misaligned address. + * Every region move may or may not trigger MMIO, as the + * window where the memslot is invalid is usually quite small. */ val = guest_spin_on_val(0); GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val); - /* Spin until the memory region is realigned. */ + /* Spin until the misaligning memory region move completes. */ + val = guest_spin_on_val(MMIO_VAL); + GUEST_ASSERT_1(val == 1 || val == 0, val); + + /* Spin until the memory region starts to get re-aligned. */ + val = guest_spin_on_val(0); + GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val); + + /* Spin until the re-aligning memory region move completes. */ val = guest_spin_on_val(MMIO_VAL); GUEST_ASSERT_1(val == 1, val); -- cgit v1.3.1 From 71ccb50074f31a50a1da4c1d8306d54da0907b00 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 1 Dec 2020 22:52:41 -0800 Subject: tools/bpftool: Emit name for anonymous BTFs For consistency of output, emit "name " for BTFs without the name. This keeps output more consistent and obvious. Suggested-by: Song Liu Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201202065244.530571-2-andrii@kernel.org --- tools/bpf/bpftool/btf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index ed5e97157241..bd46af6a61cc 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -750,6 +750,8 @@ show_btf_plain(struct bpf_btf_info *info, int fd, printf("name [%s] ", name); else if (name && name[0]) printf("name %s ", name); + else + printf("name "); printf("size %uB", info->btf_size); n = 0; -- cgit v1.3.1 From 0cfdcd6378071f383c900e3d8862347e2af1d1ca Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 1 Dec 2020 22:52:42 -0800 Subject: libbpf: Add base BTF accessor Add ability to get base BTF. It can be also used to check if BTF is split BTF. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201202065244.530571-3-andrii@kernel.org --- tools/lib/bpf/btf.c | 5 +++++ tools/lib/bpf/btf.h | 1 + tools/lib/bpf/libbpf.map | 1 + 3 files changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 8ff46cd30ca1..1935e83d309c 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -432,6 +432,11 @@ __u32 btf__get_nr_types(const struct btf *btf) return btf->start_id + btf->nr_types - 1; } +const struct btf *btf__base_btf(const struct btf *btf) +{ + return btf->base_btf; +} + /* internal helper returning non-const pointer to a type */ static struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id) { diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 1093f6fe6800..1237bcd1dd17 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -51,6 +51,7 @@ LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, __u32 kind); LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf); +LIBBPF_API const struct btf *btf__base_btf(const struct btf *btf); LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); LIBBPF_API size_t btf__pointer_size(const struct btf *btf); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 29ff4807b909..ed55498c4122 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -340,6 +340,7 @@ LIBBPF_0.2.0 { LIBBPF_0.3.0 { global: + btf__base_btf; btf__parse_elf_split; btf__parse_raw_split; btf__parse_split; -- cgit v1.3.1 From fa4528379a51ff8d5271e1bcfa0d5ef71657869f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 1 Dec 2020 22:52:43 -0800 Subject: tools/bpftool: Auto-detect split BTFs in common cases In case of working with module's split BTF from /sys/kernel/btf/*, auto-substitute /sys/kernel/btf/vmlinux as the base BTF. This makes using bpftool with module BTFs faster and simpler. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201202065244.530571-4-andrii@kernel.org --- tools/bpf/bpftool/btf.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index bd46af6a61cc..94cd3bad5430 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -357,11 +357,13 @@ static int dump_btf_raw(const struct btf *btf, dump_btf_type(btf, root_type_ids[i], t); } } else { + const struct btf *base; int cnt = btf__get_nr_types(btf); int start_id = 1; - if (base_btf) - start_id = btf__get_nr_types(base_btf) + 1; + base = btf__base_btf(btf); + if (base) + start_id = btf__get_nr_types(base) + 1; for (i = start_id; i <= cnt; i++) { t = btf__type_by_id(btf, i); @@ -428,7 +430,7 @@ done: static int do_dump(int argc, char **argv) { - struct btf *btf = NULL; + struct btf *btf = NULL, *base = NULL; __u32 root_type_ids[2]; int root_type_cnt = 0; bool dump_c = false; @@ -502,7 +504,21 @@ static int do_dump(int argc, char **argv) } NEXT_ARG(); } else if (is_prefix(src, "file")) { - btf = btf__parse_split(*argv, base_btf); + const char sysfs_prefix[] = "/sys/kernel/btf/"; + const char sysfs_vmlinux[] = "/sys/kernel/btf/vmlinux"; + + if (!base_btf && + strncmp(*argv, sysfs_prefix, sizeof(sysfs_prefix) - 1) == 0 && + strcmp(*argv, sysfs_vmlinux) != 0) { + base = btf__parse(sysfs_vmlinux, NULL); + if (libbpf_get_error(base)) { + p_err("failed to parse vmlinux BTF at '%s': %ld\n", + sysfs_vmlinux, libbpf_get_error(base)); + base = NULL; + } + } + + btf = btf__parse_split(*argv, base ?: base_btf); if (IS_ERR(btf)) { err = -PTR_ERR(btf); btf = NULL; @@ -567,6 +583,7 @@ static int do_dump(int argc, char **argv) done: close(fd); btf__free(btf); + btf__free(base); return err; } -- cgit v1.3.1 From a874c8c389a12b9f5ab67ba01995f06bf82e94fe Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 2 Dec 2020 09:49:47 -0800 Subject: selftests/bpf: Copy file using read/write in local storage test Splice (copy_file_range) doesn't work on all filesystems. I'm running test kernels on top of my read-only disk image and it uses plan9 under the hood. This prevents test_local_storage from successfully passing. There is really no technical reason to use splice, so lets do old-school read/write to copy file; this should work in all environments. Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201202174947.3621989-1-sdf@google.com --- .../selftests/bpf/prog_tests/test_local_storage.c | 28 ++++++++++++++-------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c index fcca7ba1f368..c0fe73a17ed1 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c @@ -21,14 +21,6 @@ static inline int sys_pidfd_open(pid_t pid, unsigned int flags) return syscall(__NR_pidfd_open, pid, flags); } -static inline ssize_t copy_file_range(int fd_in, loff_t *off_in, int fd_out, - loff_t *off_out, size_t len, - unsigned int flags) -{ - return syscall(__NR_copy_file_range, fd_in, off_in, fd_out, off_out, - len, flags); -} - static unsigned int duration; #define TEST_STORAGE_VALUE 0xbeefdead @@ -47,6 +39,7 @@ static int copy_rm(char *dest) { int fd_in, fd_out = -1, ret = 0; struct stat stat; + char *buf = NULL; fd_in = open("/bin/rm", O_RDONLY); if (fd_in < 0) @@ -64,18 +57,33 @@ static int copy_rm(char *dest) goto out; } - ret = copy_file_range(fd_in, NULL, fd_out, NULL, stat.st_size, 0); - if (ret == -1) { + buf = malloc(stat.st_blksize); + if (!buf) { ret = -errno; goto out; } + while (ret = read(fd_in, buf, stat.st_blksize), ret > 0) { + ret = write(fd_out, buf, ret); + if (ret < 0) { + ret = -errno; + goto out; + + } + } + if (ret < 0) { + ret = -errno; + goto out; + + } + /* Set executable permission on the copied file */ ret = chmod(dest, 0100); if (ret == -1) ret = -errno; out: + free(buf); close(fd_in); close(fd_out); return ret; -- cgit v1.3.1 From 22e8ebe35a2e30ee19e02c41cacc99c2f896bc4b Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 3 Dec 2020 10:22:34 +0000 Subject: tools/resolve_btfids: Fix some error messages Add missing newlines and fix polarity of strerror argument. Signed-off-by: Brendan Jackman Signed-off-by: Alexei Starovoitov Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20201203102234.648540-1-jackmanb@google.com --- tools/bpf/resolve_btfids/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index dfa540d8a02d..e3ea569ee125 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -454,7 +454,7 @@ static int symbols_collect(struct object *obj) return -ENOMEM; if (id->addr_cnt >= ADDR_CNT) { - pr_err("FAILED symbol %s crossed the number of allowed lists", + pr_err("FAILED symbol %s crossed the number of allowed lists\n", id->name); return -1; } @@ -477,8 +477,8 @@ static int symbols_resolve(struct object *obj) btf = btf__parse(obj->btf ?: obj->path, NULL); err = libbpf_get_error(btf); if (err) { - pr_err("FAILED: load BTF from %s: %s", - obj->path, strerror(err)); + pr_err("FAILED: load BTF from %s: %s\n", + obj->path, strerror(-err)); return -1; } -- cgit v1.3.1 From e459f49b4394e2630ea55d5ac7a49402686848fe Mon Sep 17 00:00:00 2001 From: Mariusz Dudek Date: Thu, 3 Dec 2020 10:05:45 +0100 Subject: libbpf: Separate XDP program load with xsk socket creation Add support for separation of eBPF program load and xsk socket creation. This is needed for use-case when you want to privide as little privileges as possible to the data plane application that will handle xsk socket creation and incoming traffic. With this patch the data entity container can be run with only CAP_NET_RAW capability to fulfill its purpose of creating xsk socket and handling packages. In case your umem is larger or equal process limit for MEMLOCK you need either increase the limit or CAP_IPC_LOCK capability. To resolve privileges issue two APIs are introduced: - xsk_setup_xdp_prog - loads the built in XDP program. It can also return xsks_map_fd which is needed by unprivileged process to update xsks_map with AF_XDP socket "fd" - xsk_socket__update_xskmap - inserts an AF_XDP socket into an xskmap for a particular xsk_socket Signed-off-by: Mariusz Dudek Signed-off-by: Alexei Starovoitov Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20201203090546.11976-2-mariuszx.dudek@intel.com --- tools/lib/bpf/libbpf.map | 2 ++ tools/lib/bpf/xsk.c | 92 +++++++++++++++++++++++++++++++++++++++++++----- tools/lib/bpf/xsk.h | 5 +++ 3 files changed, 90 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index ed55498c4122..7c4126542e2b 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -346,4 +346,6 @@ LIBBPF_0.3.0 { btf__parse_split; btf__new_empty_split; btf__new_split; + xsk_setup_xdp_prog; + xsk_socket__update_xskmap; } LIBBPF_0.2.0; diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 9bc537d0b92d..4b051ec7cfbb 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -566,8 +566,35 @@ static int xsk_set_bpf_maps(struct xsk_socket *xsk) &xsk->fd, 0); } -static int xsk_setup_xdp_prog(struct xsk_socket *xsk) +static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk) { + char ifname[IFNAMSIZ]; + struct xsk_ctx *ctx; + char *interface; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) + return -ENOMEM; + + interface = if_indextoname(ifindex, &ifname[0]); + if (!interface) { + free(ctx); + return -errno; + } + + ctx->ifindex = ifindex; + strncpy(ctx->ifname, ifname, IFNAMSIZ - 1); + ctx->ifname[IFNAMSIZ - 1] = 0; + + xsk->ctx = ctx; + + return 0; +} + +static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, + int *xsks_map_fd) +{ + struct xsk_socket *xsk = _xdp; struct xsk_ctx *ctx = xsk->ctx; __u32 prog_id = 0; int err; @@ -584,8 +611,7 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) err = xsk_load_xdp_prog(xsk); if (err) { - xsk_delete_bpf_maps(xsk); - return err; + goto err_load_xdp_prog; } } else { ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id); @@ -598,15 +624,29 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) } } - if (xsk->rx) + if (xsk->rx) { err = xsk_set_bpf_maps(xsk); - if (err) { - xsk_delete_bpf_maps(xsk); - close(ctx->prog_fd); - return err; + if (err) { + if (!prog_id) { + goto err_set_bpf_maps; + } else { + close(ctx->prog_fd); + return err; + } + } } + if (xsks_map_fd) + *xsks_map_fd = ctx->xsks_map_fd; return 0; + +err_set_bpf_maps: + close(ctx->prog_fd); + bpf_set_link_xdp_fd(ctx->ifindex, -1, 0); +err_load_xdp_prog: + xsk_delete_bpf_maps(xsk); + + return err; } static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex, @@ -689,6 +729,40 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, return ctx; } +static void xsk_destroy_xsk_struct(struct xsk_socket *xsk) +{ + free(xsk->ctx); + free(xsk); +} + +int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd) +{ + xsk->ctx->xsks_map_fd = fd; + return xsk_set_bpf_maps(xsk); +} + +int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd) +{ + struct xsk_socket *xsk; + int res; + + xsk = calloc(1, sizeof(*xsk)); + if (!xsk) + return -ENOMEM; + + res = xsk_create_xsk_struct(ifindex, xsk); + if (res) { + free(xsk); + return -EINVAL; + } + + res = __xsk_setup_xdp_prog(xsk, xsks_map_fd); + + xsk_destroy_xsk_struct(xsk); + + return res; +} + int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, const char *ifname, __u32 queue_id, struct xsk_umem *umem, @@ -838,7 +912,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, ctx->prog_fd = -1; if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { - err = xsk_setup_xdp_prog(xsk); + err = __xsk_setup_xdp_prog(xsk, NULL); if (err) goto out_mmap_tx; } diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index 5865e082ba0b..e9f121f5d129 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -204,6 +204,11 @@ struct xsk_umem_config { __u32 flags; }; +LIBBPF_API int xsk_setup_xdp_prog(int ifindex, + int *xsks_map_fd); +LIBBPF_API int xsk_socket__update_xskmap(struct xsk_socket *xsk, + int xsks_map_fd); + /* Flags for the libbpf_flags field. */ #define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0) -- cgit v1.3.1 From 3db980449bc3b9765c78210787bcbf4305636982 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Thu, 3 Dec 2020 19:14:34 +0000 Subject: selftests/bpf: Update ima_setup.sh for busybox losetup on busybox does not output the name of loop device on using -f with --show. It also doesn't support -j to find the loop devices for a given backing file. losetup is updated to use "-a" which is available on busybox. blkid does not support options (-s and -o) to only display the uuid, so parse the output instead. Not all environments have mkfs.ext4, the test requires a loop device with a backing image file which could formatted with any filesystem. Update to using mkfs.ext2 which is available on busybox. Fixes: 34b82d3ac105 ("bpf: Add a selftest for bpf_ima_inode_hash") Reported-by: Andrii Nakryiko Signed-off-by: KP Singh Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201203191437.666737-2-kpsingh@chromium.org --- tools/testing/selftests/bpf/ima_setup.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh index 15490ccc5e55..137f2d32598f 100755 --- a/tools/testing/selftests/bpf/ima_setup.sh +++ b/tools/testing/selftests/bpf/ima_setup.sh @@ -3,6 +3,7 @@ set -e set -u +set -o pipefail IMA_POLICY_FILE="/sys/kernel/security/ima/policy" TEST_BINARY="/bin/true" @@ -23,13 +24,15 @@ setup() dd if=/dev/zero of="${mount_img}" bs=1M count=10 - local loop_device="$(losetup --find --show ${mount_img})" + losetup -f "${mount_img}" + local loop_device=$(losetup -a | grep ${mount_img:?} | cut -d ":" -f1) - mkfs.ext4 "${loop_device}" + mkfs.ext2 "${loop_device:?}" mount "${loop_device}" "${mount_dir}" cp "${TEST_BINARY}" "${mount_dir}" - local mount_uuid="$(blkid -s UUID -o value ${loop_device})" + local mount_uuid="$(blkid ${loop_device} | sed 's/.*UUID="\([^"]*\)".*/\1/')" + echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${IMA_POLICY_FILE} } @@ -38,7 +41,8 @@ cleanup() { local mount_img="${tmp_dir}/test.img" local mount_dir="${tmp_dir}/mnt" - local loop_devices=$(losetup -j ${mount_img} -O NAME --noheadings) + local loop_devices=$(losetup -a | grep ${mount_img:?} | cut -d ":" -f1) + for loop_dev in "${loop_devices}"; do losetup -d $loop_dev done -- cgit v1.3.1 From 1ee076719d4e14c005f375c50731ed44eb48fee4 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Thu, 3 Dec 2020 19:14:35 +0000 Subject: selftests/bpf: Ensure securityfs mount before writing ima policy SecurityFS may not be mounted even if it is enabled in the kernel config. So, check if the mount exists in /proc/mounts by parsing the file and, if not, mount it on /sys/kernel/security. Fixes: 34b82d3ac105 ("bpf: Add a selftest for bpf_ima_inode_hash") Reported-by: Andrii Nakryiko Signed-off-by: KP Singh Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201203191437.666737-3-kpsingh@chromium.org --- tools/testing/selftests/bpf/ima_setup.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh index 137f2d32598f..b1ee4bf06996 100755 --- a/tools/testing/selftests/bpf/ima_setup.sh +++ b/tools/testing/selftests/bpf/ima_setup.sh @@ -14,6 +14,20 @@ usage() exit 1 } +ensure_mount_securityfs() +{ + local securityfs_dir=$(grep "securityfs" /proc/mounts | awk '{print $2}') + + if [ -z "${securityfs_dir}" ]; then + securityfs_dir=/sys/kernel/security + mount -t securityfs security "${securityfs_dir}" + fi + + if [ ! -d "${securityfs_dir}" ]; then + echo "${securityfs_dir}: securityfs is not mounted" && exit 1 + fi +} + setup() { local tmp_dir="$1" @@ -33,6 +47,7 @@ setup() cp "${TEST_BINARY}" "${mount_dir}" local mount_uuid="$(blkid ${loop_device} | sed 's/.*UUID="\([^"]*\)".*/\1/')" + ensure_mount_securityfs echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${IMA_POLICY_FILE} } -- cgit v1.3.1 From d932e043b9d6d60113e90267ae2fbe4e946d7b08 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Thu, 3 Dec 2020 19:14:36 +0000 Subject: selftests/bpf: Add config dependency on BLK_DEV_LOOP The ima selftest restricts its scope to a test filesystem image mounted on a loop device and prevents permanent ima policy changes for the whole system. Fixes: 34b82d3ac105 ("bpf: Add a selftest for bpf_ima_inode_hash") Reported-by: Andrii Nakryiko Signed-off-by: KP Singh Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201203191437.666737-4-kpsingh@chromium.org --- tools/testing/selftests/bpf/config | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 365bf9771b07..37e1f303fc11 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -43,3 +43,4 @@ CONFIG_IMA=y CONFIG_SECURITYFS=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_READ_POLICY=y +CONFIG_BLK_DEV_LOOP=y -- cgit v1.3.1 From ffebecd9d49542046c5ecbb410af01e016636e19 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Thu, 3 Dec 2020 19:14:37 +0000 Subject: selftests/bpf: Indent ima_setup.sh with tabs. The file was formatted with spaces instead of tabs and went unnoticed as checkpatch.pl did not complain (probably because this is a shell script). Re-indent it with tabs to be consistent with other scripts. Signed-off-by: KP Singh Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201203191437.666737-5-kpsingh@chromium.org --- tools/testing/selftests/bpf/ima_setup.sh | 108 +++++++++++++++---------------- 1 file changed, 54 insertions(+), 54 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh index b1ee4bf06996..2bfc646bc230 100755 --- a/tools/testing/selftests/bpf/ima_setup.sh +++ b/tools/testing/selftests/bpf/ima_setup.sh @@ -10,90 +10,90 @@ TEST_BINARY="/bin/true" usage() { - echo "Usage: $0 " - exit 1 + echo "Usage: $0 " + exit 1 } ensure_mount_securityfs() { - local securityfs_dir=$(grep "securityfs" /proc/mounts | awk '{print $2}') + local securityfs_dir=$(grep "securityfs" /proc/mounts | awk '{print $2}') - if [ -z "${securityfs_dir}" ]; then - securityfs_dir=/sys/kernel/security - mount -t securityfs security "${securityfs_dir}" - fi + if [ -z "${securityfs_dir}" ]; then + securityfs_dir=/sys/kernel/security + mount -t securityfs security "${securityfs_dir}" + fi - if [ ! -d "${securityfs_dir}" ]; then - echo "${securityfs_dir}: securityfs is not mounted" && exit 1 - fi + if [ ! -d "${securityfs_dir}" ]; then + echo "${securityfs_dir}: securityfs is not mounted" && exit 1 + fi } setup() { - local tmp_dir="$1" - local mount_img="${tmp_dir}/test.img" - local mount_dir="${tmp_dir}/mnt" - local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})" - mkdir -p ${mount_dir} + local tmp_dir="$1" + local mount_img="${tmp_dir}/test.img" + local mount_dir="${tmp_dir}/mnt" + local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})" + mkdir -p ${mount_dir} - dd if=/dev/zero of="${mount_img}" bs=1M count=10 + dd if=/dev/zero of="${mount_img}" bs=1M count=10 - losetup -f "${mount_img}" - local loop_device=$(losetup -a | grep ${mount_img:?} | cut -d ":" -f1) + losetup -f "${mount_img}" + local loop_device=$(losetup -a | grep ${mount_img:?} | cut -d ":" -f1) - mkfs.ext2 "${loop_device:?}" - mount "${loop_device}" "${mount_dir}" + mkfs.ext2 "${loop_device:?}" + mount "${loop_device}" "${mount_dir}" - cp "${TEST_BINARY}" "${mount_dir}" - local mount_uuid="$(blkid ${loop_device} | sed 's/.*UUID="\([^"]*\)".*/\1/')" + cp "${TEST_BINARY}" "${mount_dir}" + local mount_uuid="$(blkid ${loop_device} | sed 's/.*UUID="\([^"]*\)".*/\1/')" - ensure_mount_securityfs - echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${IMA_POLICY_FILE} + ensure_mount_securityfs + echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${IMA_POLICY_FILE} } cleanup() { - local tmp_dir="$1" - local mount_img="${tmp_dir}/test.img" - local mount_dir="${tmp_dir}/mnt" + local tmp_dir="$1" + local mount_img="${tmp_dir}/test.img" + local mount_dir="${tmp_dir}/mnt" - local loop_devices=$(losetup -a | grep ${mount_img:?} | cut -d ":" -f1) + local loop_devices=$(losetup -a | grep ${mount_img:?} | cut -d ":" -f1) - for loop_dev in "${loop_devices}"; do - losetup -d $loop_dev - done + for loop_dev in "${loop_devices}"; do + losetup -d $loop_dev + done - umount ${mount_dir} - rm -rf ${tmp_dir} + umount ${mount_dir} + rm -rf ${tmp_dir} } run() { - local tmp_dir="$1" - local mount_dir="${tmp_dir}/mnt" - local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})" + local tmp_dir="$1" + local mount_dir="${tmp_dir}/mnt" + local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})" - exec "${copied_bin_path}" + exec "${copied_bin_path}" } main() { - [[ $# -ne 2 ]] && usage - - local action="$1" - local tmp_dir="$2" - - [[ ! -d "${tmp_dir}" ]] && echo "Directory ${tmp_dir} doesn't exist" && exit 1 - - if [[ "${action}" == "setup" ]]; then - setup "${tmp_dir}" - elif [[ "${action}" == "cleanup" ]]; then - cleanup "${tmp_dir}" - elif [[ "${action}" == "run" ]]; then - run "${tmp_dir}" - else - echo "Unknown action: ${action}" - exit 1 - fi + [[ $# -ne 2 ]] && usage + + local action="$1" + local tmp_dir="$2" + + [[ ! -d "${tmp_dir}" ]] && echo "Directory ${tmp_dir} doesn't exist" && exit 1 + + if [[ "${action}" == "setup" ]]; then + setup "${tmp_dir}" + elif [[ "${action}" == "cleanup" ]]; then + cleanup "${tmp_dir}" + elif [[ "${action}" == "run" ]]; then + run "${tmp_dir}" + else + echo "Unknown action: ${action}" + exit 1 + fi } main "$@" -- cgit v1.3.1 From 80b2b5c3a701d56de98d00d99bc9cc384fb316d9 Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Wed, 2 Dec 2020 23:34:10 -0500 Subject: libbpf: Fail early when loading programs with unspecified type Before this patch, a program with unspecified type (BPF_PROG_TYPE_UNSPEC) would be passed to the BPF syscall, only to have the kernel reject it with an opaque invalid argument error. This patch makes libbpf reject such programs with a nicer error message - in particular libbpf now tries to diagnose bad ELF section names at both open time and load time. Signed-off-by: Andrei Matei Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201203043410.59699-1-andreimatei1@gmail.com --- tools/lib/bpf/libbpf.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 313034117070..d6f45538444d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6629,6 +6629,16 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, char *log_buf = NULL; int btf_fd, ret; + if (prog->type == BPF_PROG_TYPE_UNSPEC) { + /* + * The program type must be set. Most likely we couldn't find a proper + * section definition at load time, and thus we didn't infer the type. + */ + pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n", + prog->name, prog->sec_name); + return -EINVAL; + } + if (!insns || !insns_cnt) return -EINVAL; @@ -6920,9 +6930,12 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, bpf_object__for_each_program(prog, obj) { prog->sec_def = find_sec_def(prog->sec_name); - if (!prog->sec_def) + if (!prog->sec_def) { /* couldn't guess, but user might manually specify */ + pr_debug("prog '%s': unrecognized ELF section name '%s'\n", + prog->name, prog->sec_name); continue; + } if (prog->sec_def->is_sleepable) prog->prog_flags |= BPF_F_SLEEPABLE; -- cgit v1.3.1 From 9cf309c56f7910a81fbe053b6f11c3b1f0987b12 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Thu, 3 Dec 2020 10:33:06 +0100 Subject: libbpf: Sanitise map names before pinning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we added sanitising of map names before loading programs to libbpf, we still allowed periods in the name. While the kernel will accept these for the map names themselves, they are not allowed in file names when pinning maps. This means that bpf_object__pin_maps() will fail if called on an object that contains internal maps (such as sections .rodata). Fix this by replacing periods with underscores when constructing map pin paths. This only affects the paths generated by libbpf when bpf_object__pin_maps() is called with a path argument. Any pin paths set by bpf_map__set_pin_path() are unaffected, and it will still be up to the caller to avoid invalid characters in those. Fixes: 113e6b7e15e2 ("libbpf: Sanitise internal map names so they are not rejected by the kernel") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201203093306.107676-1-toke@redhat.com --- tools/lib/bpf/libbpf.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index d6f45538444d..b2e16efabde0 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7659,6 +7659,16 @@ bool bpf_map__is_pinned(const struct bpf_map *map) return map->pinned; } +static void sanitize_pin_path(char *s) +{ + /* bpffs disallows periods in path names */ + while (*s) { + if (*s == '.') + *s = '_'; + s++; + } +} + int bpf_object__pin_maps(struct bpf_object *obj, const char *path) { struct bpf_map *map; @@ -7688,6 +7698,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) err = -ENAMETOOLONG; goto err_unpin_maps; } + sanitize_pin_path(buf); pin_path = buf; } else if (!map->pin_path) { continue; @@ -7732,6 +7743,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) return -EINVAL; else if (len >= PATH_MAX) return -ENAMETOOLONG; + sanitize_pin_path(buf); pin_path = buf; } else if (!map->pin_path) { continue; -- cgit v1.3.1 From d6d418bd8f92aaa4c7c26d606188147c2ee0dae9 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 2 Dec 2020 15:13:32 -0800 Subject: libbpf: Cap retries in sys_bpf_prog_load I've seen a situation, where a process that's under pprof constantly generates SIGPROF which prevents program loading indefinitely. The right thing to do probably is to disable signals in the upper layers while loading, but it still would be nice to get some error from libbpf instead of an endless loop. Let's add some small retry limit to the program loading: try loading the program 5 (arbitrary) times and give up. v2: * 10 -> 5 retires (Andrii Nakryiko) Signed-off-by: Stanislav Fomichev Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201202231332.3923644-1-sdf@google.com --- tools/lib/bpf/bpf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index d27e34133973..4025266d0fb0 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -67,11 +67,12 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) { + int retries = 5; int fd; do { fd = sys_bpf(BPF_PROG_LOAD, attr, size); - } while (fd < 0 && errno == EAGAIN); + } while (fd < 0 && errno == EAGAIN && retries-- > 0); return fd; } -- cgit v1.3.1 From 58c185b85d0c1753b0b6a9390294bd883faf4d77 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 3 Dec 2020 12:08:50 +0000 Subject: bpf: Fix cold build of test_progs-no_alu32 This object lives inside the trunner output dir, i.e. tools/testing/selftests/bpf/no_alu32/btf_data.o At some point it gets copied into the parent directory during another part of the build, but that doesn't happen when building test_progs-no_alu32 from clean. Signed-off-by: Brendan Jackman Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20201203120850.859170-1-jackmanb@google.com --- tools/testing/selftests/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 894192c319fb..371b022d932c 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -378,7 +378,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ | $(TRUNNER_BINARY)-extras $$(call msg,BINARY,,$$@) $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ - $(Q)$(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@ + $(Q)$(RESOLVE_BTFIDS) --no-fail --btf $(TRUNNER_OUTPUT)/btf_data.o $$@ endef -- cgit v1.3.1 From 55144f31f0d2fdd3e74ead67f1649bf577961eaa Mon Sep 17 00:00:00 2001 From: Prankur gupta Date: Wed, 2 Dec 2020 13:31:52 -0800 Subject: selftests/bpf: Add Userspace tests for TCP_WINDOW_CLAMP Adding selftests for new added functionality to set TCP_WINDOW_CLAMP from bpf setsockopt. Signed-off-by: Prankur gupta Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201202213152.435886-3-prankgup@fb.com --- tools/testing/selftests/bpf/bpf_tcp_helpers.h | 1 + .../testing/selftests/bpf/prog_tests/tcpbpf_user.c | 4 +++ .../testing/selftests/bpf/progs/test_tcpbpf_kern.c | 33 ++++++++++++++++++++++ tools/testing/selftests/bpf/test_tcpbpf.h | 2 ++ 4 files changed, 40 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h index 2915664c335d..6a9053162cf2 100644 --- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h +++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h @@ -56,6 +56,7 @@ struct tcp_sock { __u32 rcv_nxt; __u32 snd_nxt; __u32 snd_una; + __u32 window_clamp; __u8 ecn_flags; __u32 delivered; __u32 delivered_ce; diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c index ab5281475f44..87923d2865b7 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c +++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c @@ -42,6 +42,10 @@ static void verify_result(struct tcpbpf_globals *result) /* check getsockopt for SAVED_SYN */ ASSERT_EQ(result->tcp_saved_syn, 1, "tcp_saved_syn"); + + /* check getsockopt for window_clamp */ + ASSERT_EQ(result->window_clamp_client, 9216, "window_clamp_client"); + ASSERT_EQ(result->window_clamp_server, 9216, "window_clamp_server"); } static void run_test(struct tcpbpf_globals *result) diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c index e85e49deba70..94f50f7e94d6 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c @@ -12,17 +12,41 @@ #include #include #include +#include "bpf_tcp_helpers.h" #include "test_tcpbpf.h" struct tcpbpf_globals global = {}; int _version SEC("version") = 1; +/** + * SOL_TCP is defined in while + * TCP_SAVED_SYN is defined in already included + */ +#ifndef SOL_TCP +#define SOL_TCP 6 +#endif + +static __always_inline int get_tp_window_clamp(struct bpf_sock_ops *skops) +{ + struct bpf_sock *sk; + struct tcp_sock *tp; + + sk = skops->sk; + if (!sk) + return -1; + tp = bpf_skc_to_tcp_sock(sk); + if (!tp) + return -1; + return tp->window_clamp; +} + SEC("sockops") int bpf_testcb(struct bpf_sock_ops *skops) { char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)]; struct bpf_sock_ops *reuse = skops; struct tcphdr *thdr; + int window_clamp = 9216; int good_call_rv = 0; int bad_call_rv = 0; int save_syn = 1; @@ -75,6 +99,11 @@ int bpf_testcb(struct bpf_sock_ops *skops) global.event_map |= (1 << op); switch (op) { + case BPF_SOCK_OPS_TCP_CONNECT_CB: + rv = bpf_setsockopt(skops, SOL_TCP, TCP_WINDOW_CLAMP, + &window_clamp, sizeof(window_clamp)); + global.window_clamp_client = get_tp_window_clamp(skops); + break; case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: /* Test failure to set largest cb flag (assumes not defined) */ global.bad_cb_test_rv = bpf_sock_ops_cb_flags_set(skops, 0x80); @@ -100,6 +129,10 @@ int bpf_testcb(struct bpf_sock_ops *skops) global.tcp_saved_syn = v; } } + rv = bpf_setsockopt(skops, SOL_TCP, TCP_WINDOW_CLAMP, + &window_clamp, sizeof(window_clamp)); + + global.window_clamp_server = get_tp_window_clamp(skops); break; case BPF_SOCK_OPS_RTO_CB: break; diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h index 0ed33521cbbb..9dd9b5590f9d 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf.h +++ b/tools/testing/selftests/bpf/test_tcpbpf.h @@ -16,5 +16,7 @@ struct tcpbpf_globals { __u32 num_close_events; __u32 tcp_save_syn; __u32 tcp_saved_syn; + __u32 window_clamp_client; + __u32 window_clamp_server; }; #endif -- cgit v1.3.1 From a19f93cfafdf85851c69bc9f677aa4f40c53610f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Dec 2020 12:46:23 -0800 Subject: libbpf: Add internal helper to load BTF data by FD Add a btf_get_from_fd() helper, which constructs struct btf from in-kernel BTF data by FD. This is used for loading module BTFs. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201203204634.1325171-4-andrii@kernel.org --- tools/lib/bpf/btf.c | 61 +++++++++++++++++++++++------------------ tools/lib/bpf/libbpf_internal.h | 1 + 2 files changed, 36 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 1935e83d309c..3c3f2bc6c652 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1323,35 +1323,27 @@ const char *btf__name_by_offset(const struct btf *btf, __u32 offset) return btf__str_by_offset(btf, offset); } -int btf__get_from_id(__u32 id, struct btf **btf) +struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf) { - struct bpf_btf_info btf_info = { 0 }; + struct bpf_btf_info btf_info; __u32 len = sizeof(btf_info); __u32 last_size; - int btf_fd; + struct btf *btf; void *ptr; int err; - err = 0; - *btf = NULL; - btf_fd = bpf_btf_get_fd_by_id(id); - if (btf_fd < 0) - return 0; - /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so * let's start with a sane default - 4KiB here - and resize it only if * bpf_obj_get_info_by_fd() needs a bigger buffer. */ - btf_info.btf_size = 4096; - last_size = btf_info.btf_size; + last_size = 4096; ptr = malloc(last_size); - if (!ptr) { - err = -ENOMEM; - goto exit_free; - } + if (!ptr) + return ERR_PTR(-ENOMEM); - memset(ptr, 0, last_size); + memset(&btf_info, 0, sizeof(btf_info)); btf_info.btf = ptr_to_u64(ptr); + btf_info.btf_size = last_size; err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); if (!err && btf_info.btf_size > last_size) { @@ -1360,31 +1352,48 @@ int btf__get_from_id(__u32 id, struct btf **btf) last_size = btf_info.btf_size; temp_ptr = realloc(ptr, last_size); if (!temp_ptr) { - err = -ENOMEM; + btf = ERR_PTR(-ENOMEM); goto exit_free; } ptr = temp_ptr; - memset(ptr, 0, last_size); + + len = sizeof(btf_info); + memset(&btf_info, 0, sizeof(btf_info)); btf_info.btf = ptr_to_u64(ptr); + btf_info.btf_size = last_size; + err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); } if (err || btf_info.btf_size > last_size) { - err = errno; + btf = err ? ERR_PTR(-errno) : ERR_PTR(-E2BIG); goto exit_free; } - *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size); - if (IS_ERR(*btf)) { - err = PTR_ERR(*btf); - *btf = NULL; - } + btf = btf_new(ptr, btf_info.btf_size, base_btf); exit_free: - close(btf_fd); free(ptr); + return btf; +} - return err; +int btf__get_from_id(__u32 id, struct btf **btf) +{ + struct btf *res; + int btf_fd; + + *btf = NULL; + btf_fd = bpf_btf_get_fd_by_id(id); + if (btf_fd < 0) + return -errno; + + res = btf_get_from_fd(btf_fd, NULL); + close(btf_fd); + if (IS_ERR(res)) + return PTR_ERR(res); + + *btf = res; + return 0; } int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index d99bc847bf84..e569ae63808e 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -155,6 +155,7 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name, __u32 *size); int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, __u32 *off); +struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); struct btf_ext_info { /* -- cgit v1.3.1 From 0f7515ca7cddadabe04e28e20a257b1bbb6cb98a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Dec 2020 12:46:24 -0800 Subject: libbpf: Refactor CO-RE relocs to not assume a single BTF object Refactor CO-RE relocation candidate search to not expect a single BTF, rather return all candidate types with their corresponding BTF objects. This will allow to extend CO-RE relocations to accommodate kernel module BTFs. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201203204634.1325171-5-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 187 +++++++++++++++++++++++++++++-------------------- 1 file changed, 111 insertions(+), 76 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index b2e16efabde0..2e4fa3ce6b94 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -462,11 +462,14 @@ struct bpf_object { struct list_head list; struct btf *btf; + struct btf_ext *btf_ext; + /* Parse and load BTF vmlinux if any of the programs in the object need * it at load time. */ struct btf *btf_vmlinux; - struct btf_ext *btf_ext; + /* vmlinux BTF override for CO-RE relocations */ + struct btf *btf_vmlinux_override; void *priv; bpf_object_clear_priv_t clear_priv; @@ -4600,46 +4603,43 @@ static size_t bpf_core_essential_name_len(const char *name) return n; } -/* dynamically sized list of type IDs */ -struct ids_vec { - __u32 *data; +struct core_cand +{ + const struct btf *btf; + const struct btf_type *t; + const char *name; + __u32 id; +}; + +/* dynamically sized list of type IDs and its associated struct btf */ +struct core_cand_list { + struct core_cand *cands; int len; }; -static void bpf_core_free_cands(struct ids_vec *cand_ids) +static void bpf_core_free_cands(struct core_cand_list *cands) { - free(cand_ids->data); - free(cand_ids); + free(cands->cands); + free(cands); } -static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, - __u32 local_type_id, - const struct btf *targ_btf) +static int bpf_core_add_cands(struct core_cand *local_cand, + size_t local_essent_len, + const struct btf *targ_btf, + const char *targ_btf_name, + int targ_start_id, + struct core_cand_list *cands) { - size_t local_essent_len, targ_essent_len; - const char *local_name, *targ_name; - const struct btf_type *t, *local_t; - struct ids_vec *cand_ids; - __u32 *new_ids; - int i, err, n; - - local_t = btf__type_by_id(local_btf, local_type_id); - if (!local_t) - return ERR_PTR(-EINVAL); - - local_name = btf__name_by_offset(local_btf, local_t->name_off); - if (str_is_empty(local_name)) - return ERR_PTR(-EINVAL); - local_essent_len = bpf_core_essential_name_len(local_name); - - cand_ids = calloc(1, sizeof(*cand_ids)); - if (!cand_ids) - return ERR_PTR(-ENOMEM); + struct core_cand *new_cands, *cand; + const struct btf_type *t; + const char *targ_name; + size_t targ_essent_len; + int n, i; n = btf__get_nr_types(targ_btf); - for (i = 1; i <= n; i++) { + for (i = targ_start_id; i <= n; i++) { t = btf__type_by_id(targ_btf, i); - if (btf_kind(t) != btf_kind(local_t)) + if (btf_kind(t) != btf_kind(local_cand->t)) continue; targ_name = btf__name_by_offset(targ_btf, t->name_off); @@ -4650,25 +4650,62 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, if (targ_essent_len != local_essent_len) continue; - if (strncmp(local_name, targ_name, local_essent_len) == 0) { - pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s\n", - local_type_id, btf_kind_str(local_t), - local_name, i, btf_kind_str(t), targ_name); - new_ids = libbpf_reallocarray(cand_ids->data, - cand_ids->len + 1, - sizeof(*cand_ids->data)); - if (!new_ids) { - err = -ENOMEM; - goto err_out; - } - cand_ids->data = new_ids; - cand_ids->data[cand_ids->len++] = i; - } + if (strncmp(local_cand->name, targ_name, local_essent_len) != 0) + continue; + + pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n", + local_cand->id, btf_kind_str(local_cand->t), + local_cand->name, i, btf_kind_str(t), targ_name, + targ_btf_name); + new_cands = libbpf_reallocarray(cands->cands, cands->len + 1, + sizeof(*cands->cands)); + if (!new_cands) + return -ENOMEM; + + cand = &new_cands[cands->len]; + cand->btf = targ_btf; + cand->t = t; + cand->name = targ_name; + cand->id = i; + + cands->cands = new_cands; + cands->len++; } - return cand_ids; -err_out: - bpf_core_free_cands(cand_ids); - return ERR_PTR(err); + return 0; +} + +static struct core_cand_list * +bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id) +{ + struct core_cand local_cand = {}; + struct core_cand_list *cands; + size_t local_essent_len; + int err; + + local_cand.btf = local_btf; + local_cand.t = btf__type_by_id(local_btf, local_type_id); + if (!local_cand.t) + return ERR_PTR(-EINVAL); + + local_cand.name = btf__name_by_offset(local_btf, local_cand.t->name_off); + if (str_is_empty(local_cand.name)) + return ERR_PTR(-EINVAL); + local_essent_len = bpf_core_essential_name_len(local_cand.name); + + cands = calloc(1, sizeof(*cands)); + if (!cands) + return ERR_PTR(-ENOMEM); + + /* Attempt to find target candidates in vmlinux BTF first */ + err = bpf_core_add_cands(&local_cand, local_essent_len, + obj->btf_vmlinux_override ?: obj->btf_vmlinux, + "vmlinux", 1, cands); + if (err) { + bpf_core_free_cands(cands); + return ERR_PTR(err); + } + + return cands; } /* Check two types for compatibility for the purpose of field access @@ -5661,7 +5698,6 @@ static int bpf_core_apply_relo(struct bpf_program *prog, const struct bpf_core_relo *relo, int relo_idx, const struct btf *local_btf, - const struct btf *targ_btf, struct hashmap *cand_cache) { struct bpf_core_spec local_spec, cand_spec, targ_spec = {}; @@ -5669,8 +5705,8 @@ static int bpf_core_apply_relo(struct bpf_program *prog, struct bpf_core_relo_res cand_res, targ_res; const struct btf_type *local_type; const char *local_name; - struct ids_vec *cand_ids; - __u32 local_id, cand_id; + struct core_cand_list *cands = NULL; + __u32 local_id; const char *spec_str; int i, j, err; @@ -5717,24 +5753,24 @@ static int bpf_core_apply_relo(struct bpf_program *prog, return -EOPNOTSUPP; } - if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) { - cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf); - if (IS_ERR(cand_ids)) { + if (!hashmap__find(cand_cache, type_key, (void **)&cands)) { + cands = bpf_core_find_cands(prog->obj, local_btf, local_id); + if (IS_ERR(cands)) { pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld", prog->name, relo_idx, local_id, btf_kind_str(local_type), - local_name, PTR_ERR(cand_ids)); - return PTR_ERR(cand_ids); + local_name, PTR_ERR(cands)); + return PTR_ERR(cands); } - err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL); + err = hashmap__set(cand_cache, type_key, cands, NULL, NULL); if (err) { - bpf_core_free_cands(cand_ids); + bpf_core_free_cands(cands); return err; } } - for (i = 0, j = 0; i < cand_ids->len; i++) { - cand_id = cand_ids->data[i]; - err = bpf_core_spec_match(&local_spec, targ_btf, cand_id, &cand_spec); + for (i = 0, j = 0; i < cands->len; i++) { + err = bpf_core_spec_match(&local_spec, cands->cands[i].btf, + cands->cands[i].id, &cand_spec); if (err < 0) { pr_warn("prog '%s': relo #%d: error matching candidate #%d ", prog->name, relo_idx, i); @@ -5778,7 +5814,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, return -EINVAL; } - cand_ids->data[j++] = cand_spec.root_type_id; + cands->cands[j++] = cands->cands[i]; } /* @@ -5790,7 +5826,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, * depending on relo's kind. */ if (j > 0) - cand_ids->len = j; + cands->len = j; /* * If no candidates were found, it might be both a programmer error, @@ -5834,20 +5870,19 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) struct hashmap_entry *entry; struct hashmap *cand_cache = NULL; struct bpf_program *prog; - struct btf *targ_btf; const char *sec_name; int i, err = 0, insn_idx, sec_idx; if (obj->btf_ext->core_relo_info.len == 0) return 0; - if (targ_btf_path) - targ_btf = btf__parse(targ_btf_path, NULL); - else - targ_btf = obj->btf_vmlinux; - if (IS_ERR_OR_NULL(targ_btf)) { - pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf)); - return PTR_ERR(targ_btf); + if (targ_btf_path) { + obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL); + if (IS_ERR_OR_NULL(obj->btf_vmlinux_override)) { + err = PTR_ERR(obj->btf_vmlinux_override); + pr_warn("failed to parse target BTF: %d\n", err); + return err; + } } cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL); @@ -5899,8 +5934,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) if (!prog->load) continue; - err = bpf_core_apply_relo(prog, rec, i, obj->btf, - targ_btf, cand_cache); + err = bpf_core_apply_relo(prog, rec, i, obj->btf, cand_cache); if (err) { pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", prog->name, i, err); @@ -5911,8 +5945,9 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) out: /* obj->btf_vmlinux is freed at the end of object load phase */ - if (targ_btf != obj->btf_vmlinux) - btf__free(targ_btf); + btf__free(obj->btf_vmlinux_override); + obj->btf_vmlinux_override = NULL; + if (!IS_ERR_OR_NULL(cand_cache)) { hashmap__for_each_entry(cand_cache, entry, i) { bpf_core_free_cands(entry->value); -- cgit v1.3.1 From 4f33a53d56000cfa67e2e4e8a5dac08f084a979b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Dec 2020 12:46:25 -0800 Subject: libbpf: Add kernel module BTF support for CO-RE relocations Teach libbpf to search for candidate types for CO-RE relocations across kernel modules BTFs, in addition to vmlinux BTF. If at least one candidate type is found in vmlinux BTF, kernel module BTFs are not iterated. If vmlinux BTF has no matching candidates, then find all kernel module BTFs and search for all matching candidates across all of them. Kernel's support for module BTFs are inferred from the support for BTF name pointer in BPF UAPI. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201203204634.1325171-6-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 169 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2e4fa3ce6b94..ca20e493726d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -176,6 +176,8 @@ enum kern_feature_id { FEAT_PROBE_READ_KERN, /* BPF_PROG_BIND_MAP is supported */ FEAT_PROG_BIND_MAP, + /* Kernel support for module BTFs */ + FEAT_MODULE_BTF, __FEAT_CNT, }; @@ -402,6 +404,12 @@ struct extern_desc { static LIST_HEAD(bpf_objects_list); +struct module_btf { + struct btf *btf; + char *name; + __u32 id; +}; + struct bpf_object { char name[BPF_OBJ_NAME_LEN]; char license[64]; @@ -470,6 +478,11 @@ struct bpf_object { struct btf *btf_vmlinux; /* vmlinux BTF override for CO-RE relocations */ struct btf *btf_vmlinux_override; + /* Lazily initialized kernel module BTFs */ + struct module_btf *btf_modules; + bool btf_modules_loaded; + size_t btf_module_cnt; + size_t btf_module_cap; void *priv; bpf_object_clear_priv_t clear_priv; @@ -3960,6 +3973,35 @@ static int probe_prog_bind_map(void) return ret >= 0; } +static int probe_module_btf(void) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + struct bpf_btf_info info; + __u32 len = sizeof(info); + char name[16]; + int fd, err; + + fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); + if (fd < 0) + return 0; /* BTF not supported at all */ + + memset(&info, 0, sizeof(info)); + info.name = ptr_to_u64(name); + info.name_len = sizeof(name); + + /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; + * kernel's module BTF support coincides with support for + * name/name_len fields in struct bpf_btf_info. + */ + err = bpf_obj_get_info_by_fd(fd, &info, &len); + close(fd); + return !err; +} + enum kern_feature_result { FEAT_UNKNOWN = 0, FEAT_SUPPORTED = 1, @@ -4003,7 +4045,10 @@ static struct kern_feature_desc { }, [FEAT_PROG_BIND_MAP] = { "BPF_PROG_BIND_MAP support", probe_prog_bind_map, - } + }, + [FEAT_MODULE_BTF] = { + "module BTF support", probe_module_btf, + }, }; static bool kernel_supports(enum kern_feature_id feat_id) @@ -4674,13 +4719,96 @@ static int bpf_core_add_cands(struct core_cand *local_cand, return 0; } +static int load_module_btfs(struct bpf_object *obj) +{ + struct bpf_btf_info info; + struct module_btf *mod_btf; + struct btf *btf; + char name[64]; + __u32 id = 0, len; + int err, fd; + + if (obj->btf_modules_loaded) + return 0; + + /* don't do this again, even if we find no module BTFs */ + obj->btf_modules_loaded = true; + + /* kernel too old to support module BTFs */ + if (!kernel_supports(FEAT_MODULE_BTF)) + return 0; + + while (true) { + err = bpf_btf_get_next_id(id, &id); + if (err && errno == ENOENT) + return 0; + if (err) { + err = -errno; + pr_warn("failed to iterate BTF objects: %d\n", err); + return err; + } + + fd = bpf_btf_get_fd_by_id(id); + if (fd < 0) { + if (errno == ENOENT) + continue; /* expected race: BTF was unloaded */ + err = -errno; + pr_warn("failed to get BTF object #%d FD: %d\n", id, err); + return err; + } + + len = sizeof(info); + memset(&info, 0, sizeof(info)); + info.name = ptr_to_u64(name); + info.name_len = sizeof(name); + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + err = -errno; + pr_warn("failed to get BTF object #%d info: %d\n", id, err); + close(fd); + return err; + } + + /* ignore non-module BTFs */ + if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) { + close(fd); + continue; + } + + btf = btf_get_from_fd(fd, obj->btf_vmlinux); + close(fd); + if (IS_ERR(btf)) { + pr_warn("failed to load module [%s]'s BTF object #%d: %ld\n", + name, id, PTR_ERR(btf)); + return PTR_ERR(btf); + } + + err = btf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap, + sizeof(*obj->btf_modules), obj->btf_module_cnt + 1); + if (err) + return err; + + mod_btf = &obj->btf_modules[obj->btf_module_cnt++]; + + mod_btf->btf = btf; + mod_btf->id = id; + mod_btf->name = strdup(name); + if (!mod_btf->name) + return -ENOMEM; + } + + return 0; +} + static struct core_cand_list * bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id) { struct core_cand local_cand = {}; struct core_cand_list *cands; + const struct btf *main_btf; size_t local_essent_len; - int err; + int err, i; local_cand.btf = local_btf; local_cand.t = btf__type_by_id(local_btf, local_type_id); @@ -4697,15 +4825,38 @@ bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 l return ERR_PTR(-ENOMEM); /* Attempt to find target candidates in vmlinux BTF first */ - err = bpf_core_add_cands(&local_cand, local_essent_len, - obj->btf_vmlinux_override ?: obj->btf_vmlinux, - "vmlinux", 1, cands); - if (err) { - bpf_core_free_cands(cands); - return ERR_PTR(err); + main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux; + err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands); + if (err) + goto err_out; + + /* if vmlinux BTF has any candidate, don't got for module BTFs */ + if (cands->len) + return cands; + + /* if vmlinux BTF was overridden, don't attempt to load module BTFs */ + if (obj->btf_vmlinux_override) + return cands; + + /* now look through module BTFs, trying to still find candidates */ + err = load_module_btfs(obj); + if (err) + goto err_out; + + for (i = 0; i < obj->btf_module_cnt; i++) { + err = bpf_core_add_cands(&local_cand, local_essent_len, + obj->btf_modules[i].btf, + obj->btf_modules[i].name, + btf__get_nr_types(obj->btf_vmlinux) + 1, + cands); + if (err) + goto err_out; } return cands; +err_out: + bpf_core_free_cands(cands); + return ERR_PTR(err); } /* Check two types for compatibility for the purpose of field access @@ -5756,7 +5907,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, if (!hashmap__find(cand_cache, type_key, (void **)&cands)) { cands = bpf_core_find_cands(prog->obj, local_btf, local_id); if (IS_ERR(cands)) { - pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld", + pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n", prog->name, relo_idx, local_id, btf_kind_str(local_type), local_name, PTR_ERR(cands)); return PTR_ERR(cands); @@ -5944,7 +6095,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) } out: - /* obj->btf_vmlinux is freed at the end of object load phase */ + /* obj->btf_vmlinux and module BTFs are freed after object load */ btf__free(obj->btf_vmlinux_override); obj->btf_vmlinux_override = NULL; @@ -7316,6 +7467,14 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) err = err ? : bpf_object__relocate(obj, attr->target_btf_path); err = err ? : bpf_object__load_progs(obj, attr->log_level); + /* clean up module BTFs */ + for (i = 0; i < obj->btf_module_cnt; i++) { + btf__free(obj->btf_modules[i].btf); + free(obj->btf_modules[i].name); + } + free(obj->btf_modules); + + /* clean up vmlinux BTF */ btf__free(obj->btf_vmlinux); obj->btf_vmlinux = NULL; -- cgit v1.3.1 From 9f7fa225894c7fcb014f3699a402fcc4d896cb1c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Dec 2020 12:46:26 -0800 Subject: selftests/bpf: Add bpf_testmod kernel module for testing Add bpf_testmod module, which is conceptually out-of-tree module and provides ways for selftests/bpf to test various kernel module-related functionality: raw tracepoint, fentry/fexit/fmod_ret, etc. This module will be auto-loaded by test_progs test runner and expected by some of selftests to be present and loaded. Pahole currently isn't able to generate BTF for static functions in kernel modules, so make sure traced function is global. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201203204634.1325171-7-andrii@kernel.org --- tools/testing/selftests/bpf/.gitignore | 1 + tools/testing/selftests/bpf/Makefile | 12 +++-- tools/testing/selftests/bpf/bpf_testmod/.gitignore | 6 +++ tools/testing/selftests/bpf/bpf_testmod/Makefile | 20 ++++++++ .../selftests/bpf/bpf_testmod/bpf_testmod-events.h | 36 +++++++++++++ .../selftests/bpf/bpf_testmod/bpf_testmod.c | 52 +++++++++++++++++++ .../selftests/bpf/bpf_testmod/bpf_testmod.h | 14 +++++ tools/testing/selftests/bpf/test_progs.c | 59 ++++++++++++++++++++++ tools/testing/selftests/bpf/test_progs.h | 1 + 9 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/bpf/bpf_testmod/.gitignore create mode 100644 tools/testing/selftests/bpf/bpf_testmod/Makefile create mode 100644 tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h create mode 100644 tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c create mode 100644 tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h (limited to 'tools') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 395ae040ce1f..752d8edddc66 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -35,3 +35,4 @@ test_cpp /tools /runqslower /bench +*.ko diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 371b022d932c..ac25ba5d0d6c 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -80,7 +80,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \ # Compile but not part of 'make run_tests' TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ - test_lirc_mode2_user xdping test_cpp runqslower bench + test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko TEST_CUSTOM_PROGS = urandom_read @@ -104,6 +104,7 @@ OVERRIDE_TARGETS := 1 override define CLEAN $(call msg,CLEAN) $(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) + $(Q)$(MAKE) -C bpf_testmod clean endef include ../lib.mk @@ -136,6 +137,11 @@ $(OUTPUT)/urandom_read: urandom_read.c $(call msg,BINARY,,$@) $(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id=sha1 +$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) + $(call msg,MOD,,$@) + $(Q)$(MAKE) $(submake_extras) -C bpf_testmod + $(Q)cp bpf_testmod/bpf_testmod.ko $@ + $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) $(call msg,CC,,$@) $(Q)$(CC) -c $(CFLAGS) -o $@ $< @@ -388,7 +394,7 @@ TRUNNER_BPF_PROGS_DIR := progs TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ network_helpers.c testing_helpers.c \ btf_helpers.c flow_dissector_load.h -TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ +TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ ima_setup.sh \ $(wildcard progs/btf_dump_test_case_*.c) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE @@ -460,4 +466,4 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ feature \ - $(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc) + $(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc bpf_testmod.ko) diff --git a/tools/testing/selftests/bpf/bpf_testmod/.gitignore b/tools/testing/selftests/bpf/bpf_testmod/.gitignore new file mode 100644 index 000000000000..ded513777281 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_testmod/.gitignore @@ -0,0 +1,6 @@ +*.mod +*.mod.c +*.o +.ko +/Module.symvers +/modules.order diff --git a/tools/testing/selftests/bpf/bpf_testmod/Makefile b/tools/testing/selftests/bpf/bpf_testmod/Makefile new file mode 100644 index 000000000000..15cb36c4483a --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_testmod/Makefile @@ -0,0 +1,20 @@ +BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) +KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..) + +ifeq ($(V),1) +Q = +else +Q = @ +endif + +MODULES = bpf_testmod.ko + +obj-m += bpf_testmod.o +CFLAGS_bpf_testmod.o = -I$(src) + +all: + +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules + +clean: + +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean + diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h new file mode 100644 index 000000000000..b83ea448bc79 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020 Facebook */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bpf_testmod + +#if !defined(_BPF_TESTMOD_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _BPF_TESTMOD_EVENTS_H + +#include +#include "bpf_testmod.h" + +TRACE_EVENT(bpf_testmod_test_read, + TP_PROTO(struct task_struct *task, struct bpf_testmod_test_read_ctx *ctx), + TP_ARGS(task, ctx), + TP_STRUCT__entry( + __field(pid_t, pid) + __array(char, comm, TASK_COMM_LEN) + __field(loff_t, off) + __field(size_t, len) + ), + TP_fast_assign( + __entry->pid = task->pid; + memcpy(__entry->comm, task->comm, TASK_COMM_LEN); + __entry->off = ctx->off; + __entry->len = ctx->len; + ), + TP_printk("pid=%d comm=%s off=%llu len=%zu", + __entry->pid, __entry->comm, __entry->off, __entry->len) +); + +#endif /* _BPF_TESTMOD_EVENTS_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE bpf_testmod-events +#include diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c new file mode 100644 index 000000000000..2df19d73ca49 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include +#include +#include +#include +#include +#include "bpf_testmod.h" + +#define CREATE_TRACE_POINTS +#include "bpf_testmod-events.h" + +noinline ssize_t +bpf_testmod_test_read(struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t len) +{ + struct bpf_testmod_test_read_ctx ctx = { + .buf = buf, + .off = off, + .len = len, + }; + + trace_bpf_testmod_test_read(current, &ctx); + + return -EIO; /* always fail */ +} +EXPORT_SYMBOL(bpf_testmod_test_read); +ALLOW_ERROR_INJECTION(bpf_testmod_test_read, ERRNO); + +static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { + .attr = { .name = "bpf_testmod", .mode = 0444, }, + .read = bpf_testmod_test_read, +}; + +static int bpf_testmod_init(void) +{ + return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); +} + +static void bpf_testmod_exit(void) +{ + return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); +} + +module_init(bpf_testmod_init); +module_exit(bpf_testmod_exit); + +MODULE_AUTHOR("Andrii Nakryiko"); +MODULE_DESCRIPTION("BPF selftests module"); +MODULE_LICENSE("Dual BSD/GPL"); + diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h new file mode 100644 index 000000000000..b81adfedb4f6 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020 Facebook */ +#ifndef _BPF_TESTMOD_H +#define _BPF_TESTMOD_H + +#include + +struct bpf_testmod_test_read_ctx { + char *buf; + loff_t off; + size_t len; +}; + +#endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 22943b58d752..17587754b7a7 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -360,6 +360,58 @@ err: return -1; } +static int finit_module(int fd, const char *param_values, int flags) +{ + return syscall(__NR_finit_module, fd, param_values, flags); +} + +static int delete_module(const char *name, int flags) +{ + return syscall(__NR_delete_module, name, flags); +} + +static void unload_bpf_testmod(void) +{ + if (delete_module("bpf_testmod", 0)) { + if (errno == ENOENT) { + if (env.verbosity > VERBOSE_NONE) + fprintf(stdout, "bpf_testmod.ko is already unloaded.\n"); + return; + } + fprintf(env.stderr, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno); + exit(1); + } + if (env.verbosity > VERBOSE_NONE) + fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n"); +} + +static int load_bpf_testmod(void) +{ + int fd; + + /* ensure previous instance of the module is unloaded */ + unload_bpf_testmod(); + + if (env.verbosity > VERBOSE_NONE) + fprintf(stdout, "Loading bpf_testmod.ko...\n"); + + fd = open("bpf_testmod.ko", O_RDONLY); + if (fd < 0) { + fprintf(env.stderr, "Can't find bpf_testmod.ko kernel module: %d\n", -errno); + return -ENOENT; + } + if (finit_module(fd, "", 0)) { + fprintf(env.stderr, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno); + close(fd); + return -EINVAL; + } + close(fd); + + if (env.verbosity > VERBOSE_NONE) + fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n"); + return 0; +} + /* extern declarations for test funcs */ #define DEFINE_TEST(name) extern void test_##name(void); #include @@ -678,6 +730,11 @@ int main(int argc, char **argv) save_netns(); stdio_hijack(); + env.has_testmod = true; + if (load_bpf_testmod()) { + fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n"); + env.has_testmod = false; + } for (i = 0; i < prog_test_cnt; i++) { struct prog_test_def *test = &prog_test_defs[i]; @@ -722,6 +779,8 @@ int main(int argc, char **argv) if (test->need_cgroup_cleanup) cleanup_cgroup_environment(); } + if (env.has_testmod) + unload_bpf_testmod(); stdio_restore(); if (env.get_test_cnt) { diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index d6b14853f3bc..115953243f62 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -66,6 +66,7 @@ struct test_env { enum verbosity verbosity; bool jit_enabled; + bool has_testmod; bool get_test_cnt; bool list_test_names; -- cgit v1.3.1 From 5ed31472b9ad6373a0a24bc21186b5eac999213d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Dec 2020 12:46:27 -0800 Subject: selftests/bpf: Add support for marking sub-tests as skipped Previously skipped sub-tests would be counted as passing with ":OK" appened in the log. Change that to be accounted as ":SKIP". Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201203204634.1325171-8-andrii@kernel.org --- tools/testing/selftests/bpf/test_progs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 17587754b7a7..5ef081bdae4e 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -149,15 +149,15 @@ void test__end_subtest() if (sub_error_cnt) env.fail_cnt++; - else + else if (test->skip_cnt == 0) env.sub_succ_cnt++; skip_account(); dump_test_log(test, sub_error_cnt); fprintf(env.stdout, "#%d/%d %s:%s\n", - test->test_num, test->subtest_num, - test->subtest_name, sub_error_cnt ? "FAIL" : "OK"); + test->test_num, test->subtest_num, test->subtest_name, + sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK")); free(test->subtest_name); test->subtest_name = NULL; -- cgit v1.3.1 From 6bcd39d366b64318562785d5b47c2837e3a53ae5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Dec 2020 12:46:28 -0800 Subject: selftests/bpf: Add CO-RE relocs selftest relying on kernel module BTF Add a self-tests validating libbpf is able to perform CO-RE relocations against the type defined in kernel module BTF. if bpf_testmod.o is not supported by the kernel (e.g., due to version mismatch), skip tests, instead of failing. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201203204634.1325171-9-andrii@kernel.org --- .../testing/selftests/bpf/prog_tests/core_reloc.c | 79 +++++++++++++++++++--- .../testing/selftests/bpf/progs/core_reloc_types.h | 17 +++++ .../selftests/bpf/progs/test_core_reloc_module.c | 66 ++++++++++++++++++ 3 files changed, 151 insertions(+), 11 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_module.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 30e40ff4b0d8..bb980848cd77 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include "progs/core_reloc_types.h" +#include "bpf_testmod/bpf_testmod.h" #include #include #include @@ -9,6 +10,30 @@ static int duration = 0; #define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name) +#define MODULES_CASE(name, sec_name, tp_name) { \ + .case_name = name, \ + .bpf_obj_file = "test_core_reloc_module.o", \ + .btf_src_file = NULL, /* find in kernel module BTFs */ \ + .input = "", \ + .input_len = 0, \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_module_output) { \ + .read_ctx_sz = sizeof(struct bpf_testmod_test_read_ctx),\ + .read_ctx_exists = true, \ + .buf_exists = true, \ + .len_exists = true, \ + .off_exists = true, \ + .len = 123, \ + .off = 0, \ + .comm = "test_progs", \ + .comm_len = sizeof("test_progs"), \ + }, \ + .output_len = sizeof(struct core_reloc_module_output), \ + .prog_sec_name = sec_name, \ + .raw_tp_name = tp_name, \ + .trigger = trigger_module_test_read, \ + .needs_testmod = true, \ +} + #define FLAVORS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ .a = 42, \ .b = 0xc001, \ @@ -211,7 +236,7 @@ static int duration = 0; .output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \ __VA_ARGS__, \ .output_len = sizeof(struct core_reloc_bitfields_output), \ - .direct_raw_tp = true, \ + .prog_sec_name = "tp_btf/sys_enter", \ } @@ -222,7 +247,7 @@ static int duration = 0; }, { \ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \ "direct:", name), \ - .direct_raw_tp = true, \ + .prog_sec_name = "tp_btf/sys_enter", \ .fails = true, \ } @@ -309,6 +334,7 @@ static int duration = 0; struct core_reloc_test_case; typedef int (*setup_test_fn)(struct core_reloc_test_case *test); +typedef int (*trigger_test_fn)(const struct core_reloc_test_case *test); struct core_reloc_test_case { const char *case_name; @@ -319,9 +345,12 @@ struct core_reloc_test_case { const char *output; int output_len; bool fails; + bool needs_testmod; bool relaxed_core_relocs; - bool direct_raw_tp; + const char *prog_sec_name; + const char *raw_tp_name; setup_test_fn setup; + trigger_test_fn trigger; }; static int find_btf_type(const struct btf *btf, const char *name, __u32 kind) @@ -451,6 +480,23 @@ static int setup_type_id_case_failure(struct core_reloc_test_case *test) return 0; } +static int trigger_module_test_read(const struct core_reloc_test_case *test) +{ + struct core_reloc_module_output *exp = (void *)test->output; + int fd, err; + + fd = open("/sys/kernel/bpf_testmod", O_RDONLY); + err = -errno; + if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err)) + return err; + + read(fd, NULL, exp->len); /* request expected number of bytes */ + close(fd); + + return 0; +} + + static struct core_reloc_test_case test_cases[] = { /* validate we can find kernel image and use its BTF for relocs */ { @@ -467,6 +513,9 @@ static struct core_reloc_test_case test_cases[] = { .output_len = sizeof(struct core_reloc_kernel_output), }, + /* validate we can find kernel module BTF types for relocs/attach */ + MODULES_CASE("module", "raw_tp/bpf_testmod_test_read", "bpf_testmod_test_read"), + /* validate BPF program can use multiple flavors to match against * single target BTF type */ @@ -779,6 +828,11 @@ void test_core_reloc(void) if (!test__start_subtest(test_case->case_name)) continue; + if (test_case->needs_testmod && !env.has_testmod) { + test__skip(); + continue; + } + if (test_case->setup) { err = test_case->setup(test_case); if (CHECK(err, "test_setup", "test #%d setup failed: %d\n", i, err)) @@ -790,13 +844,11 @@ void test_core_reloc(void) test_case->bpf_obj_file, PTR_ERR(obj))) continue; - /* for typed raw tracepoints, NULL should be specified */ - if (test_case->direct_raw_tp) { - probe_name = "tp_btf/sys_enter"; - tp_name = NULL; - } else { - probe_name = "raw_tracepoint/sys_enter"; - tp_name = "sys_enter"; + probe_name = "raw_tracepoint/sys_enter"; + tp_name = "sys_enter"; + if (test_case->prog_sec_name) { + probe_name = test_case->prog_sec_name; + tp_name = test_case->raw_tp_name; /* NULL for tp_btf */ } prog = bpf_object__find_program_by_title(obj, probe_name); @@ -837,7 +889,12 @@ void test_core_reloc(void) goto cleanup; /* trigger test run */ - usleep(1); + if (test_case->trigger) { + if (!ASSERT_OK(test_case->trigger(test_case), "test_trigger")) + goto cleanup; + } else { + usleep(1); + } if (data->skip) { test__skip(); diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index e6e616cb7bc9..9a2850850121 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -15,6 +15,23 @@ struct core_reloc_kernel_output { int comm_len; }; +/* + * MODULE + */ + +struct core_reloc_module_output { + long long len; + long long off; + int read_ctx_sz; + bool read_ctx_exists; + bool buf_exists; + bool len_exists; + bool off_exists; + /* we have test_progs[-flavor], so cut flavor part */ + char comm[sizeof("test_progs")]; + int comm_len; +}; + /* * FLAVORS */ diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c new file mode 100644 index 000000000000..d1840c1a9d36 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct bpf_testmod_test_read_ctx { + /* field order is mixed up */ + size_t len; + char *buf; + loff_t off; +} __attribute__((preserve_access_index)); + +struct { + char in[256]; + char out[256]; + bool skip; + uint64_t my_pid_tgid; +} data = {}; + +struct core_reloc_module_output { + long long len; + long long off; + int read_ctx_sz; + bool read_ctx_exists; + bool buf_exists; + bool len_exists; + bool off_exists; + /* we have test_progs[-flavor], so cut flavor part */ + char comm[sizeof("test_progs")]; + int comm_len; +}; + +SEC("raw_tp/bpf_testmod_test_read") +int BPF_PROG(test_core_module, + struct task_struct *task, + struct bpf_testmod_test_read_ctx *read_ctx) +{ + struct core_reloc_module_output *out = (void *)&data.out; + __u64 pid_tgid = bpf_get_current_pid_tgid(); + __u32 real_tgid = (__u32)(pid_tgid >> 32); + __u32 real_pid = (__u32)pid_tgid; + + if (data.my_pid_tgid != pid_tgid) + return 0; + + if (BPF_CORE_READ(task, pid) != real_pid || BPF_CORE_READ(task, tgid) != real_tgid) + return 0; + + out->len = BPF_CORE_READ(read_ctx, len); + out->off = BPF_CORE_READ(read_ctx, off); + + out->read_ctx_sz = bpf_core_type_size(struct bpf_testmod_test_read_ctx); + out->read_ctx_exists = bpf_core_type_exists(struct bpf_testmod_test_read_ctx); + out->buf_exists = bpf_core_field_exists(read_ctx->buf); + out->off_exists = bpf_core_field_exists(read_ctx->off); + out->len_exists = bpf_core_field_exists(read_ctx->len); + + out->comm_len = BPF_CORE_READ_STR_INTO(&out->comm, task, comm); + + return 0; +} -- cgit v1.3.1 From 290248a5b7d829871b3ea3c62578613a580a1744 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Dec 2020 12:46:30 -0800 Subject: bpf: Allow to specify kernel module BTFs when attaching BPF programs Add ability for user-space programs to specify non-vmlinux BTF when attaching BTF-powered BPF programs: raw_tp, fentry/fexit/fmod_ret, LSM, etc. For this, attach_prog_fd (now with the alias name attach_btf_obj_fd) should specify FD of a module or vmlinux BTF object. For backwards compatibility reasons, 0 denotes vmlinux BTF. Only kernel BTF (vmlinux or module) can be specified. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201203204634.1325171-11-andrii@kernel.org --- include/linux/btf.h | 1 + include/uapi/linux/bpf.h | 7 +++- kernel/bpf/btf.c | 5 +++ kernel/bpf/syscall.c | 82 ++++++++++++++++++++++++++---------------- tools/include/uapi/linux/bpf.h | 7 +++- 5 files changed, 69 insertions(+), 33 deletions(-) (limited to 'tools') diff --git a/include/linux/btf.h b/include/linux/btf.h index fb608e4de076..4c200f5d242b 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -90,6 +90,7 @@ int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj, int btf_get_fd_by_id(u32 id); u32 btf_obj_id(const struct btf *btf); +bool btf_is_kernel(const struct btf *btf); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c3458ec1f30a..1233f14f659f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -557,7 +557,12 @@ union bpf_attr { __aligned_u64 line_info; /* line info */ __u32 line_info_cnt; /* number of bpf_line_info records */ __u32 attach_btf_id; /* in-kernel BTF type id to attach to */ - __u32 attach_prog_fd; /* 0 to attach to vmlinux */ + union { + /* valid prog_fd to attach to bpf prog */ + __u32 attach_prog_fd; + /* or valid module BTF object fd or 0 to attach to vmlinux */ + __u32 attach_btf_obj_fd; + }; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 7a19bf5bfe97..8d6bdb4f4d61 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5738,6 +5738,11 @@ u32 btf_obj_id(const struct btf *btf) return btf->id; } +bool btf_is_kernel(const struct btf *btf) +{ + return btf->kernel_btf; +} + static int btf_id_cmp_func(const void *a, const void *b) { const int *pa = a, *pb = b; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 184204169949..0cd3cc2af9c1 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1926,12 +1926,16 @@ static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr) static int bpf_prog_load_check_attach(enum bpf_prog_type prog_type, enum bpf_attach_type expected_attach_type, - u32 btf_id, u32 prog_fd) + struct btf *attach_btf, u32 btf_id, + struct bpf_prog *dst_prog) { if (btf_id) { if (btf_id > BTF_MAX_TYPE) return -EINVAL; + if (!attach_btf && !dst_prog) + return -EINVAL; + switch (prog_type) { case BPF_PROG_TYPE_TRACING: case BPF_PROG_TYPE_LSM: @@ -1943,7 +1947,10 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, } } - if (prog_fd && prog_type != BPF_PROG_TYPE_TRACING && + if (attach_btf && (!btf_id || dst_prog)) + return -EINVAL; + + if (dst_prog && prog_type != BPF_PROG_TYPE_TRACING && prog_type != BPF_PROG_TYPE_EXT) return -EINVAL; @@ -2060,7 +2067,8 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) { enum bpf_prog_type type = attr->prog_type; - struct bpf_prog *prog; + struct bpf_prog *prog, *dst_prog = NULL; + struct btf *attach_btf = NULL; int err; char license[128]; bool is_gpl; @@ -2102,44 +2110,56 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) if (is_perfmon_prog_type(type) && !perfmon_capable()) return -EPERM; + /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog + * or btf, we need to check which one it is + */ + if (attr->attach_prog_fd) { + dst_prog = bpf_prog_get(attr->attach_prog_fd); + if (IS_ERR(dst_prog)) { + dst_prog = NULL; + attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd); + if (IS_ERR(attach_btf)) + return -EINVAL; + if (!btf_is_kernel(attach_btf)) { + btf_put(attach_btf); + return -EINVAL; + } + } + } else if (attr->attach_btf_id) { + /* fall back to vmlinux BTF, if BTF type ID is specified */ + attach_btf = bpf_get_btf_vmlinux(); + if (IS_ERR(attach_btf)) + return PTR_ERR(attach_btf); + if (!attach_btf) + return -EINVAL; + btf_get(attach_btf); + } + bpf_prog_load_fixup_attach_type(attr); if (bpf_prog_load_check_attach(type, attr->expected_attach_type, - attr->attach_btf_id, - attr->attach_prog_fd)) + attach_btf, attr->attach_btf_id, + dst_prog)) { + if (dst_prog) + bpf_prog_put(dst_prog); + if (attach_btf) + btf_put(attach_btf); return -EINVAL; + } /* plain bpf_prog allocation */ prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); - if (!prog) + if (!prog) { + if (dst_prog) + bpf_prog_put(dst_prog); + if (attach_btf) + btf_put(attach_btf); return -ENOMEM; + } prog->expected_attach_type = attr->expected_attach_type; + prog->aux->attach_btf = attach_btf; prog->aux->attach_btf_id = attr->attach_btf_id; - - if (attr->attach_btf_id && !attr->attach_prog_fd) { - struct btf *btf; - - btf = bpf_get_btf_vmlinux(); - if (IS_ERR(btf)) - return PTR_ERR(btf); - if (!btf) - return -EINVAL; - - btf_get(btf); - prog->aux->attach_btf = btf; - } - - if (attr->attach_prog_fd) { - struct bpf_prog *dst_prog; - - dst_prog = bpf_prog_get(attr->attach_prog_fd); - if (IS_ERR(dst_prog)) { - err = PTR_ERR(dst_prog); - goto free_prog; - } - prog->aux->dst_prog = dst_prog; - } - + prog->aux->dst_prog = dst_prog; prog->aux->offload_requested = !!attr->prog_ifindex; prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c3458ec1f30a..1233f14f659f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -557,7 +557,12 @@ union bpf_attr { __aligned_u64 line_info; /* line info */ __u32 line_info_cnt; /* number of bpf_line_info records */ __u32 attach_btf_id; /* in-kernel BTF type id to attach to */ - __u32 attach_prog_fd; /* 0 to attach to vmlinux */ + union { + /* valid prog_fd to attach to bpf prog */ + __u32 attach_prog_fd; + /* or valid module BTF object fd or 0 to attach to vmlinux */ + __u32 attach_btf_obj_fd; + }; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ -- cgit v1.3.1 From 6aef10a481a3f42c8021fe410e07440c0d71a5fc Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Dec 2020 12:46:31 -0800 Subject: libbpf: Factor out low-level BPF program loading helper Refactor low-level API for BPF program loading to not rely on public API types. This allows painless extension without constant efforts to cleverly not break backwards compatibility. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201203204634.1325171-12-andrii@kernel.org --- tools/lib/bpf/bpf.c | 100 +++++++++++++++++++++++++++------------- tools/lib/bpf/libbpf.c | 34 +++++++------- tools/lib/bpf/libbpf_internal.h | 29 ++++++++++++ 3 files changed, 113 insertions(+), 50 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 4025266d0fb0..5d681ce32b37 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -215,59 +215,52 @@ alloc_zero_tailing_info(const void *orecord, __u32 cnt, return info; } -int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, - char *log_buf, size_t log_buf_sz) +int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) { void *finfo = NULL, *linfo = NULL; union bpf_attr attr; - __u32 log_level; int fd; - if (!load_attr || !log_buf != !log_buf_sz) + if (!load_attr->log_buf != !load_attr->log_buf_sz) return -EINVAL; - log_level = load_attr->log_level; - if (log_level > (4 | 2 | 1) || (log_level && !log_buf)) + if (load_attr->log_level > (4 | 2 | 1) || (load_attr->log_level && !load_attr->log_buf)) return -EINVAL; memset(&attr, 0, sizeof(attr)); attr.prog_type = load_attr->prog_type; attr.expected_attach_type = load_attr->expected_attach_type; - if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS || - attr.prog_type == BPF_PROG_TYPE_LSM) { - attr.attach_btf_id = load_attr->attach_btf_id; - } else if (attr.prog_type == BPF_PROG_TYPE_TRACING || - attr.prog_type == BPF_PROG_TYPE_EXT) { - attr.attach_btf_id = load_attr->attach_btf_id; - attr.attach_prog_fd = load_attr->attach_prog_fd; - } else { - attr.prog_ifindex = load_attr->prog_ifindex; - attr.kern_version = load_attr->kern_version; - } - attr.insn_cnt = (__u32)load_attr->insns_cnt; + + attr.attach_btf_id = load_attr->attach_btf_id; + attr.attach_prog_fd = load_attr->attach_prog_fd; + + attr.prog_ifindex = load_attr->prog_ifindex; + attr.kern_version = load_attr->kern_version; + + attr.insn_cnt = (__u32)load_attr->insn_cnt; attr.insns = ptr_to_u64(load_attr->insns); attr.license = ptr_to_u64(load_attr->license); - attr.log_level = log_level; - if (log_level) { - attr.log_buf = ptr_to_u64(log_buf); - attr.log_size = log_buf_sz; - } else { - attr.log_buf = ptr_to_u64(NULL); - attr.log_size = 0; + attr.log_level = load_attr->log_level; + if (attr.log_level) { + attr.log_buf = ptr_to_u64(load_attr->log_buf); + attr.log_size = load_attr->log_buf_sz; } attr.prog_btf_fd = load_attr->prog_btf_fd; + attr.prog_flags = load_attr->prog_flags; + attr.func_info_rec_size = load_attr->func_info_rec_size; attr.func_info_cnt = load_attr->func_info_cnt; attr.func_info = ptr_to_u64(load_attr->func_info); + attr.line_info_rec_size = load_attr->line_info_rec_size; attr.line_info_cnt = load_attr->line_info_cnt; attr.line_info = ptr_to_u64(load_attr->line_info); + if (load_attr->name) memcpy(attr.prog_name, load_attr->name, - min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1)); - attr.prog_flags = load_attr->prog_flags; + min(strlen(load_attr->name), (size_t)BPF_OBJ_NAME_LEN - 1)); fd = sys_bpf_prog_load(&attr, sizeof(attr)); if (fd >= 0) @@ -307,19 +300,19 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, } fd = sys_bpf_prog_load(&attr, sizeof(attr)); - if (fd >= 0) goto done; } - if (log_level || !log_buf) + if (load_attr->log_level || !load_attr->log_buf) goto done; /* Try again with log */ - attr.log_buf = ptr_to_u64(log_buf); - attr.log_size = log_buf_sz; + attr.log_buf = ptr_to_u64(load_attr->log_buf); + attr.log_size = load_attr->log_buf_sz; attr.log_level = 1; - log_buf[0] = 0; + load_attr->log_buf[0] = 0; + fd = sys_bpf_prog_load(&attr, sizeof(attr)); done: free(finfo); @@ -327,6 +320,49 @@ done: return fd; } +int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, + char *log_buf, size_t log_buf_sz) +{ + struct bpf_prog_load_params p = {}; + + if (!load_attr || !log_buf != !log_buf_sz) + return -EINVAL; + + p.prog_type = load_attr->prog_type; + p.expected_attach_type = load_attr->expected_attach_type; + switch (p.prog_type) { + case BPF_PROG_TYPE_STRUCT_OPS: + case BPF_PROG_TYPE_LSM: + p.attach_btf_id = load_attr->attach_btf_id; + break; + case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_EXT: + p.attach_btf_id = load_attr->attach_btf_id; + p.attach_prog_fd = load_attr->attach_prog_fd; + break; + default: + p.prog_ifindex = load_attr->prog_ifindex; + p.kern_version = load_attr->kern_version; + } + p.insn_cnt = load_attr->insns_cnt; + p.insns = load_attr->insns; + p.license = load_attr->license; + p.log_level = load_attr->log_level; + p.log_buf = log_buf; + p.log_buf_sz = log_buf_sz; + p.prog_btf_fd = load_attr->prog_btf_fd; + p.func_info_rec_size = load_attr->func_info_rec_size; + p.func_info_cnt = load_attr->func_info_cnt; + p.func_info = load_attr->func_info; + p.line_info_rec_size = load_attr->line_info_rec_size; + p.line_info_cnt = load_attr->line_info_cnt; + p.line_info = load_attr->line_info; + p.name = load_attr->name; + p.prog_flags = load_attr->prog_flags; + + return libbpf__bpf_prog_load(&p); +} + int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, const char *license, __u32 kern_version, char *log_buf, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ca20e493726d..103d66e27406 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6809,7 +6809,7 @@ static int load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, char *license, __u32 kern_version, int *pfd) { - struct bpf_load_program_attr load_attr; + struct bpf_prog_load_params load_attr = {}; char *cp, errmsg[STRERR_BUFSIZE]; size_t log_buf_size = 0; char *log_buf = NULL; @@ -6828,7 +6828,6 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, if (!insns || !insns_cnt) return -EINVAL; - memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); load_attr.prog_type = prog->type; /* old kernels might not support specifying expected_attach_type */ if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def && @@ -6839,19 +6838,14 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, if (kernel_supports(FEAT_PROG_NAME)) load_attr.name = prog->name; load_attr.insns = insns; - load_attr.insns_cnt = insns_cnt; + load_attr.insn_cnt = insns_cnt; load_attr.license = license; - if (prog->type == BPF_PROG_TYPE_STRUCT_OPS || - prog->type == BPF_PROG_TYPE_LSM) { - load_attr.attach_btf_id = prog->attach_btf_id; - } else if (prog->type == BPF_PROG_TYPE_TRACING || - prog->type == BPF_PROG_TYPE_EXT) { - load_attr.attach_prog_fd = prog->attach_prog_fd; - load_attr.attach_btf_id = prog->attach_btf_id; - } else { - load_attr.kern_version = kern_version; - load_attr.prog_ifindex = prog->prog_ifindex; - } + load_attr.attach_btf_id = prog->attach_btf_id; + load_attr.attach_prog_fd = prog->attach_prog_fd; + load_attr.attach_btf_id = prog->attach_btf_id; + load_attr.kern_version = kern_version; + load_attr.prog_ifindex = prog->prog_ifindex; + /* specify func_info/line_info only if kernel supports them */ btf_fd = bpf_object__btf_fd(prog->obj); if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) { @@ -6875,7 +6869,9 @@ retry_load: *log_buf = 0; } - ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size); + load_attr.log_buf = log_buf; + load_attr.log_buf_sz = log_buf_size; + ret = libbpf__bpf_prog_load(&load_attr); if (ret >= 0) { if (log_buf && load_attr.log_level) @@ -6916,9 +6912,9 @@ retry_load: pr_warn("-- BEGIN DUMP LOG ---\n"); pr_warn("\n%s\n", log_buf); pr_warn("-- END LOG --\n"); - } else if (load_attr.insns_cnt >= BPF_MAXINSNS) { + } else if (load_attr.insn_cnt >= BPF_MAXINSNS) { pr_warn("Program too large (%zu insns), at most %d insns\n", - load_attr.insns_cnt, BPF_MAXINSNS); + load_attr.insn_cnt, BPF_MAXINSNS); ret = -LIBBPF_ERRNO__PROG2BIG; } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) { /* Wrong program type? */ @@ -6926,7 +6922,9 @@ retry_load: load_attr.prog_type = BPF_PROG_TYPE_KPROBE; load_attr.expected_attach_type = 0; - fd = bpf_load_program_xattr(&load_attr, NULL, 0); + load_attr.log_buf = NULL; + load_attr.log_buf_sz = 0; + fd = libbpf__bpf_prog_load(&load_attr); if (fd >= 0) { close(fd); ret = -LIBBPF_ERRNO__PROGTYPE; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index e569ae63808e..681073a67ae3 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -151,6 +151,35 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, const char *str_sec, size_t str_len); +struct bpf_prog_load_params { + enum bpf_prog_type prog_type; + enum bpf_attach_type expected_attach_type; + const char *name; + const struct bpf_insn *insns; + size_t insn_cnt; + const char *license; + __u32 kern_version; + __u32 attach_prog_fd; + __u32 attach_btf_id; + __u32 prog_ifindex; + __u32 prog_btf_fd; + __u32 prog_flags; + + __u32 func_info_rec_size; + const void *func_info; + __u32 func_info_cnt; + + __u32 line_info_rec_size; + const void *line_info; + __u32 line_info_cnt; + + __u32 log_level; + char *log_buf; + size_t log_buf_sz; +}; + +int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr); + int bpf_object__section_size(const struct bpf_object *obj, const char *name, __u32 *size); int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, -- cgit v1.3.1 From 91abb4a6d79df6c4dcd86d85632df53c8cca2dcf Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Dec 2020 12:46:32 -0800 Subject: libbpf: Support attachment of BPF tracing programs to kernel modules Teach libbpf to search for BTF types in kernel modules for tracing BPF programs. This allows attachment of raw_tp/fentry/fexit/fmod_ret/etc BPF program types to tracepoints and functions in kernel modules. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201203204634.1325171-13-andrii@kernel.org --- tools/lib/bpf/bpf.c | 5 +- tools/lib/bpf/libbpf.c | 138 +++++++++++++++++++++++++++++++--------- tools/lib/bpf/libbpf_internal.h | 1 + 3 files changed, 112 insertions(+), 32 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 5d681ce32b37..bba48ff4c5c0 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -231,8 +231,11 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) attr.prog_type = load_attr->prog_type; attr.expected_attach_type = load_attr->expected_attach_type; + if (load_attr->attach_prog_fd) + attr.attach_prog_fd = load_attr->attach_prog_fd; + else + attr.attach_btf_obj_fd = load_attr->attach_btf_obj_fd; attr.attach_btf_id = load_attr->attach_btf_id; - attr.attach_prog_fd = load_attr->attach_prog_fd; attr.prog_ifindex = load_attr->prog_ifindex; attr.kern_version = load_attr->kern_version; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 103d66e27406..912e01b946fe 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -278,6 +278,7 @@ struct bpf_program { enum bpf_prog_type type; enum bpf_attach_type expected_attach_type; int prog_ifindex; + __u32 attach_btf_obj_fd; __u32 attach_btf_id; __u32 attach_prog_fd; void *func_info; @@ -408,6 +409,7 @@ struct module_btf { struct btf *btf; char *name; __u32 id; + int fd; }; struct bpf_object { @@ -4766,8 +4768,7 @@ static int load_module_btfs(struct bpf_object *obj) if (err) { err = -errno; pr_warn("failed to get BTF object #%d info: %d\n", id, err); - close(fd); - return err; + goto err_out; } /* ignore non-module BTFs */ @@ -4777,25 +4778,33 @@ static int load_module_btfs(struct bpf_object *obj) } btf = btf_get_from_fd(fd, obj->btf_vmlinux); - close(fd); if (IS_ERR(btf)) { pr_warn("failed to load module [%s]'s BTF object #%d: %ld\n", name, id, PTR_ERR(btf)); - return PTR_ERR(btf); + err = PTR_ERR(btf); + goto err_out; } err = btf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap, sizeof(*obj->btf_modules), obj->btf_module_cnt + 1); if (err) - return err; + goto err_out; mod_btf = &obj->btf_modules[obj->btf_module_cnt++]; mod_btf->btf = btf; mod_btf->id = id; + mod_btf->fd = fd; mod_btf->name = strdup(name); - if (!mod_btf->name) - return -ENOMEM; + if (!mod_btf->name) { + err = -ENOMEM; + goto err_out; + } + continue; + +err_out: + close(fd); + return err; } return 0; @@ -6841,7 +6850,10 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.insn_cnt = insns_cnt; load_attr.license = license; load_attr.attach_btf_id = prog->attach_btf_id; - load_attr.attach_prog_fd = prog->attach_prog_fd; + if (prog->attach_prog_fd) + load_attr.attach_prog_fd = prog->attach_prog_fd; + else + load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd; load_attr.attach_btf_id = prog->attach_btf_id; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; @@ -6937,11 +6949,11 @@ out: return ret; } -static int libbpf_find_attach_btf_id(struct bpf_program *prog); +static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id); int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) { - int err = 0, fd, i, btf_id; + int err = 0, fd, i; if (prog->obj->loaded) { pr_warn("prog '%s': can't load after object was loaded\n", prog->name); @@ -6951,10 +6963,14 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) if ((prog->type == BPF_PROG_TYPE_TRACING || prog->type == BPF_PROG_TYPE_LSM || prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { - btf_id = libbpf_find_attach_btf_id(prog); - if (btf_id <= 0) - return btf_id; - prog->attach_btf_id = btf_id; + int btf_obj_fd = 0, btf_type_id = 0; + + err = libbpf_find_attach_btf_id(prog, &btf_obj_fd, &btf_type_id); + if (err) + return err; + + prog->attach_btf_obj_fd = btf_obj_fd; + prog->attach_btf_id = btf_type_id; } if (prog->instances.nr < 0 || !prog->instances.fds) { @@ -7467,6 +7483,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) /* clean up module BTFs */ for (i = 0; i < obj->btf_module_cnt; i++) { + close(obj->btf_modules[i].fd); btf__free(obj->btf_modules[i].btf); free(obj->btf_modules[i].name); } @@ -8821,8 +8838,8 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, return btf__find_by_name_kind(btf, btf_type_name, kind); } -static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name, - enum bpf_attach_type attach_type) +static inline int find_attach_btf_id(struct btf *btf, const char *name, + enum bpf_attach_type attach_type) { int err; @@ -8838,9 +8855,6 @@ static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name, else err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); - if (err <= 0) - pr_warn("%s is not found in vmlinux BTF\n", name); - return err; } @@ -8856,7 +8870,10 @@ int libbpf_find_vmlinux_btf_id(const char *name, return -EINVAL; } - err = __find_vmlinux_btf_id(btf, name, attach_type); + err = find_attach_btf_id(btf, name, attach_type); + if (err <= 0) + pr_warn("%s is not found in vmlinux BTF\n", name); + btf__free(btf); return err; } @@ -8894,11 +8911,49 @@ out: return err; } -static int libbpf_find_attach_btf_id(struct bpf_program *prog) +static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name, + enum bpf_attach_type attach_type, + int *btf_obj_fd, int *btf_type_id) +{ + int ret, i; + + ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type); + if (ret > 0) { + *btf_obj_fd = 0; /* vmlinux BTF */ + *btf_type_id = ret; + return 0; + } + if (ret != -ENOENT) + return ret; + + ret = load_module_btfs(obj); + if (ret) + return ret; + + for (i = 0; i < obj->btf_module_cnt; i++) { + const struct module_btf *mod = &obj->btf_modules[i]; + + ret = find_attach_btf_id(mod->btf, attach_name, attach_type); + if (ret > 0) { + *btf_obj_fd = mod->fd; + *btf_type_id = ret; + return 0; + } + if (ret == -ENOENT) + continue; + + return ret; + } + + return -ESRCH; +} + +static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id) { enum bpf_attach_type attach_type = prog->expected_attach_type; __u32 attach_prog_fd = prog->attach_prog_fd; - const char *name = prog->sec_name; + const char *name = prog->sec_name, *attach_name; + const struct bpf_sec_def *sec = NULL; int i, err; if (!name) @@ -8909,17 +8964,37 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog) continue; if (strncmp(name, section_defs[i].sec, section_defs[i].len)) continue; - if (attach_prog_fd) - err = libbpf_find_prog_btf_id(name + section_defs[i].len, - attach_prog_fd); - else - err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux, - name + section_defs[i].len, - attach_type); + + sec = §ion_defs[i]; + break; + } + + if (!sec) { + pr_warn("failed to identify BTF ID based on ELF section name '%s'\n", name); + return -ESRCH; + } + attach_name = name + sec->len; + + /* BPF program's BTF ID */ + if (attach_prog_fd) { + err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd); + if (err < 0) { + pr_warn("failed to find BPF program (FD %d) BTF ID for '%s': %d\n", + attach_prog_fd, attach_name, err); + return err; + } + *btf_obj_fd = 0; + *btf_type_id = err; + return 0; + } + + /* kernel/module BTF ID */ + err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id); + if (err) { + pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err); return err; } - pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name); - return -ESRCH; + return 0; } int libbpf_attach_type_by_name(const char *name, @@ -10808,6 +10883,7 @@ int bpf_program__set_attach_target(struct bpf_program *prog, return btf_id; prog->attach_btf_id = btf_id; + prog->attach_btf_obj_fd = 0; prog->attach_prog_fd = attach_prog_fd; return 0; } diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 681073a67ae3..969d0ac592ba 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -160,6 +160,7 @@ struct bpf_prog_load_params { const char *license; __u32 kern_version; __u32 attach_prog_fd; + __u32 attach_btf_obj_fd; __u32 attach_btf_id; __u32 prog_ifindex; __u32 prog_btf_fd; -- cgit v1.3.1 From bc9ed69c79ae7577314a24e09c5b0d1c1c314ced Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Dec 2020 12:46:33 -0800 Subject: selftests/bpf: Add tp_btf CO-RE reloc test for modules Add another CO-RE relocation test for kernel module relocations. This time for tp_btf with direct memory reads. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201203204634.1325171-14-andrii@kernel.org --- .../testing/selftests/bpf/prog_tests/core_reloc.c | 3 +- .../selftests/bpf/progs/test_core_reloc_module.c | 32 +++++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index bb980848cd77..06eb956ff7bb 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -514,7 +514,8 @@ static struct core_reloc_test_case test_cases[] = { }, /* validate we can find kernel module BTF types for relocs/attach */ - MODULES_CASE("module", "raw_tp/bpf_testmod_test_read", "bpf_testmod_test_read"), + MODULES_CASE("module_probed", "raw_tp/bpf_testmod_test_read", "bpf_testmod_test_read"), + MODULES_CASE("module_direct", "tp_btf/bpf_testmod_test_read", NULL), /* validate BPF program can use multiple flavors to match against * single target BTF type diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c index d1840c1a9d36..56363959f7b0 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c @@ -36,7 +36,7 @@ struct core_reloc_module_output { }; SEC("raw_tp/bpf_testmod_test_read") -int BPF_PROG(test_core_module, +int BPF_PROG(test_core_module_probed, struct task_struct *task, struct bpf_testmod_test_read_ctx *read_ctx) { @@ -64,3 +64,33 @@ int BPF_PROG(test_core_module, return 0; } + +SEC("tp_btf/bpf_testmod_test_read") +int BPF_PROG(test_core_module_direct, + struct task_struct *task, + struct bpf_testmod_test_read_ctx *read_ctx) +{ + struct core_reloc_module_output *out = (void *)&data.out; + __u64 pid_tgid = bpf_get_current_pid_tgid(); + __u32 real_tgid = (__u32)(pid_tgid >> 32); + __u32 real_pid = (__u32)pid_tgid; + + if (data.my_pid_tgid != pid_tgid) + return 0; + + if (task->pid != real_pid || task->tgid != real_tgid) + return 0; + + out->len = read_ctx->len; + out->off = read_ctx->off; + + out->read_ctx_sz = bpf_core_type_size(struct bpf_testmod_test_read_ctx); + out->read_ctx_exists = bpf_core_type_exists(struct bpf_testmod_test_read_ctx); + out->buf_exists = bpf_core_field_exists(read_ctx->buf); + out->off_exists = bpf_core_field_exists(read_ctx->off); + out->len_exists = bpf_core_field_exists(read_ctx->len); + + out->comm_len = BPF_CORE_READ_STR_INTO(&out->comm, task, comm); + + return 0; +} -- cgit v1.3.1 From 1e38abefcfd65f3ef7b12895dfd48db80aca28da Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Dec 2020 12:46:34 -0800 Subject: selftests/bpf: Add fentry/fexit/fmod_ret selftest for kernel module Add new selftest checking attachment of fentry/fexit/fmod_ret (and raw tracepoint ones for completeness) BPF programs to kernel module function. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201203204634.1325171-15-andrii@kernel.org --- .../selftests/bpf/prog_tests/module_attach.c | 53 +++++++++++++++++ .../selftests/bpf/progs/test_module_attach.c | 66 ++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/module_attach.c create mode 100644 tools/testing/selftests/bpf/progs/test_module_attach.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c new file mode 100644 index 000000000000..4b65e9918764 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include +#include "test_module_attach.skel.h" + +static int duration; + +static int trigger_module_test_read(int read_sz) +{ + int fd, err; + + fd = open("/sys/kernel/bpf_testmod", O_RDONLY); + err = -errno; + if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err)) + return err; + + read(fd, NULL, read_sz); + close(fd); + + return 0; +} + +void test_module_attach(void) +{ + const int READ_SZ = 456; + struct test_module_attach* skel; + struct test_module_attach__bss *bss; + int err; + + skel = test_module_attach__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + bss = skel->bss; + + err = test_module_attach__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* trigger tracepoint */ + ASSERT_OK(trigger_module_test_read(READ_SZ), "trigger_read"); + + ASSERT_EQ(bss->raw_tp_read_sz, READ_SZ, "raw_tp"); + ASSERT_EQ(bss->tp_btf_read_sz, READ_SZ, "tp_btf"); + ASSERT_EQ(bss->fentry_read_sz, READ_SZ, "fentry"); + ASSERT_EQ(bss->fexit_read_sz, READ_SZ, "fexit"); + ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet"); + ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret"); + +cleanup: + test_module_attach__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c new file mode 100644 index 000000000000..b563563df172 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" +#include +#include +#include +#include "../bpf_testmod/bpf_testmod.h" + +__u32 raw_tp_read_sz = 0; + +SEC("raw_tp/bpf_testmod_test_read") +int BPF_PROG(handle_raw_tp, + struct task_struct *task, struct bpf_testmod_test_read_ctx *read_ctx) +{ + raw_tp_read_sz = BPF_CORE_READ(read_ctx, len); + return 0; +} + +__u32 tp_btf_read_sz = 0; + +SEC("tp_btf/bpf_testmod_test_read") +int BPF_PROG(handle_tp_btf, + struct task_struct *task, struct bpf_testmod_test_read_ctx *read_ctx) +{ + tp_btf_read_sz = read_ctx->len; + return 0; +} + +__u32 fentry_read_sz = 0; + +SEC("fentry/bpf_testmod_test_read") +int BPF_PROG(handle_fentry, + struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) +{ + fentry_read_sz = len; + return 0; +} + +__u32 fexit_read_sz = 0; +int fexit_ret = 0; + +SEC("fexit/bpf_testmod_test_read") +int BPF_PROG(handle_fexit, + struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len, + int ret) +{ + fexit_read_sz = len; + fexit_ret = ret; + return 0; +} + +__u32 fmod_ret_read_sz = 0; + +SEC("fmod_ret/bpf_testmod_test_read") +int BPF_PROG(handle_fmod_ret, + struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) +{ + fmod_ret_read_sz = len; + return 0; /* don't override the exit code */ +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.3.1 From 3015b500ae42356936b9b4a8b660eacaee7a6147 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Dec 2020 15:54:39 -0800 Subject: libbpf: Use memcpy instead of strncpy to please GCC Some versions of GCC are really nit-picky about strncpy() use. Use memcpy(), as they are pretty much equivalent for the case of fixed length strings. Fixes: e459f49b4394 ("libbpf: Separate XDP program load with xsk socket creation") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201203235440.2302137-1-andrii@kernel.org --- tools/lib/bpf/xsk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 4b051ec7cfbb..e3e41ceeb1bc 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -583,7 +583,7 @@ static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk) } ctx->ifindex = ifindex; - strncpy(ctx->ifname, ifname, IFNAMSIZ - 1); + memcpy(ctx->ifname, ifname, IFNAMSIZ -1); ctx->ifname[IFNAMSIZ - 1] = 0; xsk->ctx = ctx; -- cgit v1.3.1 From eceae70bdeaeb6b8ceb662983cf663ff352fbc96 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Dec 2020 15:54:40 -0800 Subject: selftests/bpf: Fix invalid use of strncat in test_sockmap strncat()'s third argument is how many bytes will be added *in addition* to already existing bytes in destination. Plus extra zero byte will be added after that. So existing use in test_sockmap has many opportunities to overflow the string and cause memory corruptions. And in this case, GCC complains for a good reason. Fixes: 16962b2404ac ("bpf: sockmap, add selftests") Fixes: 73563aa3d977 ("selftests/bpf: test_sockmap, print additional test options") Fixes: 1ade9abadfca ("bpf: test_sockmap, add options for msg_pop_data() helper") Fixes: 463bac5f1ca7 ("bpf, selftests: Add test for ktls with skb bpf ingress policy") Fixes: e9dd904708c4 ("bpf: add tls support for testing in test_sockmap") Fixes: 753fb2ee0934 ("bpf: sockmap, add msg_peek tests to test_sockmap") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201203235440.2302137-2-andrii@kernel.org --- tools/testing/selftests/bpf/test_sockmap.c | 36 +++++++++++++++++++----------- 1 file changed, 23 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 0fa1e421c3d7..427ca00a3217 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -1273,6 +1273,16 @@ static char *test_to_str(int test) return "unknown"; } +static void append_str(char *dst, const char *src, size_t dst_cap) +{ + size_t avail = dst_cap - strlen(dst); + + if (avail <= 1) /* just zero byte could be written */ + return; + + strncat(dst, src, avail - 1); /* strncat() adds + 1 for zero byte */ +} + #define OPTSTRING 60 static void test_options(char *options) { @@ -1281,42 +1291,42 @@ static void test_options(char *options) memset(options, 0, OPTSTRING); if (txmsg_pass) - strncat(options, "pass,", OPTSTRING); + append_str(options, "pass,", OPTSTRING); if (txmsg_redir) - strncat(options, "redir,", OPTSTRING); + append_str(options, "redir,", OPTSTRING); if (txmsg_drop) - strncat(options, "drop,", OPTSTRING); + append_str(options, "drop,", OPTSTRING); if (txmsg_apply) { snprintf(tstr, OPTSTRING, "apply %d,", txmsg_apply); - strncat(options, tstr, OPTSTRING); + append_str(options, tstr, OPTSTRING); } if (txmsg_cork) { snprintf(tstr, OPTSTRING, "cork %d,", txmsg_cork); - strncat(options, tstr, OPTSTRING); + append_str(options, tstr, OPTSTRING); } if (txmsg_start) { snprintf(tstr, OPTSTRING, "start %d,", txmsg_start); - strncat(options, tstr, OPTSTRING); + append_str(options, tstr, OPTSTRING); } if (txmsg_end) { snprintf(tstr, OPTSTRING, "end %d,", txmsg_end); - strncat(options, tstr, OPTSTRING); + append_str(options, tstr, OPTSTRING); } if (txmsg_start_pop) { snprintf(tstr, OPTSTRING, "pop (%d,%d),", txmsg_start_pop, txmsg_start_pop + txmsg_pop); - strncat(options, tstr, OPTSTRING); + append_str(options, tstr, OPTSTRING); } if (txmsg_ingress) - strncat(options, "ingress,", OPTSTRING); + append_str(options, "ingress,", OPTSTRING); if (txmsg_redir_skb) - strncat(options, "redir_skb,", OPTSTRING); + append_str(options, "redir_skb,", OPTSTRING); if (txmsg_ktls_skb) - strncat(options, "ktls_skb,", OPTSTRING); + append_str(options, "ktls_skb,", OPTSTRING); if (ktls) - strncat(options, "ktls,", OPTSTRING); + append_str(options, "ktls,", OPTSTRING); if (peek_flag) - strncat(options, "peek,", OPTSTRING); + append_str(options, "peek,", OPTSTRING); } static int __test_exec(int cgrp, int test, struct sockmap_options *opt) -- cgit v1.3.1 From 23afeaeff3d985b07abf2c76fd12b8c548da8367 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Wed, 18 Nov 2020 11:47:46 +0100 Subject: selftests: core: add tests for CLOSE_RANGE_CLOEXEC check that close_range(initial_fd, last_fd, CLOSE_RANGE_CLOEXEC) correctly sets the close-on-exec bit for the specified file descriptors. Open 100 file descriptors and set the close-on-exec flag for a subset of them first, then set it for every file descriptor above 2. Make sure RLIMIT_NOFILE doesn't affect the result. Signed-off-by: Giuseppe Scrivano Link: https://lore.kernel.org/r/20201118104746.873084-3-gscrivan@redhat.com Signed-off-by: Christian Brauner --- tools/testing/selftests/core/close_range_test.c | 74 +++++++++++++++++++++++++ 1 file changed, 74 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index 575b391ddc78..87e16d65d9d7 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "../kselftest_harness.h" #include "../clone3/clone3_selftests.h" @@ -23,6 +24,10 @@ #define CLOSE_RANGE_UNSHARE (1U << 1) #endif +#ifndef CLOSE_RANGE_CLOEXEC +#define CLOSE_RANGE_CLOEXEC (1U << 2) +#endif + static inline int sys_close_range(unsigned int fd, unsigned int max_fd, unsigned int flags) { @@ -224,4 +229,73 @@ TEST(close_range_unshare_capped) EXPECT_EQ(0, WEXITSTATUS(status)); } +TEST(close_range_cloexec) +{ + int i, ret; + int open_fds[101]; + struct rlimit rlimit; + + for (i = 0; i < ARRAY_SIZE(open_fds); i++) { + int fd; + + fd = open("/dev/null", O_RDONLY); + ASSERT_GE(fd, 0) { + if (errno == ENOENT) + XFAIL(return, "Skipping test since /dev/null does not exist"); + } + + open_fds[i] = fd; + } + + ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC); + if (ret < 0) { + if (errno == ENOSYS) + XFAIL(return, "close_range() syscall not supported"); + if (errno == EINVAL) + XFAIL(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC"); + } + + /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */ + ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit)); + rlimit.rlim_cur = 25; + ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit)); + + /* Set close-on-exec for two ranges: [0-50] and [75-100]. */ + ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC); + ASSERT_EQ(0, ret); + ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC); + ASSERT_EQ(0, ret); + + for (i = 0; i <= 50; i++) { + int flags = fcntl(open_fds[i], F_GETFD); + + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + } + + for (i = 51; i <= 74; i++) { + int flags = fcntl(open_fds[i], F_GETFD); + + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, 0); + } + + for (i = 75; i <= 100; i++) { + int flags = fcntl(open_fds[i], F_GETFD); + + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + } + + /* Test a common pattern. */ + ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC); + for (i = 0; i <= 100; i++) { + int flags = fcntl(open_fds[i], F_GETFD); + + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + } +} + + TEST_HARNESS_MAIN -- cgit v1.3.1 From 7d17167244f5415bc6bc90f5bb0074b6d79676b4 Mon Sep 17 00:00:00 2001 From: Florian Lehner Date: Fri, 4 Dec 2020 19:18:27 +0100 Subject: selftests/bpf: Print reason when a tester could not run a program Commit 8184d44c9a57 ("selftests/bpf: skip verifier tests for unsupported program types") added a check to skip unsupported program types. As bpf_probe_prog_type can change errno, do_single_test should save it before printing a reason why a supported BPF program type failed to load. Fixes: 8184d44c9a57 ("selftests/bpf: skip verifier tests for unsupported program types") Signed-off-by: Florian Lehner Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201204181828.11974-2-dev@der-flo.net --- tools/testing/selftests/bpf/test_verifier.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 4bfe3aa2cfc4..ceea9409639e 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -936,6 +936,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, int run_errs, run_successes; int map_fds[MAX_NR_MAPS]; const char *expected_err; + int saved_errno; int fixup_skips; __u32 pflags; int i, err; @@ -997,6 +998,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, } fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog)); + saved_errno = errno; /* BPF_PROG_TYPE_TRACING requires more setup and * bpf_probe_prog_type won't give correct answer @@ -1013,7 +1015,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (expected_ret == ACCEPT || expected_ret == VERBOSE_ACCEPT) { if (fd_prog < 0) { printf("FAIL\nFailed to load prog '%s'!\n", - strerror(errno)); + strerror(saved_errno)); goto fail_log; } #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS -- cgit v1.3.1 From 5f61b7c6975b03e6ace2cfb13d415d5f475c8830 Mon Sep 17 00:00:00 2001 From: Florian Lehner Date: Fri, 4 Dec 2020 19:18:28 +0100 Subject: selftests/bpf: Avoid errno clobbering Print a message when the returned error is about a program type being not supported or because of permission problems. These messages are expected if the program to test was actually executed. Signed-off-by: Florian Lehner Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201204181828.11974-3-dev@der-flo.net --- tools/testing/selftests/bpf/test_verifier.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index ceea9409639e..777a81404fdb 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -875,19 +875,36 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val, __u8 tmp[TEST_DATA_LEN << 2]; __u32 size_tmp = sizeof(tmp); uint32_t retval; - int err; + int err, saved_errno; if (unpriv) set_admin(true); err = bpf_prog_test_run(fd_prog, 1, data, size_data, tmp, &size_tmp, &retval, NULL); + saved_errno = errno; + if (unpriv) set_admin(false); - if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { - printf("Unexpected bpf_prog_test_run error "); - return err; + + if (err) { + switch (saved_errno) { + case 524/*ENOTSUPP*/: + printf("Did not run the program (not supported) "); + return 0; + case EPERM: + if (unpriv) { + printf("Did not run the program (no permission) "); + return 0; + } + /* fallthrough; */ + default: + printf("FAIL: Unexpected bpf_prog_test_run error (%s) ", + strerror(saved_errno)); + return err; + } } - if (!err && retval != expected_val && + + if (retval != expected_val && expected_val != POINTER_VALUE) { printf("FAIL retval %d != %d ", retval, expected_val); return 1; -- cgit v1.3.1 From 2195444e09b4fd3488a69e2f269a401dd4e4f512 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Wed, 2 Dec 2020 14:05:16 +0100 Subject: selftests: add selftest for the SRv6 End.DT4 behavior this selftest is designed for evaluating the new SRv6 End.DT4 behavior used, in this example, for implementing IPv4 L3 VPN use cases. Signed-off-by: Andrea Mayer Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- .../selftests/net/srv6_end_dt4_l3vpn_test.sh | 494 +++++++++++++++++++++ 1 file changed, 494 insertions(+) create mode 100755 tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh new file mode 100755 index 000000000000..ad7a9fc59934 --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh @@ -0,0 +1,494 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer + +# This test is designed for evaluating the new SRv6 End.DT4 behavior used for +# implementing IPv4 L3 VPN use cases. +# +# Hereafter a network diagram is shown, where two different tenants (named 100 +# and 200) offer IPv4 L3 VPN services allowing hosts to communicate with each +# other across an IPv6 network. +# +# Only hosts belonging to the same tenant (and to the same VPN) can communicate +# with each other. Instead, the communication among hosts of different tenants +# is forbidden. +# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the IPv4 +# L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected using the +# IPv4 L3 VPN of tenant 200. Cross connection between tenant 100 and tenant 200 +# is forbidden and thus, for example, hs-t100-1 cannot reach hs-t200-3 and vice +# versa. +# +# Routers rt-1 and rt-2 implement IPv4 L3 VPN services leveraging the SRv6 +# architecture. The key components for such VPNs are: a) SRv6 Encap behavior, +# b) SRv6 End.DT4 behavior and c) VRF. +# +# To explain how an IPv4 L3 VPN based on SRv6 works, let us briefly consider an +# example where, within the same domain of tenant 100, the host hs-t100-1 pings +# the host hs-t100-2. +# +# First of all, L2 reachability of the host hs-t100-2 is taken into account by +# the router rt-1 which acts as an arp proxy. +# +# When the host hs-t100-1 sends an IPv4 packet destined to hs-t100-2, the +# router rt-1 receives the packet on the internal veth-t100 interface. Such +# interface is enslaved to the VRF vrf-100 whose associated table contains the +# SRv6 Encap route for encapsulating any IPv4 packet in a IPv6 plus the Segment +# Routing Header (SRH) packet. This packet is sent through the (IPv6) core +# network up to the router rt-2 that receives it on veth0 interface. +# +# The rt-2 router uses the 'localsid' routing table to process incoming +# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these +# packets, the SRv6 End.DT4 behavior removes the outer IPv6+SRH headers and +# performs the lookup on the vrf-100 table using the destination address of +# the decapsulated IPv4 packet. Afterwards, the packet is sent to the host +# hs-t100-2 through the veth-t100 interface. +# +# The ping response follows the same processing but this time the role of rt-1 +# and rt-2 are swapped. +# +# Of course, the IPv4 L3 VPN for tenant 200 works exactly as the IPv4 L3 VPN +# for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are able to +# connect with each other. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | hs-t100-1 netns | | hs-t100-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +-----------------------------------+ +# | . | | . | +# | +---------------+ | | +---------------- | +# | | veth-t100 | | | | veth-t100 | | +# | | 10.0.0.254/24 | +----------+ | | +----------+ | 10.0.0.254/24 | | +# | +-------+-------+ | localsid | | | | localsid | +-------+-------- | +# | | | table | | | | table | | | +# | +----+----+ +----------+ | | +----------+ +----+----+ | +# | | vrf-100 | | | | vrf-100 | | +# | +---------+ +------------+ | | +------------+ +---------+ | +# | | veth0 | | | | veth0 | | +# | | fd00::1/64 |.|...|.| fd00::2/64 | | +# | +---------+ +------------+ | | +------------+ +---------+ | +# | | vrf-200 | | | | vrf-200 | | +# | +----+----+ | | +----+----+ | +# | | | | | | +# | +-------+-------+ | | +-------+-------- | +# | | veth-t200 | | | | veth-t200 | | +# | | 10.0.0.254/24 | | | | 10.0.0.254/24 | | +# | +---------------+ rt-1 netns | | rt-2 netns +---------------- | +# | . | | . | +# +-----------------------------------+ +-----------------------------------+ +# . . +# . . +# . . +# . . +# +-------------------+ +-------------------+ +# | . | | . | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | 10.0.0.3/24 | | | | 10.0.0.4/24 | | +# | +-------------+ | | +-------------+ | +# | | | | +# | hs-t200-3 netns | | hs-t200-4 netns | +# | | | | +# +-------------------+ +-------------------+ +# +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# | Network configuration | +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# rt-1: localsid table (table 90) +# +-------------------------------------------------+ +# |SID |Action | +# +-------------------------------------------------+ +# |fc00:21:100::6004|apply SRv6 End.DT4 vrftable 100| +# +-------------------------------------------------+ +# |fc00:21:200::6004|apply SRv6 End.DT4 vrftable 200| +# +-------------------------------------------------+ +# +# rt-1: VRF tenant 100 (table 100) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6004| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth_t100 | +# +---------------------------------------------------+ +# +# rt-1: VRF tenant 200 (table 200) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |10.0.0.4 |apply seg6 encap segs fc00:12:200::6004| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth_t200 | +# +---------------------------------------------------+ +# +# +# rt-2: localsid table (table 90) +# +-------------------------------------------------+ +# |SID |Action | +# +-------------------------------------------------+ +# |fc00:12:100::6004|apply SRv6 End.DT4 vrftable 100| +# +-------------------------------------------------+ +# |fc00:12:200::6004|apply SRv6 End.DT4 vrftable 200| +# +-------------------------------------------------+ +# +# rt-2: VRF tenant 100 (table 100) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6004| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth_t100 | +# +---------------------------------------------------+ +# +# rt-2: VRF tenant 200 (table 200) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |10.0.0.3 |apply seg6 encap segs fc00:21:200::6004| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth_t200 | +# +---------------------------------------------------+ +# + +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fd00 +readonly IPv4_HS_NETWORK=10.0.0 +readonly VPN_LOCATOR_SERVICE=fc00 +PING_TIMEOUT_SEC=4 + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +cleanup() +{ + ip link del veth-rt-1 2>/dev/null || true + ip link del veth-rt-2 2>/dev/null || true + + # destroy routers rt-* and hosts hs-* + for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do + ip netns del ${ns} || true + done +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns add ${nsname} + ip link set veth-rt-${rt} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${rt} name veth0 + + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 + ip -netns ${nsname} link set veth0 up + ip -netns ${nsname} link set lo up + + ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +setup_hs() +{ + local hs=$1 + local rt=$2 + local tid=$3 + local hsname=hs-t${tid}-${hs} + local rtname=rt-${rt} + local rtveth=veth-t${tid} + + # set the networking for the host + ip netns add ${hsname} + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} + ip -netns ${hsname} link set ${rtveth} netns ${rtname} + ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0 + ip -netns ${hsname} link set veth0 up + ip -netns ${hsname} link set lo up + + # configure the VRF for the tenant X on the router which is directly + # connected to the source host. + ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid} + ip -netns ${rtname} link set vrf-${tid} up + + # enslave the veth-tX interface to the vrf-X in the access router + ip -netns ${rtname} link set ${rtveth} master vrf-${tid} + ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.254/24 dev ${rtveth} + ip -netns ${rtname} link set ${rtveth} up + + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1 + + # disable the rp_filter otherwise the kernel gets confused about how + # to route decap ipv4 packets. + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.rp_filter=0 + + ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" +} + +setup_vpn_config() +{ + local hssrc=$1 + local rtsrc=$2 + local hsdst=$3 + local rtdst=$4 + local tid=$5 + + local hssrc_name=hs-t${tid}-${hssrc} + local hsdst_name=hs-t${tid}-${hsdst} + local rtsrc_name=rt-${rtsrc} + local rtdst_name=rt-${rtdst} + local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6004 + + # set the encap route for encapsulating packets which arrive from the + # host hssrc and destined to the access router rtsrc. + ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 vrf vrf-${tid} \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \ + via fd00::${rtdst} dev veth0 + + # set the decap route for decapsulating packets which arrive from + # the rtdst router and destined to the hsdst host. + ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \ + encap seg6local action End.DT4 vrftable ${tid} dev vrf-${tid} + + # all sids for VPNs start with a common locator which is fc00::/16. + # Routes for handling the SRv6 End.DT4 behavior instances are grouped + # together in the 'localsid' table. + # + # NOTE: added only once + if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \ + grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then + ip -netns ${rtdst_name} -6 rule add \ + to ${VPN_LOCATOR_SERVICE}::/16 \ + lookup ${LOCALSID_TABLE_ID} prio 999 + fi +} + +setup() +{ + ip link add veth-rt-1 type veth peer name veth-rt-2 + # setup the networking for router rt-1 and router rt-2 + setup_rt_networking 1 + setup_rt_networking 2 + + # setup two hosts for the tenant 100. + # - host hs-1 is directly connected to the router rt-1; + # - host hs-2 is directly connected to the router rt-2. + setup_hs 1 1 100 #args: host router tenant + setup_hs 2 2 100 + + # setup two hosts for the tenant 200 + # - host hs-3 is directly connected to the router rt-1; + # - host hs-4 is directly connected to the router rt-2. + setup_hs 3 1 200 + setup_hs 4 2 200 + + # setup the IPv4 L3 VPN which connects the host hs-t100-1 and host + # hs-t100-2 within the same tenant 100. + setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant + setup_vpn_config 2 2 1 1 100 + + # setup the IPv4 L3 VPN which connects the host hs-t200-3 and host + # hs-t200-4 within the same tenant 200. + setup_vpn_config 3 1 4 2 200 + setup_vpn_config 4 2 3 1 200 +} + +check_rt_connectivity() +{ + local rtsrc=$1 + local rtdst=$2 + + ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ + >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc=$1 + local rtdst=$2 + + check_rt_connectivity ${rtsrc} ${rtdst} + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1 +} + +check_and_log_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + check_hs_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})" +} + +check_and_log_hs_isolation() +{ + local hssrc=$1 + local tidsrc=$2 + local hsdst=$3 + local tiddst=$4 + + check_hs_connectivity ${hssrc} ${hsdst} ${tidsrc} + # NOTE: ping should fail + log_test $? 1 "Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}" +} + + +check_and_log_hs2gw_connectivity() +{ + local hssrc=$1 + local tid=$2 + + check_hs_connectivity ${hssrc} 254 ${tid} + log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})" +} + +router_tests() +{ + log_section "IPv6 routers connectivity test" + + check_and_log_rt_connectivity 1 2 + check_and_log_rt_connectivity 2 1 +} + +host2gateway_tests() +{ + log_section "IPv4 connectivity test among hosts and gateway" + + check_and_log_hs2gw_connectivity 1 100 + check_and_log_hs2gw_connectivity 2 100 + + check_and_log_hs2gw_connectivity 3 200 + check_and_log_hs2gw_connectivity 4 200 +} + +host_vpn_tests() +{ + log_section "SRv6 VPN connectivity test among hosts in the same tenant" + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 + + check_and_log_hs_connectivity 3 4 200 + check_and_log_hs_connectivity 4 3 200 +} + +host_vpn_isolation_tests() +{ + local i + local j + local k + local tmp + local l1="1 2" + local l2="3 4" + local t1=100 + local t2=200 + + log_section "SRv6 VPN isolation test among hosts in different tentants" + + for k in 0 1; do + for i in ${l1}; do + for j in ${l2}; do + check_and_log_hs_isolation ${i} ${t1} ${j} ${t2} + done + done + + # let us test the reverse path + tmp="${l1}"; l1="${l2}"; l2="${tmp}" + tmp=${t1}; t1=${t2}; t2=${tmp} + done +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit 0 +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit 0 +fi + +modprobe vrf &>/dev/null +if [ ! -e /proc/sys/net/vrf/strict_mode ]; then + echo "SKIP: vrf sysctl does not exist" + exit 0 +fi + +cleanup &>/dev/null + +setup + +router_tests +host2gateway_tests +host_vpn_tests +host_vpn_isolation_tests + +print_log_test_results + +cleanup &>/dev/null + +exit ${ret} -- cgit v1.3.1 From 2bc035538e167e28d900f2f51403458a05d7cc4a Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Wed, 2 Dec 2020 14:05:17 +0100 Subject: selftests: add selftest for the SRv6 End.DT6 (VRF) behavior this selftest is designed for evaluating the new SRv6 End.DT6 (VRF) behavior used, in this example, for implementing IPv6 L3 VPN use cases. Signed-off-by: Andrea Mayer Signed-off-by: Paolo Lungaroni Signed-off-by: Jakub Kicinski --- .../selftests/net/srv6_end_dt6_l3vpn_test.sh | 502 +++++++++++++++++++++ 1 file changed, 502 insertions(+) create mode 100755 tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh new file mode 100755 index 000000000000..68708f5e26a0 --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh @@ -0,0 +1,502 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer +# author: Paolo Lungaroni + +# This test is designed for evaluating the new SRv6 End.DT6 behavior used for +# implementing IPv6 L3 VPN use cases. +# +# Hereafter a network diagram is shown, where two different tenants (named 100 +# and 200) offer IPv6 L3 VPN services allowing hosts to communicate with each +# other across an IPv6 network. +# +# Only hosts belonging to the same tenant (and to the same VPN) can communicate +# with each other. Instead, the communication among hosts of different tenants +# is forbidden. +# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the IPv6 +# L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected using the +# IPv6 L3 VPN of tenant 200. Cross connection between tenant 100 and tenant 200 +# is forbidden and thus, for example, hs-t100-1 cannot reach hs-t200-3 and vice +# versa. +# +# Routers rt-1 and rt-2 implement IPv6 L3 VPN services leveraging the SRv6 +# architecture. The key components for such VPNs are: a) SRv6 Encap behavior, +# b) SRv6 End.DT6 behavior and c) VRF. +# +# To explain how an IPv6 L3 VPN based on SRv6 works, let us briefly consider an +# example where, within the same domain of tenant 100, the host hs-t100-1 pings +# the host hs-t100-2. +# +# First of all, L2 reachability of the host hs-t100-2 is taken into account by +# the router rt-1 which acts as a ndp proxy. +# +# When the host hs-t100-1 sends an IPv6 packet destined to hs-t100-2, the +# router rt-1 receives the packet on the internal veth-t100 interface. Such +# interface is enslaved to the VRF vrf-100 whose associated table contains the +# SRv6 Encap route for encapsulating any IPv6 packet in a IPv6 plus the Segment +# Routing Header (SRH) packet. This packet is sent through the (IPv6) core +# network up to the router rt-2 that receives it on veth0 interface. +# +# The rt-2 router uses the 'localsid' routing table to process incoming +# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these +# packets, the SRv6 End.DT6 behavior removes the outer IPv6+SRH headers and +# performs the lookup on the vrf-100 table using the destination address of +# the decapsulated IPv6 packet. Afterwards, the packet is sent to the host +# hs-t100-2 through the veth-t100 interface. +# +# The ping response follows the same processing but this time the role of rt-1 +# and rt-2 are swapped. +# +# Of course, the IPv6 L3 VPN for tenant 200 works exactly as the IPv6 L3 VPN +# for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are able to +# connect with each other. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | hs-t100-1 netns | | hs-t100-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | cafe::1/64 | | | | cafe::2/64 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +-----------------------------------+ +# | . | | . | +# | +---------------+ | | +---------------- | +# | | veth-t100 | | | | veth-t100 | | +# | | cafe::254/64 | +----------+ | | +----------+ | cafe::254/64 | | +# | +-------+-------+ | localsid | | | | localsid | +-------+-------- | +# | | | table | | | | table | | | +# | +----+----+ +----------+ | | +----------+ +----+----+ | +# | | vrf-100 | | | | vrf-100 | | +# | +---------+ +------------+ | | +------------+ +---------+ | +# | | veth0 | | | | veth0 | | +# | | fd00::1/64 |.|...|.| fd00::2/64 | | +# | +---------+ +------------+ | | +------------+ +---------+ | +# | | vrf-200 | | | | vrf-200 | | +# | +----+----+ | | +----+----+ | +# | | | | | | +# | +-------+-------+ | | +-------+-------- | +# | | veth-t200 | | | | veth-t200 | | +# | | cafe::254/64 | | | | cafe::254/64 | | +# | +---------------+ rt-1 netns | | rt-2 netns +---------------- | +# | . | | . | +# +-----------------------------------+ +-----------------------------------+ +# . . +# . . +# . . +# . . +# +-------------------+ +-------------------+ +# | . | | . | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | cafe::3/64 | | | | cafe::4/64 | | +# | +-------------+ | | +-------------+ | +# | | | | +# | hs-t200-3 netns | | hs-t200-4 netns | +# | | | | +# +-------------------+ +-------------------+ +# +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# | Network configuration | +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# rt-1: localsid table (table 90) +# +-------------------------------------------------+ +# |SID |Action | +# +-------------------------------------------------+ +# |fc00:21:100::6006|apply SRv6 End.DT6 vrftable 100| +# +-------------------------------------------------+ +# |fc00:21:200::6006|apply SRv6 End.DT6 vrftable 200| +# +-------------------------------------------------+ +# +# rt-1: VRF tenant 100 (table 100) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::2 |apply seg6 encap segs fc00:12:100::6006| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth_t100 | +# +---------------------------------------------------+ +# +# rt-1: VRF tenant 200 (table 200) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::4 |apply seg6 encap segs fc00:12:200::6006| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth_t200 | +# +---------------------------------------------------+ +# +# +# rt-2: localsid table (table 90) +# +-------------------------------------------------+ +# |SID |Action | +# +-------------------------------------------------+ +# |fc00:12:100::6006|apply SRv6 End.DT6 vrftable 100| +# +-------------------------------------------------+ +# |fc00:12:200::6006|apply SRv6 End.DT6 vrftable 200| +# +-------------------------------------------------+ +# +# rt-2: VRF tenant 100 (table 100) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::1 |apply seg6 encap segs fc00:21:100::6006| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth_t100 | +# +---------------------------------------------------+ +# +# rt-2: VRF tenant 200 (table 200) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::3 |apply seg6 encap segs fc00:21:200::6006| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth_t200 | +# +---------------------------------------------------+ +# + +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fd00 +readonly IPv6_HS_NETWORK=cafe +readonly VPN_LOCATOR_SERVICE=fc00 +PING_TIMEOUT_SEC=4 + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +cleanup() +{ + ip link del veth-rt-1 2>/dev/null || true + ip link del veth-rt-2 2>/dev/null || true + + # destroy routers rt-* and hosts hs-* + for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do + ip netns del ${ns} || true + done +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns add ${nsname} + ip link set veth-rt-${rt} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${rt} name veth0 + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} link set veth0 up + ip -netns ${nsname} link set lo up + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +setup_hs() +{ + local hs=$1 + local rt=$2 + local tid=$3 + local hsname=hs-t${tid}-${hs} + local rtname=rt-${rt} + local rtveth=veth-t${tid} + + # set the networking for the host + ip netns add ${hsname} + + ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} + ip -netns ${hsname} link set ${rtveth} netns ${rtname} + ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad + ip -netns ${hsname} link set veth0 up + ip -netns ${hsname} link set lo up + + # configure the VRF for the tenant X on the router which is directly + # connected to the source host. + ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid} + ip -netns ${rtname} link set vrf-${tid} up + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + # enslave the veth-tX interface to the vrf-X in the access router + ip -netns ${rtname} link set ${rtveth} master vrf-${tid} + ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::254/64 dev ${rtveth} nodad + ip -netns ${rtname} link set ${rtveth} up + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1 + + ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" +} + +setup_vpn_config() +{ + local hssrc=$1 + local rtsrc=$2 + local hsdst=$3 + local rtdst=$4 + local tid=$5 + + local hssrc_name=hs-t${tid}-${hssrc} + local hsdst_name=hs-t${tid}-${hsdst} + local rtsrc_name=rt-${rtsrc} + local rtdst_name=rt-${rtdst} + local rtveth=veth-t${tid} + local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6006 + + ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth} + + # set the encap route for encapsulating packets which arrive from the + # host hssrc and destined to the access router rtsrc. + ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 vrf vrf-${tid} \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \ + via fd00::${rtdst} dev veth0 + + # set the decap route for decapsulating packets which arrive from + # the rtdst router and destined to the hsdst host. + ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \ + encap seg6local action End.DT6 vrftable ${tid} dev vrf-${tid} + + # all sids for VPNs start with a common locator which is fc00::/16. + # Routes for handling the SRv6 End.DT6 behavior instances are grouped + # together in the 'localsid' table. + # + # NOTE: added only once + if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \ + grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then + ip -netns ${rtdst_name} -6 rule add \ + to ${VPN_LOCATOR_SERVICE}::/16 \ + lookup ${LOCALSID_TABLE_ID} prio 999 + fi +} + +setup() +{ + ip link add veth-rt-1 type veth peer name veth-rt-2 + # setup the networking for router rt-1 and router rt-2 + setup_rt_networking 1 + setup_rt_networking 2 + + # setup two hosts for the tenant 100. + # - host hs-1 is directly connected to the router rt-1; + # - host hs-2 is directly connected to the router rt-2. + setup_hs 1 1 100 #args: host router tenant + setup_hs 2 2 100 + + # setup two hosts for the tenant 200 + # - host hs-3 is directly connected to the router rt-1; + # - host hs-4 is directly connected to the router rt-2. + setup_hs 3 1 200 + setup_hs 4 2 200 + + # setup the IPv6 L3 VPN which connects the host hs-t100-1 and host + # hs-t100-2 within the same tenant 100. + setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant + setup_vpn_config 2 2 1 1 100 + + # setup the IPv6 L3 VPN which connects the host hs-t200-3 and host + # hs-t200-4 within the same tenant 200. + setup_vpn_config 3 1 4 2 200 + setup_vpn_config 4 2 3 1 200 +} + +check_rt_connectivity() +{ + local rtsrc=$1 + local rtdst=$2 + + ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ + >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc=$1 + local rtdst=$2 + + check_rt_connectivity ${rtsrc} ${rtdst} + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1 +} + +check_and_log_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + check_hs_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})" +} + +check_and_log_hs_isolation() +{ + local hssrc=$1 + local tidsrc=$2 + local hsdst=$3 + local tiddst=$4 + + check_hs_connectivity ${hssrc} ${hsdst} ${tidsrc} + # NOTE: ping should fail + log_test $? 1 "Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}" +} + + +check_and_log_hs2gw_connectivity() +{ + local hssrc=$1 + local tid=$2 + + check_hs_connectivity ${hssrc} 254 ${tid} + log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})" +} + +router_tests() +{ + log_section "IPv6 routers connectivity test" + + check_and_log_rt_connectivity 1 2 + check_and_log_rt_connectivity 2 1 +} + +host2gateway_tests() +{ + log_section "IPv6 connectivity test among hosts and gateway" + + check_and_log_hs2gw_connectivity 1 100 + check_and_log_hs2gw_connectivity 2 100 + + check_and_log_hs2gw_connectivity 3 200 + check_and_log_hs2gw_connectivity 4 200 +} + +host_vpn_tests() +{ + log_section "SRv6 VPN connectivity test among hosts in the same tenant" + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 + + check_and_log_hs_connectivity 3 4 200 + check_and_log_hs_connectivity 4 3 200 +} + +host_vpn_isolation_tests() +{ + local i + local j + local k + local tmp + local l1="1 2" + local l2="3 4" + local t1=100 + local t2=200 + + log_section "SRv6 VPN isolation test among hosts in different tentants" + + for k in 0 1; do + for i in ${l1}; do + for j in ${l2}; do + check_and_log_hs_isolation ${i} ${t1} ${j} ${t2} + done + done + + # let us test the reverse path + tmp="${l1}"; l1="${l2}"; l2="${tmp}" + tmp=${t1}; t1=${t2}; t2=${tmp} + done +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit 0 +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit 0 +fi + +modprobe vrf &>/dev/null +if [ ! -e /proc/sys/net/vrf/strict_mode ]; then + echo "SKIP: vrf sysctl does not exist" + exit 0 +fi + +cleanup &>/dev/null + +setup + +router_tests +host2gateway_tests +host_vpn_tests +host_vpn_isolation_tests + +print_log_test_results + +cleanup &>/dev/null + +exit ${ret} -- cgit v1.3.1 From 4f19cab76136e800a3f04d8c9aa4d8e770e3d3d8 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Fri, 4 Dec 2020 12:36:05 +0100 Subject: bpf: Add a bpf_sock_from_file helper While eBPF programs can check whether a file is a socket by file->f_op == &socket_file_ops, they cannot convert the void private_data pointer to a struct socket BTF pointer. In order to do this a new helper wrapping sock_from_file is added. This is useful to tracing programs but also other program types inheriting this set of helpers such as iterators or LSM programs. Signed-off-by: Florent Revest Signed-off-by: Daniel Borkmann Acked-by: KP Singh Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201204113609.1850150-2-revest@google.com --- include/uapi/linux/bpf.h | 9 +++++++++ kernel/trace/bpf_trace.c | 20 ++++++++++++++++++++ scripts/bpf_helpers_doc.py | 4 ++++ tools/include/uapi/linux/bpf.h | 9 +++++++++ 4 files changed, 42 insertions(+) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1233f14f659f..30b477a26482 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3822,6 +3822,14 @@ union bpf_attr { * The **hash_algo** is returned on success, * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if * invalid arguments are passed. + * + * struct socket *bpf_sock_from_file(struct file *file) + * Description + * If the given file represents a socket, returns the associated + * socket. + * Return + * A pointer to a struct socket on success or NULL if the file is + * not a socket. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3986,6 +3994,7 @@ union bpf_attr { FN(bprm_opts_set), \ FN(ktime_get_coarse_ns), \ FN(ima_inode_hash), \ + FN(sock_from_file), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index cb9d7478ef0c..0cf0a6331482 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1270,6 +1270,24 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = { .arg5_type = ARG_ANYTHING, }; +BPF_CALL_1(bpf_sock_from_file, struct file *, file) +{ + return (unsigned long) sock_from_file(file); +} + +BTF_ID_LIST(bpf_sock_from_file_btf_ids) +BTF_ID(struct, socket) +BTF_ID(struct, file) + +static const struct bpf_func_proto bpf_sock_from_file_proto = { + .func = bpf_sock_from_file, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .ret_btf_id = &bpf_sock_from_file_btf_ids[0], + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &bpf_sock_from_file_btf_ids[1], +}; + const struct bpf_func_proto * bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -1366,6 +1384,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_per_cpu_ptr_proto; case BPF_FUNC_bpf_this_cpu_ptr: return &bpf_this_cpu_ptr_proto; + case BPF_FUNC_sock_from_file: + return &bpf_sock_from_file_proto; default: return NULL; } diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 8b829748d488..867ada23281c 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -437,6 +437,8 @@ class PrinterHelpers(Printer): 'struct path', 'struct btf_ptr', 'struct inode', + 'struct socket', + 'struct file', ] known_types = { '...', @@ -482,6 +484,8 @@ class PrinterHelpers(Printer): 'struct path', 'struct btf_ptr', 'struct inode', + 'struct socket', + 'struct file', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1233f14f659f..30b477a26482 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3822,6 +3822,14 @@ union bpf_attr { * The **hash_algo** is returned on success, * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if * invalid arguments are passed. + * + * struct socket *bpf_sock_from_file(struct file *file) + * Description + * If the given file represents a socket, returns the associated + * socket. + * Return + * A pointer to a struct socket on success or NULL if the file is + * not a socket. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3986,6 +3994,7 @@ union bpf_attr { FN(bprm_opts_set), \ FN(ktime_get_coarse_ns), \ FN(ima_inode_hash), \ + FN(sock_from_file), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.3.1 From 593f6d41abbbc63e1ad297f7a36ab6060a812f0c Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Fri, 4 Dec 2020 12:36:07 +0100 Subject: selftests/bpf: Add an iterator selftest for bpf_sk_storage_delete The eBPF program iterates over all entries (well, only one) of a socket local storage map and deletes them all. The test makes sure that the entry is indeed deleted. Signed-off-by: Florent Revest Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201204113609.1850150-4-revest@google.com --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 64 ++++++++++++++++++++++ .../bpf/progs/bpf_iter_bpf_sk_storage_helpers.c | 23 ++++++++ 2 files changed, 87 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 448885b95eed..bb4a638f2e6f 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -20,6 +20,7 @@ #include "bpf_iter_bpf_percpu_hash_map.skel.h" #include "bpf_iter_bpf_array_map.skel.h" #include "bpf_iter_bpf_percpu_array_map.skel.h" +#include "bpf_iter_bpf_sk_storage_helpers.skel.h" #include "bpf_iter_bpf_sk_storage_map.skel.h" #include "bpf_iter_test_kern5.skel.h" #include "bpf_iter_test_kern6.skel.h" @@ -913,6 +914,67 @@ out: bpf_iter_bpf_percpu_array_map__destroy(skel); } +/* An iterator program deletes all local storage in a map. */ +static void test_bpf_sk_storage_delete(void) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + struct bpf_iter_bpf_sk_storage_helpers *skel; + union bpf_iter_link_info linfo; + int err, len, map_fd, iter_fd; + struct bpf_link *link; + int sock_fd = -1; + __u32 val = 42; + char buf[64]; + + skel = bpf_iter_bpf_sk_storage_helpers__open_and_load(); + if (CHECK(!skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load", + "skeleton open_and_load failed\n")) + return; + + map_fd = bpf_map__fd(skel->maps.sk_stg_map); + + sock_fd = socket(AF_INET6, SOCK_STREAM, 0); + if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno)) + goto out; + err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST); + if (CHECK(err, "map_update", "map_update failed\n")) + goto out; + + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + link = bpf_program__attach_iter(skel->progs.delete_bpf_sk_storage_map, + &opts); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* do some tests */ + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + goto close_iter; + + /* test results */ + err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); + if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", + "map value wasn't deleted (err=%d, errno=%d)\n", err, errno)) + goto close_iter; + +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +out: + if (sock_fd >= 0) + close(sock_fd); + bpf_iter_bpf_sk_storage_helpers__destroy(skel); +} + static void test_bpf_sk_storage_map(void) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); @@ -1067,6 +1129,8 @@ void test_bpf_iter(void) test_bpf_percpu_array_map(); if (test__start_subtest("bpf_sk_storage_map")) test_bpf_sk_storage_map(); + if (test__start_subtest("bpf_sk_storage_delete")) + test_bpf_sk_storage_delete(); if (test__start_subtest("rdonly-buf-out-of-bound")) test_rdonly_buf_out_of_bound(); if (test__start_subtest("buf-neg-offset")) diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c new file mode 100644 index 000000000000..01ff3235e413 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Google LLC. */ +#include "bpf_iter.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} sk_stg_map SEC(".maps"); + +SEC("iter/bpf_sk_storage_map") +int delete_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx) +{ + if (ctx->sk) + bpf_sk_storage_delete(&sk_stg_map, ctx->sk); + + return 0; +} -- cgit v1.3.1 From bd9b327e58f98aa7126291bf12b50720c660e787 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Fri, 4 Dec 2020 12:36:08 +0100 Subject: selftests/bpf: Add an iterator selftest for bpf_sk_storage_get The eBPF program iterates over all files and tasks. For all socket files, it stores the tgid of the last task it encountered with a handle to that socket. This is a heuristic for finding the "owner" of a socket similar to what's done by lsof, ss, netstat or fuser. Potentially, this information could be used from a cgroup_skb/*gress hook to try to associate network traffic with processes. The test makes sure that a socket it created is tagged with prog_tests's pid. Signed-off-by: Florent Revest Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201204113609.1850150-5-revest@google.com --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 40 ++++++++++++++++++++++ .../bpf/progs/bpf_iter_bpf_sk_storage_helpers.c | 24 +++++++++++++ 2 files changed, 64 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index bb4a638f2e6f..9336d0f18331 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -975,6 +975,44 @@ out: bpf_iter_bpf_sk_storage_helpers__destroy(skel); } +/* This creates a socket and its local storage. It then runs a task_iter BPF + * program that replaces the existing socket local storage with the tgid of the + * only task owning a file descriptor to this socket, this process, prog_tests. + */ +static void test_bpf_sk_storage_get(void) +{ + struct bpf_iter_bpf_sk_storage_helpers *skel; + int err, map_fd, val = -1; + int sock_fd = -1; + + skel = bpf_iter_bpf_sk_storage_helpers__open_and_load(); + if (CHECK(!skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load", + "skeleton open_and_load failed\n")) + return; + + sock_fd = socket(AF_INET6, SOCK_STREAM, 0); + if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno)) + goto out; + + map_fd = bpf_map__fd(skel->maps.sk_stg_map); + + err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST); + if (CHECK(err, "bpf_map_update_elem", "map_update_failed\n")) + goto close_socket; + + do_dummy_read(skel->progs.fill_socket_owner); + + err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); + CHECK(err || val != getpid(), "bpf_map_lookup_elem", + "map value wasn't set correctly (expected %d, got %d, err=%d)\n", + getpid(), val, err); + +close_socket: + close(sock_fd); +out: + bpf_iter_bpf_sk_storage_helpers__destroy(skel); +} + static void test_bpf_sk_storage_map(void) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); @@ -1131,6 +1169,8 @@ void test_bpf_iter(void) test_bpf_sk_storage_map(); if (test__start_subtest("bpf_sk_storage_delete")) test_bpf_sk_storage_delete(); + if (test__start_subtest("bpf_sk_storage_get")) + test_bpf_sk_storage_get(); if (test__start_subtest("rdonly-buf-out-of-bound")) test_rdonly_buf_out_of_bound(); if (test__start_subtest("buf-neg-offset")) diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c index 01ff3235e413..dde53df37de8 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c @@ -21,3 +21,27 @@ int delete_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx) return 0; } + +SEC("iter/task_file") +int fill_socket_owner(struct bpf_iter__task_file *ctx) +{ + struct task_struct *task = ctx->task; + struct file *file = ctx->file; + struct socket *sock; + int *sock_tgid; + + if (!task || !file) + return 0; + + sock = bpf_sock_from_file(file); + if (!sock) + return 0; + + sock_tgid = bpf_sk_storage_get(&sk_stg_map, sock->sk, 0, 0); + if (!sock_tgid) + return 0; + + *sock_tgid = task->tgid; + + return 0; +} -- cgit v1.3.1 From 34da87213d3ddd26643aa83deff7ffc6463da0fc Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Fri, 4 Dec 2020 12:36:09 +0100 Subject: selftests/bpf: Test bpf_sk_storage_get in tcp iterators This extends the existing bpf_sk_storage_get test where a socket is created and tagged with its creator's pid by a task_file iterator. A TCP iterator is now also used at the end of the test to negate the values already stored in the local storage. The test therefore expects -getpid() to be stored in the local storage. Signed-off-by: Florent Revest Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201204113609.1850150-6-revest@google.com --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 18 ++++++++++++++++-- .../bpf/progs/bpf_iter_bpf_sk_storage_helpers.c | 18 ++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 9336d0f18331..0e586368948d 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -978,6 +978,8 @@ out: /* This creates a socket and its local storage. It then runs a task_iter BPF * program that replaces the existing socket local storage with the tgid of the * only task owning a file descriptor to this socket, this process, prog_tests. + * It then runs a tcp socket iterator that negates the value in the existing + * socket local storage, the test verifies that the resulting value is -pid. */ static void test_bpf_sk_storage_get(void) { @@ -994,6 +996,10 @@ static void test_bpf_sk_storage_get(void) if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno)) goto out; + err = listen(sock_fd, 1); + if (CHECK(err != 0, "listen", "errno: %d\n", errno)) + goto close_socket; + map_fd = bpf_map__fd(skel->maps.sk_stg_map); err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST); @@ -1003,9 +1009,17 @@ static void test_bpf_sk_storage_get(void) do_dummy_read(skel->progs.fill_socket_owner); err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - CHECK(err || val != getpid(), "bpf_map_lookup_elem", + if (CHECK(err || val != getpid(), "bpf_map_lookup_elem", + "map value wasn't set correctly (expected %d, got %d, err=%d)\n", + getpid(), val, err)) + goto close_socket; + + do_dummy_read(skel->progs.negate_socket_local_storage); + + err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); + CHECK(err || val != -getpid(), "bpf_map_lookup_elem", "map value wasn't set correctly (expected %d, got %d, err=%d)\n", - getpid(), val, err); + -getpid(), val, err); close_socket: close(sock_fd); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c index dde53df37de8..6cecab2b32ba 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c @@ -45,3 +45,21 @@ int fill_socket_owner(struct bpf_iter__task_file *ctx) return 0; } + +SEC("iter/tcp") +int negate_socket_local_storage(struct bpf_iter__tcp *ctx) +{ + struct sock_common *sk_common = ctx->sk_common; + int *sock_tgid; + + if (!sk_common) + return 0; + + sock_tgid = bpf_sk_storage_get(&sk_stg_map, sk_common, 0, 0); + if (!sock_tgid) + return 0; + + *sock_tgid = -*sock_tgid; + + return 0; +} -- cgit v1.3.1 From 41fdfffd5783db62bb9e00605eee14c69b9c0974 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 2 Dec 2020 15:35:43 +0100 Subject: selftests: forwarding: Add MPLS L2VPN test Connect hosts H1 and H2 using two intermediate encapsulation routers (LER1 and LER2). These routers encapsulate traffic from the hosts, including the original Ethernet header, into MPLS. Use ping to test reachability between H1 and H2. Signed-off-by: Guillaume Nault Link: https://lore.kernel.org/r/625f5c1aafa3a8085f8d3e082d680a82e16ffbaa.1606918980.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/Makefile | 1 + tools/testing/selftests/net/forwarding/config | 3 + .../selftests/net/forwarding/tc_mpls_l2vpn.sh | 192 +++++++++++++++++++++ 3 files changed, 196 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 250fbb2d1625..d97bd6889446 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -48,6 +48,7 @@ TEST_PROGS = bridge_igmp.sh \ tc_chains.sh \ tc_flower_router.sh \ tc_flower.sh \ + tc_mpls_l2vpn.sh \ tc_shblocks.sh \ tc_vlan_modify.sh \ vxlan_asymmetric.sh \ diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index da96eff72a8e..10e9a3321ae1 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -6,6 +6,9 @@ CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_VRF=m CONFIG_BPF_SYSCALL=y CONFIG_CGROUP_BPF=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_MPLS=m +CONFIG_NET_ACT_VLAN=m CONFIG_NET_CLS_FLOWER=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_ACT_GACT=m diff --git a/tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh b/tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh new file mode 100755 index 000000000000..03743f04e178 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh @@ -0,0 +1,192 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +# | H1 (v$h1) | +# | 192.0.2.1/24 | +# | 2001:db8::1/124 | +# | + $h1 | +# +-----------------|-----+ +# | +# | (Plain Ethernet traffic) +# | +# +-----------------|-----------------------------------------+ +# | LER1 + $edge1 | +# | -ingress: | +# | -encapsulate Ethernet into MPLS | +# | -add outer Ethernet header | +# | -redirect to $mpls1 (egress) | +# | | +# | + $mpls1 | +# | | -ingress: | +# | | -remove outer Ethernet header | +# | | -remove MPLS header | +# | | -redirect to $edge1 (egress) | +# +-----------------|-----------------------------------------+ +# | +# | (Ethernet over MPLS traffic) +# | +# +-----------------|-----------------------------------------+ +# | LER2 + $mpls2 | +# | -ingress: | +# | -remove outer Ethernet header | +# | -remove MPLS header | +# | -redirect to $edge2 (egress) | +# | | +# | + $edge2 | +# | | -ingress: | +# | | -encapsulate Ethernet into MPLS | +# | | -add outer Ethernet header | +# | | -redirect to $mpls2 (egress) | +# +-----------------|-----------------------------------------| +# | +# | (Plain Ethernet traffic) +# | +# +-----------------|-----+ +# | H2 (v$h2) | | +# | + $h2 | +# | 192.0.2.2/24 | +# | 2001:db8::2/124 | +# +-----------------------+ +# +# LER1 and LER2 logically represent two different routers. However, no VRF is +# created for them, as they don't do any IP routing. + +ALL_TESTS="mpls_forward_eth" +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8::1/124 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 2001:db8::1/124 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 2001:db8::2/124 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/24 2001:db8::2/124 +} + +ler1_create() +{ + tc qdisc add dev $edge1 ingress + tc filter add dev $edge1 ingress \ + matchall \ + action mpls mac_push label 102 \ + action vlan push_eth dst_mac $mpls2mac src_mac $mpls1mac \ + action mirred egress redirect dev $mpls1 + ip link set dev $edge1 up + + tc qdisc add dev $mpls1 ingress + tc filter add dev $mpls1 ingress \ + protocol mpls_uc \ + flower mpls_label 101 \ + action vlan pop_eth \ + action mpls pop protocol teb \ + action mirred egress redirect dev $edge1 + ip link set dev $mpls1 up +} + +ler1_destroy() +{ + ip link set dev $mpls1 down + tc qdisc del dev $mpls1 ingress + + ip link set dev $edge1 down + tc qdisc del dev $edge1 ingress +} + +ler2_create() +{ + tc qdisc add dev $edge2 ingress + tc filter add dev $edge2 ingress \ + matchall \ + action mpls mac_push label 101 \ + action vlan push_eth dst_mac $mpls1mac src_mac $mpls2mac \ + action mirred egress redirect dev $mpls2 + ip link set dev $edge2 up + + tc qdisc add dev $mpls2 ingress + tc filter add dev $mpls2 ingress \ + protocol mpls_uc \ + flower mpls_label 102 \ + action vlan pop_eth \ + action mpls pop protocol teb \ + action mirred egress redirect dev $edge2 + ip link set dev $mpls2 up +} + +ler2_destroy() +{ + ip link set dev $mpls2 down + tc qdisc del dev $mpls2 ingress + + ip link set dev $edge2 down + tc qdisc del dev $edge2 ingress +} + +mpls_forward_eth() +{ + ping_test $h1 192.0.2.2 + ping6_test $h1 2001:db8::2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + edge1=${NETIFS[p2]} + + mpls1=${NETIFS[p3]} + mpls2=${NETIFS[p4]} + + edge2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + mpls1mac=$(mac_get $mpls1) + mpls2mac=$(mac_get $mpls2) + + vrf_prepare + + h1_create + h2_create + ler1_create + ler2_create +} + +cleanup() +{ + pre_cleanup + + ler2_destroy + ler1_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +tc_offload_check +if [[ $? -ne 0 ]]; then + log_info "Could not test offloaded functionality" +else + tcflags="skip_sw" + tests_run +fi + +exit $EXIT_STATUS -- cgit v1.3.1 From 205704c618af0ab2366015d2281a3b0814d918a0 Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Date: Thu, 3 Dec 2020 22:06:04 -0500 Subject: vrf: packets with lladdr src needs dst at input with orig_iif when needs strict Depending on the order of the routes to fe80::/64 are installed on the VRF table, the NS for the source link-local address of the originator might be sent to the wrong interface. This patch ensures that packets with link-local addr source is doing a lookup with the orig_iif when the destination addr indicates that it is strict. Add the reproducer as a use case in self test script fcnal-test.sh. Fixes: b4869aa2f881 ("net: vrf: ipv6 support for local traffic to local addresses") Signed-off-by: Stephen Suryaputra Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20201204030604.18828-1-ssuryaextr@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/vrf.c | 10 +++- tools/testing/selftests/net/fcnal-test.sh | 95 +++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index f2793ffde191..b9b7e00b72a8 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1315,11 +1315,17 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, int orig_iif = skb->skb_iif; bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); bool is_ndisc = ipv6_ndisc_frame(skb); + bool is_ll_src; /* loopback, multicast & non-ND link-local traffic; do not push through - * packet taps again. Reset pkt_type for upper layers to process skb + * packet taps again. Reset pkt_type for upper layers to process skb. + * for packets with lladdr src, however, skip so that the dst can be + * determine at input using original ifindex in the case that daddr + * needs strict */ - if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) { + is_ll_src = ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL; + if (skb->pkt_type == PACKET_LOOPBACK || + (need_strict && !is_ndisc && !is_ll_src)) { skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; IP6CB(skb)->flags |= IP6SKB_L3SLAVE; diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index fb5c55dd6df8..02b0b9ead40b 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -256,6 +256,28 @@ setup_cmd_nsb() fi } +setup_cmd_nsc() +{ + local cmd="$*" + local rc + + run_cmd_nsc ${cmd} + rc=$? + if [ $rc -ne 0 ]; then + # show user the command if not done so already + if [ "$VERBOSE" = "0" ]; then + echo "setup command: $cmd" + fi + echo "failed. stopping tests" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue" + read a + fi + exit $rc + fi +} + # set sysctl values in NS-A set_sysctl() { @@ -471,6 +493,36 @@ setup() sleep 1 } +setup_lla_only() +{ + # make sure we are starting with a clean slate + kill_procs + cleanup 2>/dev/null + + log_debug "Configuring network namespaces" + set -e + + create_ns ${NSA} "-" "-" + create_ns ${NSB} "-" "-" + create_ns ${NSC} "-" "-" + connect_ns ${NSA} ${NSA_DEV} "-" "-" \ + ${NSB} ${NSB_DEV} "-" "-" + connect_ns ${NSA} ${NSA_DEV2} "-" "-" \ + ${NSC} ${NSC_DEV} "-" "-" + + NSA_LINKIP6=$(get_linklocal ${NSA} ${NSA_DEV}) + NSB_LINKIP6=$(get_linklocal ${NSB} ${NSB_DEV}) + NSC_LINKIP6=$(get_linklocal ${NSC} ${NSC_DEV}) + + create_vrf ${NSA} ${VRF} ${VRF_TABLE} "-" "-" + ip -netns ${NSA} link set dev ${NSA_DEV} vrf ${VRF} + ip -netns ${NSA} link set dev ${NSA_DEV2} vrf ${VRF} + + set +e + + sleep 1 +} + ################################################################################ # IPv4 @@ -3787,10 +3839,53 @@ use_case_br() setup_cmd_nsb ip li del vlan100 2>/dev/null } +# VRF only. +# ns-A device is connected to both ns-B and ns-C on a single VRF but only has +# LLA on the interfaces +use_case_ping_lla_multi() +{ + setup_lla_only + # only want reply from ns-A + setup_cmd_nsb sysctl -qw net.ipv6.icmp.echo_ignore_multicast=1 + setup_cmd_nsc sysctl -qw net.ipv6.icmp.echo_ignore_multicast=1 + + log_start + run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV} + log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Pre cycle, ping out ns-B" + + run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV} + log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Pre cycle, ping out ns-C" + + # cycle/flap the first ns-A interface + setup_cmd ip link set ${NSA_DEV} down + setup_cmd ip link set ${NSA_DEV} up + sleep 1 + + log_start + run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV} + log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV}, ping out ns-B" + run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV} + log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV}, ping out ns-C" + + # cycle/flap the second ns-A interface + setup_cmd ip link set ${NSA_DEV2} down + setup_cmd ip link set ${NSA_DEV2} up + sleep 1 + + log_start + run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV} + log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-B" + run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV} + log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-C" +} + use_cases() { log_section "Use cases" + log_subsection "Device enslaved to bridge" use_case_br + log_subsection "Ping LLA with multiple interfaces" + use_case_ping_lla_multi } ################################################################################ -- cgit v1.3.1 From da777be6de014be6b302644685797ed3860a0d0d Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Thu, 15 Oct 2020 07:11:57 +0800 Subject: tools: gpio: add support for reporting realtime event clock to lsgpio Add support for reporting if a line is configured to report realtime timestamps in events. Signed-off-by: Kent Gibson Link: https://lore.kernel.org/r/20201014231158.34117-3-warthog618@gmail.com Signed-off-by: Linus Walleij --- tools/gpio/lsgpio.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c index 5a05a454d0c9..c61d061247e1 100644 --- a/tools/gpio/lsgpio.c +++ b/tools/gpio/lsgpio.c @@ -65,6 +65,10 @@ struct gpio_flag flagnames[] = { .name = "bias-disabled", .mask = GPIO_V2_LINE_FLAG_BIAS_DISABLED, }, + { + .name = "clock-realtime", + .mask = GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME, + }, }; static void print_attributes(struct gpio_v2_line_info *info) -- cgit v1.3.1 From e0822cf9b892ed051830daaf57896aca48c8567b Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Thu, 15 Oct 2020 07:11:58 +0800 Subject: tools: gpio: add option to report wall-clock time to gpio-event-mon Add support for selecting the realtime clock for events. Signed-off-by: Kent Gibson Link: https://lore.kernel.org/r/20201014231158.34117-4-warthog618@gmail.com Signed-off-by: Linus Walleij --- tools/gpio/gpio-event-mon.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index 90c3155f05b1..cacd66ad7926 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -148,6 +148,7 @@ void print_usage(void) " -s Set line as open source\n" " -r Listen for rising edges\n" " -f Listen for falling edges\n" + " -w Report the wall-clock time for events\n" " -b Debounce the line with period n microseconds\n" " [-c ] Do loops (optional, infinite loop if not stated)\n" " -? This helptext\n" @@ -173,7 +174,7 @@ int main(int argc, char **argv) memset(&config, 0, sizeof(config)); config.flags = GPIO_V2_LINE_FLAG_INPUT; - while ((c = getopt(argc, argv, "c:n:o:b:dsrf?")) != -1) { + while ((c = getopt(argc, argv, "c:n:o:b:dsrfw?")) != -1) { switch (c) { case 'c': loops = strtoul(optarg, NULL, 10); @@ -204,6 +205,9 @@ int main(int argc, char **argv) case 'f': config.flags |= GPIO_V2_LINE_FLAG_EDGE_FALLING; break; + case 'w': + config.flags |= GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME; + break; case '?': print_usage(); return -1; -- cgit v1.3.1 From 4e9a5ae8df5b3365183150f6df49e49dece80d8c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 3 Dec 2020 13:50:37 +0900 Subject: x86/uprobes: Do not use prefixes.nbytes when looping over prefixes.bytes Since insn.prefixes.nbytes can be bigger than the size of insn.prefixes.bytes[] when a prefix is repeated, the proper check must be insn.prefixes.bytes[i] != 0 and i < 4 instead of using insn.prefixes.nbytes. Introduce a for_each_insn_prefix() macro for this purpose. Debugged by Kees Cook . [ bp: Massage commit message, sync with the respective header in tools/ and drop "we". ] Fixes: 2b1444983508 ("uprobes, mm, x86: Add the ability to install and remove uprobes breakpoints") Reported-by: syzbot+9b64b619f10f19d19a7c@syzkaller.appspotmail.com Signed-off-by: Masami Hiramatsu Signed-off-by: Borislav Petkov Reviewed-by: Srikar Dronamraju Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/160697103739.3146288.7437620795200799020.stgit@devnote2 --- arch/x86/include/asm/insn.h | 15 +++++++++++++++ arch/x86/kernel/uprobes.c | 10 ++++++---- tools/arch/x86/include/asm/insn.h | 15 +++++++++++++++ 3 files changed, 36 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 5c1ae3eff9d4..a8c3d284fa46 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -201,6 +201,21 @@ static inline int insn_offset_immediate(struct insn *insn) return insn_offset_displacement(insn) + insn->displacement.nbytes; } +/** + * for_each_insn_prefix() -- Iterate prefixes in the instruction + * @insn: Pointer to struct insn. + * @idx: Index storage. + * @prefix: Prefix byte. + * + * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix + * and the index is stored in @idx (note that this @idx is just for a cursor, + * do not change it.) + * Since prefixes.nbytes can be bigger than 4 if some prefixes + * are repeated, it cannot be used for looping over the prefixes. + */ +#define for_each_insn_prefix(insn, idx, prefix) \ + for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++) + #define POP_SS_OPCODE 0x1f #define MOV_SREG_OPCODE 0x8e diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 3fdaa042823d..138bdb1fd136 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -255,12 +255,13 @@ static volatile u32 good_2byte_insns[256 / 32] = { static bool is_prefix_bad(struct insn *insn) { + insn_byte_t p; int i; - for (i = 0; i < insn->prefixes.nbytes; i++) { + for_each_insn_prefix(insn, i, p) { insn_attr_t attr; - attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]); + attr = inat_get_opcode_attribute(p); switch (attr) { case INAT_MAKE_PREFIX(INAT_PFX_ES): case INAT_MAKE_PREFIX(INAT_PFX_CS): @@ -715,6 +716,7 @@ static const struct uprobe_xol_ops push_xol_ops = { static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) { u8 opc1 = OPCODE1(insn); + insn_byte_t p; int i; switch (opc1) { @@ -746,8 +748,8 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix. * No one uses these insns, reject any branch insns with such prefix. */ - for (i = 0; i < insn->prefixes.nbytes; i++) { - if (insn->prefixes.bytes[i] == 0x66) + for_each_insn_prefix(insn, i, p) { + if (p == 0x66) return -ENOTSUPP; } diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h index 568854b14d0a..52c6262e6bfd 100644 --- a/tools/arch/x86/include/asm/insn.h +++ b/tools/arch/x86/include/asm/insn.h @@ -201,6 +201,21 @@ static inline int insn_offset_immediate(struct insn *insn) return insn_offset_displacement(insn) + insn->displacement.nbytes; } +/** + * for_each_insn_prefix() -- Iterate prefixes in the instruction + * @insn: Pointer to struct insn. + * @idx: Index storage. + * @prefix: Prefix byte. + * + * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix + * and the index is stored in @idx (note that this @idx is just for a cursor, + * do not change it.) + * Since prefixes.nbytes can be bigger than 4 if some prefixes + * are repeated, it cannot be used for looping over the prefixes. + */ +#define for_each_insn_prefix(insn, idx, prefix) \ + for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++) + #define POP_SS_OPCODE 0x1f #define MOV_SREG_OPCODE 0x8e -- cgit v1.3.1 From d8cbe8bfa7df3c680ddfd5e1eee3a5c86d8dc764 Mon Sep 17 00:00:00 2001 From: Xingxing Su Date: Sat, 5 Dec 2020 22:15:02 -0800 Subject: tools/testing/selftests/vm: fix build error Only x86 and PowerPC implement the pkey-xxx.h, and an error was reported when compiling protection_keys.c. Add a Arch judgment to compile "protection_keys" in the Makefile. If other arch implement this, add the arch name to the Makefile. eg: ifneq (,$(findstring $(ARCH),powerpc mips ... )) Following build errors: pkey-helpers.h:93:2: error: #error Architecture not supported #error Architecture not supported pkey-helpers.h:96:20: error: `PKEY_DISABLE_ACCESS' undeclared #define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) ^ protection_keys.c:218:45: error: `PKEY_DISABLE_WRITE' undeclared pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); ^ Signed-off-by: Xingxing Su Signed-off-by: Andrew Morton Cc: Shuah Khan Cc: Sandipan Das Cc: John Hubbard Cc: Dave Hansen Cc: "Kirill A. Shutemov" Cc: Brian Geffon Cc: Mina Almasry Link: https://lkml.kernel.org/r/1606826876-30656-1-git-send-email-suxingxing@loongson.cn Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/Makefile | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 30873b19d04b..691893afc15d 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -60,9 +60,13 @@ ifeq ($(CAN_BUILD_X86_64),1) TEST_GEN_FILES += $(BINARIES_64) endif else + +ifneq (,$(findstring $(ARCH),powerpc)) TEST_GEN_FILES += protection_keys endif +endif + ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64)) TEST_GEN_FILES += va_128TBswitch TEST_GEN_FILES += virtual_address_range -- cgit v1.3.1 From 573a259336f8c57739bdaf035aa7abbae7d9a713 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Sat, 5 Dec 2020 22:15:05 -0800 Subject: userfaultfd: selftests: fix SIGSEGV if huge mmap fails The error handling in hugetlb_allocate_area() was incorrect for the hugetlb_shared test case. Previously the behavior was: - mmap a hugetlb area - If this fails, set the pointer to NULL, and carry on - mmap an alias of the same hugetlb fd - If this fails, munmap the original area If the original mmap failed, it's likely the second one did too. If both failed, we'd blindly try to munmap a NULL pointer, causing a SIGSEGV. Instead, "goto fail" so we return before trying to mmap the alias. This issue can be hit "in real life" by forgetting to set /proc/sys/vm/nr_hugepages (leaving it at 0), and then trying to run the hugetlb_shared test. Another small improvement is, when the original mmap fails, don't just print "it failed": perror(), so we can see *why*. :) Signed-off-by: Axel Rasmussen Signed-off-by: Andrew Morton Cc: Shuah Khan Cc: Peter Xu Cc: Joe Perches Cc: Mike Rapoport Cc: Andrea Arcangeli Cc: David Alan Gilbert Link: https://lkml.kernel.org/r/20201204203443.2714693-1-axelrasmussen@google.com Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/userfaultfd.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 9b0912a01777..c4425597769a 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -206,19 +206,19 @@ static int hugetlb_release_pages(char *rel_area) return ret; } - static void hugetlb_allocate_area(void **alloc_area) { void *area_alias = NULL; char **alloc_area_alias; + *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, (map_shared ? MAP_SHARED : MAP_PRIVATE) | MAP_HUGETLB, huge_fd, *alloc_area == area_src ? 0 : nr_pages * page_size); if (*alloc_area == MAP_FAILED) { - fprintf(stderr, "mmap of hugetlbfs file failed\n"); - *alloc_area = NULL; + perror("mmap of hugetlbfs file failed"); + goto fail; } if (map_shared) { @@ -227,14 +227,11 @@ static void hugetlb_allocate_area(void **alloc_area) huge_fd, *alloc_area == area_src ? 0 : nr_pages * page_size); if (area_alias == MAP_FAILED) { - if (munmap(*alloc_area, nr_pages * page_size) < 0) { - perror("hugetlb munmap"); - exit(1); - } - *alloc_area = NULL; - return; + perror("mmap of hugetlb file alias failed"); + goto fail_munmap; } } + if (*alloc_area == area_src) { huge_fd_off0 = *alloc_area; alloc_area_alias = &area_src_alias; @@ -243,6 +240,16 @@ static void hugetlb_allocate_area(void **alloc_area) } if (area_alias) *alloc_area_alias = area_alias; + + return; + +fail_munmap: + if (munmap(*alloc_area, nr_pages * page_size) < 0) { + perror("hugetlb munmap"); + exit(1); + } +fail: + *alloc_area = NULL; } static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset) -- cgit v1.3.1 From 23fb55526d80122710c28cb6be0e5dba65a6a3f1 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 6 Dec 2020 10:22:22 +0200 Subject: selftests: mlxsw: Test RIF's reference count when joining a LAG Test that the reference count of a router interface (RIF) configured for a LAG is incremented / decremented when ports join / leave the LAG. Use the offload indication on routes configured on the RIF to understand if it was created / destroyed. The test fails without the previous patch. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/rtnetlink.sh | 43 ++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index a2eff5f58209..ed346da5d3cb 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -22,6 +22,7 @@ ALL_TESTS=" duplicate_vlans_test vlan_rif_refcount_test subport_rif_refcount_test + subport_rif_lag_join_test vlan_dev_deletion_test lag_unlink_slaves_test lag_dev_deletion_test @@ -440,6 +441,48 @@ subport_rif_refcount_test() ip link del dev bond1 } +subport_rif_lag_join_test() +{ + # Test that the reference count of a RIF configured for a LAG is + # incremented / decremented when ports join / leave the LAG. We use the + # offload indication on routes configured on the RIF to understand if + # it was created / destroyed + RET=0 + + ip link add name bond1 type bond mode 802.3ad + ip link set dev $swp1 down + ip link set dev $swp2 down + ip link set dev $swp1 master bond1 + ip link set dev $swp2 master bond1 + + ip link set dev bond1 up + ip -6 address add 2001:db8:1::1/64 dev bond1 + + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 + check_err $? "subport rif was not created on lag device" + + ip link set dev $swp1 nomaster + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 + check_err $? "subport rif of lag device was destroyed after removing one port" + + ip link set dev $swp1 master bond1 + ip link set dev $swp2 nomaster + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 + check_err $? "subport rif of lag device was destroyed after re-adding a port and removing another" + + ip link set dev $swp1 nomaster + busywait "$TIMEOUT" not wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 + check_err $? "subport rif of lag device was not destroyed when should" + + log_test "subport rif lag join" + + ip link del dev bond1 +} + vlan_dev_deletion_test() { # Test that VLAN devices are correctly deleted / unlinked when enslaved -- cgit v1.3.1 From 6f39cecdb6018234a47dcea15121f01b9903d16b Mon Sep 17 00:00:00 2001 From: Xingxing Su Date: Wed, 25 Nov 2020 12:04:57 +0800 Subject: rseq/selftests: Fix MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ build error under other arch. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Except arch x86, the function rseq_offset_deref_addv is not defined. The function test_membarrier_manager_thread call rseq_offset_deref_addv produces a build error. The RSEQ_ARCH_HAS_OFFSET_DEREF_ADD should contain all the code for the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ. If the other Arch implements this feature, defined RSEQ_ARCH_HAS_OFFSET_DEREF_ADD in the header file to ensure that this feature is available. Following build errors: param_test.c: In function ‘test_membarrier_worker_thread’: param_test.c:1164:10: warning: implicit declaration of function ‘rseq_offset_deref_addv’ ret = rseq_offset_deref_addv(&args->percpu_list_ptr, ^~~~~~~~~~~~~~~~~~~~~~ /tmp/ccMj9yHJ.o: In function `test_membarrier_worker_thread': param_test.c:1164: undefined reference to `rseq_offset_deref_addv' param_test.c:1164: undefined reference to `rseq_offset_deref_addv' collect2: error: ld returned 1 exit status make: *** [/selftests/rseq/param_test_benchmark] Error 1 Signed-off-by: Xingxing Su Acked-by: Mathieu Desnoyers Signed-off-by: Shuah Khan --- tools/testing/selftests/rseq/param_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 384589095864..699ad5f93c34 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -1133,6 +1133,8 @@ static int set_signal_handler(void) return ret; } +/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */ +#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV struct test_membarrier_thread_args { int stop; intptr_t percpu_list_ptr; @@ -1286,8 +1288,6 @@ void *test_membarrier_manager_thread(void *arg) return NULL; } -/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */ -#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV void test_membarrier(void) { const int num_threads = opt_threads; -- cgit v1.3.1 From 88f4ede44c585b24674dd99841040b2a1a856a76 Mon Sep 17 00:00:00 2001 From: Xingxing Su Date: Fri, 27 Nov 2020 11:16:57 +0800 Subject: selftests/clone3: Fix build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling the selftests with the -std=gnu99 option the build can fail with. Following build error: test_core.c: In function ‘test_cgcore_destroy’: test_core.c:87:2: error: ‘for’ loop initial declarations are only allowed in C99 mode for (int i = 0; i < 10; i++) { ^ test_core.c:87:2: note: use option -std=c99 or -std=gnu99 to compile Add -std=gnu99 to the clone3 selftest Makefile to fix this. Signed-off-by: Xingxing Su Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/clone3/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/clone3/Makefile b/tools/testing/selftests/clone3/Makefile index ef7564cb7abe..79b19a2863a0 100644 --- a/tools/testing/selftests/clone3/Makefile +++ b/tools/testing/selftests/clone3/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -g -I../../../../usr/include/ +CFLAGS += -g -std=gnu99 -I../../../../usr/include/ LDLIBS += -lcap TEST_GEN_PROGS := clone3 clone3_clear_sighand clone3_set_tid \ -- cgit v1.3.1 From 07f262d80d5f1f426da4557066bf0ec24ee32d97 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 23 Nov 2020 16:56:05 -0800 Subject: tools/power/x86/intel-speed-select: Read TRL from mailbox When SST-PP feature is not present, the TRL (Turbo Ratio Limits) is read from MSRs. This is done as the mailbox command will fail on Skylake-X based platform. But for IceLake servers, mailbox commands can still be used. So add a check to allow for non Skylake based platforms to read from mail box commands. Signed-off-by: Srinivas Pandruvada Link: https://lore.kernel.org/platform-driver-x86/57d6648282491906e0e1f70fe3b9a44f72cec90d.camel@intel.com/ Signed-off-by: Hans de Goede --- tools/power/x86/intel-speed-select/isst-core.c | 2 +- tools/power/x86/intel-speed-select/isst.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c index 1d7ecb54352e..8afd23407522 100644 --- a/tools/power/x86/intel-speed-select/isst-core.c +++ b/tools/power/x86/intel-speed-select/isst-core.c @@ -804,7 +804,7 @@ int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev) return ret; } - if (!pkg_dev->enabled) { + if (!pkg_dev->enabled && is_skx_based_platform()) { int freq; freq = get_cpufreq_base_freq(cpu); diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h index 29715e9c2e06..60db0bb084d5 100644 --- a/tools/power/x86/intel-speed-select/isst.h +++ b/tools/power/x86/intel-speed-select/isst.h @@ -255,4 +255,5 @@ extern int is_clx_n_platform(void); extern int get_cpufreq_base_freq(int cpu); extern int isst_read_pm_config(int cpu, int *cp_state, int *cp_cap); extern void isst_display_error_info_message(int error, char *msg, int arg_valid, int arg); +extern int is_skx_based_platform(void); #endif -- cgit v1.3.1 From 6c4832253a2d2259fd4002e3c4511035f81f48f6 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 3 Dec 2020 13:08:37 -0800 Subject: tools/power/x86/intel-speed-select: Account for missing sysfs for die_id Some older kernels will not have support to get CPU die_id from the sysfs. This requires several back ports. But the tool depends on getting die_id to match to correct CPU. Relax this restriction and use die_id as 0 when die_id is missing. This is not a problem as we don't have any multi-die processors with Intel SST support. This helps in running this tool on older kernels with just Intel SST drivers back ported. Signed-off-by: Srinivas Pandruvada Link: https://lore.kernel.org/platform-driver-x86/57d6648282491906e0e1f70fe3b9a44f72cec90d.camel@intel.com/ Signed-off-by: Hans de Goede --- tools/power/x86/intel-speed-select/isst-config.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index cd089a505859..271bc79bdc59 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -328,8 +328,12 @@ int get_physical_die_id(int cpu) int core_id, pkg_id, die_id; ret = get_stored_topology_info(cpu, &core_id, &pkg_id, &die_id); - if (!ret) + if (!ret) { + if (die_id < 0) + die_id = 0; + return die_id; + } } if (ret < 0) -- cgit v1.3.1 From 5e27cb9bca6746d1c9c76c4b4aeececcb18b2120 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 3 Dec 2020 14:29:40 -0800 Subject: tools/power/x86/intel-speed-select: Update version for v5.11 Update version for changes released with v5.11 kernel release. Signed-off-by: Srinivas Pandruvada Link: https://lore.kernel.org/platform-driver-x86/57d6648282491906e0e1f70fe3b9a44f72cec90d.camel@intel.com/ Signed-off-by: Hans de Goede --- tools/power/x86/intel-speed-select/isst-config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 271bc79bdc59..5390158cdb40 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -15,7 +15,7 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.6"; +static const char *version_str = "v1.7"; static const int supported_api_ver = 1; static struct isst_if_platform_info isst_platform_info; static char *progname; -- cgit v1.3.1 From c25ce589dca10d64dde139ae093abc258a32869c Mon Sep 17 00:00:00 2001 From: Finn Behrens Date: Mon, 23 Nov 2020 15:15:33 +0100 Subject: tweewide: Fix most Shebang lines Change every shebang which does not need an argument to use /usr/bin/env. This is needed as not every distro has everything under /usr/bin, sometimes not even bash. Signed-off-by: Finn Behrens Signed-off-by: Masahiro Yamada --- Documentation/sphinx/parse-headers.pl | 2 +- Documentation/target/tcm_mod_builder.py | 2 +- Documentation/trace/postprocess/decode_msr.py | 2 +- Documentation/trace/postprocess/trace-pagealloc-postprocess.pl | 2 +- Documentation/trace/postprocess/trace-vmscan-postprocess.pl | 2 +- arch/ia64/scripts/unwcheck.py | 2 +- scripts/bloat-o-meter | 2 +- scripts/config | 2 +- scripts/diffconfig | 2 +- scripts/get_abi.pl | 2 +- scripts/show_delta | 2 +- scripts/sphinx-pre-install | 2 +- scripts/split-man.pl | 2 +- scripts/tracing/draw_functrace.py | 2 +- tools/perf/python/tracepoint.py | 2 +- tools/perf/python/twatch.py | 2 +- tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py | 2 +- tools/testing/ktest/compare-ktest-sample.pl | 2 +- tools/testing/kunit/kunit.py | 2 +- tools/testing/kunit/kunit_tool_test.py | 2 +- tools/testing/selftests/bpf/test_offload.py | 2 +- tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py | 2 +- tools/testing/selftests/kselftest/prefix.pl | 2 +- tools/testing/selftests/net/devlink_port_split.py | 2 +- tools/testing/selftests/tc-testing/tdc_batch.py | 2 +- tools/testing/selftests/tc-testing/tdc_multibatch.py | 2 +- 26 files changed, 26 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/Documentation/sphinx/parse-headers.pl b/Documentation/sphinx/parse-headers.pl index 1910079f984f..b063f2f1cfb2 100755 --- a/Documentation/sphinx/parse-headers.pl +++ b/Documentation/sphinx/parse-headers.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl use strict; use Text::Tabs; use Getopt::Long; diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py index 1548d8420499..54492aa813b9 100755 --- a/Documentation/target/tcm_mod_builder.py +++ b/Documentation/target/tcm_mod_builder.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # The TCM v4 multi-protocol fabric module generation script for drivers/target/$NEW_MOD # # Copyright (c) 2010 Rising Tide Systems diff --git a/Documentation/trace/postprocess/decode_msr.py b/Documentation/trace/postprocess/decode_msr.py index 0ab40e0db580..aa9cc7abd5c2 100644 --- a/Documentation/trace/postprocess/decode_msr.py +++ b/Documentation/trace/postprocess/decode_msr.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # add symbolic names to read_msr / write_msr in trace # decode_msr msr-index.h < trace import sys diff --git a/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl b/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl index 0a120aae33ce..b9b7d80c2f9d 100644 --- a/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl +++ b/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # This is a POC (proof of concept or piece of crap, take your pick) for reading the # text representation of trace output related to page allocation. It makes an attempt # to extract some high-level information on what is going on. The accuracy of the parser diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl index 995da15b16ca..2f4e39875fb3 100644 --- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl +++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # This is a POC for reading the text representation of trace output related to # page reclaim. It makes an attempt to extract some high-level information on # what is going on. The accuracy of the parser may vary diff --git a/arch/ia64/scripts/unwcheck.py b/arch/ia64/scripts/unwcheck.py index c55276e31b6b..bfd1b671e35f 100644 --- a/arch/ia64/scripts/unwcheck.py +++ b/arch/ia64/scripts/unwcheck.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # SPDX-License-Identifier: GPL-2.0 # # Usage: unwcheck.py FILE diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter index d7ca46c612b3..652e9542043f 100755 --- a/scripts/bloat-o-meter +++ b/scripts/bloat-o-meter @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # # Copyright 2004 Matt Mackall # diff --git a/scripts/config b/scripts/config index eee5b7f3a092..8c8d7c3d7acc 100755 --- a/scripts/config +++ b/scripts/config @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # SPDX-License-Identifier: GPL-2.0 # Manipulate options in a .config file from the command line diff --git a/scripts/diffconfig b/scripts/diffconfig index 89abf777f197..627eba5849b5 100755 --- a/scripts/diffconfig +++ b/scripts/diffconfig @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # SPDX-License-Identifier: GPL-2.0 # # diffconfig - a tool to compare .config files. diff --git a/scripts/get_abi.pl b/scripts/get_abi.pl index 68dab828a722..92d9aa6cc4f5 100755 --- a/scripts/get_abi.pl +++ b/scripts/get_abi.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # SPDX-License-Identifier: GPL-2.0 use strict; diff --git a/scripts/show_delta b/scripts/show_delta index 264399307c4f..28e67e178194 100755 --- a/scripts/show_delta +++ b/scripts/show_delta @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # SPDX-License-Identifier: GPL-2.0-only # # show_deltas: Read list of printk messages instrumented with diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install index 40fa6923e80a..828a8615a918 100755 --- a/scripts/sphinx-pre-install +++ b/scripts/sphinx-pre-install @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # SPDX-License-Identifier: GPL-2.0-or-later use strict; diff --git a/scripts/split-man.pl b/scripts/split-man.pl index c3db607ee9ec..96bd99dc977a 100755 --- a/scripts/split-man.pl +++ b/scripts/split-man.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # SPDX-License-Identifier: GPL-2.0 # # Author: Mauro Carvalho Chehab diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py index b65735758520..74f8aadfd4cb 100755 --- a/scripts/tracing/draw_functrace.py +++ b/scripts/tracing/draw_functrace.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # SPDX-License-Identifier: GPL-2.0-only """ diff --git a/tools/perf/python/tracepoint.py b/tools/perf/python/tracepoint.py index eb76f6516247..461848c7f57d 100755 --- a/tools/perf/python/tracepoint.py +++ b/tools/perf/python/tracepoint.py @@ -1,4 +1,4 @@ -#! /usr/bin/python +#! /usr/bin/env python # SPDX-License-Identifier: GPL-2.0 # -*- python -*- # -*- coding: utf-8 -*- diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py index ff87ccf5b708..04f3db29b9bc 100755 --- a/tools/perf/python/twatch.py +++ b/tools/perf/python/twatch.py @@ -1,4 +1,4 @@ -#! /usr/bin/python +#! /usr/bin/env python # SPDX-License-Identifier: GPL-2.0-only # -*- python -*- # -*- coding: utf-8 -*- diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py index 3c47865bb247..e15e20696d17 100755 --- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py +++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # SPDX-License-Identifier: GPL-2.0-only # -*- coding: utf-8 -*- # diff --git a/tools/testing/ktest/compare-ktest-sample.pl b/tools/testing/ktest/compare-ktest-sample.pl index 4118eb4a842d..ebea21d0a1be 100755 --- a/tools/testing/ktest/compare-ktest-sample.pl +++ b/tools/testing/ktest/compare-ktest-sample.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # SPDX-License-Identifier: GPL-2.0 open (IN,"ktest.pl"); diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index d4f7846d0745..21516e293d17 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 # # A thin wrapper on top of the KUnit Kernel diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 497ab51bc170..b593f4448e83 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 # # A collection of tests for tools/testing/kunit/kunit.py diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 43c9cda199b8..f736d34b89e1 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # Copyright (C) 2017 Netronome Systems, Inc. # Copyright (c) 2019 Mellanox Technologies. All rights reserved diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py index 0d4b9327c9b3..2223337eed0c 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py +++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # SPDX-License-Identifier: GPL-2.0 import subprocess diff --git a/tools/testing/selftests/kselftest/prefix.pl b/tools/testing/selftests/kselftest/prefix.pl index 31f7c2a0a8bd..12a7f4ca2684 100755 --- a/tools/testing/selftests/kselftest/prefix.pl +++ b/tools/testing/selftests/kselftest/prefix.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # SPDX-License-Identifier: GPL-2.0 # Prefix all lines with "# ", unbuffered. Command being piped in may need # to have unbuffering forced with "stdbuf -i0 -o0 -e0 $cmd". diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py index 58bb7e9b88ce..834066d465fc 100755 --- a/tools/testing/selftests/net/devlink_port_split.py +++ b/tools/testing/selftests/net/devlink_port_split.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 from subprocess import PIPE, Popen diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py index 995f66ce43eb..35d5d9493784 100755 --- a/tools/testing/selftests/tc-testing/tdc_batch.py +++ b/tools/testing/selftests/tc-testing/tdc_batch.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 """ tdc_batch.py - a script to generate TC batch file diff --git a/tools/testing/selftests/tc-testing/tdc_multibatch.py b/tools/testing/selftests/tc-testing/tdc_multibatch.py index 5e7237952e49..48e1f17ff2e8 100755 --- a/tools/testing/selftests/tc-testing/tdc_multibatch.py +++ b/tools/testing/selftests/tc-testing/tdc_multibatch.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 """ tdc_multibatch.py - a thin wrapper over tdc_batch.py to generate multiple batch -- cgit v1.3.1 From 932c60558109a9131e54dacfda6070147fd1cdfb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 4 Dec 2020 15:20:01 -0800 Subject: tools/bpftool: Fix PID fetching with a lot of results In case of having so many PID results that they don't fit into a singe page (4096) bytes, bpftool will erroneously conclude that it got corrupted data due to 4096 not being a multiple of struct pid_iter_entry, so the last entry will be partially truncated. Fix this by sizing the buffer to fit exactly N entries with no truncation in the middle of record. Fixes: d53dee3fe013 ("tools/bpftool: Show info for processes holding BPF map/prog/link/btf FDs") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201204232002.3589803-1-andrii@kernel.org --- tools/bpf/bpftool/pids.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index df7d8ec76036..477e55d59c34 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -89,9 +89,9 @@ libbpf_print_none(__maybe_unused enum libbpf_print_level level, int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) { - char buf[4096]; - struct pid_iter_bpf *skel; struct pid_iter_entry *e; + char buf[4096 / sizeof(*e) * sizeof(*e)]; + struct pid_iter_bpf *skel; int err, ret, fd = -1, i; libbpf_print_fn_t default_print; -- cgit v1.3.1 From 547f574fd9d5e3925d47fd44decbf6ab6df94b0e Mon Sep 17 00:00:00 2001 From: Mathieu Chouquet-Stringer Date: Wed, 2 Dec 2020 16:32:43 +0100 Subject: docs: Update documentation to reflect what TAINT_CPU_OUT_OF_SPEC means Here's a patch updating the meaning of TAINT_CPU_OUT_OF_SPEC after Borislav introduced changes in a7e1f67ed29f and upcoming patches in tip. TAINT_CPU_OUT_OF_SPEC now means a bit more what it implies as the flag isn't set just because of a CPU misconfiguration or mismatch. Historically it was for SMP kernel oops on an officially SMP incapable processor but now it also covers CPUs whose MSRs have been incorrectly poked at from userspace, drivers being used on non supported architectures, broken firmware, mismatched CPUs, ... Update documentation and script to reflect that. Signed-off-by: Mathieu Chouquet-Stringer Link: https://lore.kernel.org/r/20201202153244.709752-1-me@mathieu.digital Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/sysctl/kernel.rst | 2 +- Documentation/admin-guide/tainted-kernels.rst | 23 ++++++++++++++++++----- tools/debugging/kernel-chktaint | 2 +- 3 files changed, 20 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index d7a17859adbd..1d56a6b73a4e 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1336,7 +1336,7 @@ ORed together. The letters are seen in "Tainted" line of Oops reports. ====== ===== ============================================================== 1 `(P)` proprietary module was loaded 2 `(F)` module was force loaded - 4 `(S)` SMP kernel oops on an officially SMP incapable processor + 4 `(S)` kernel running on an out of specification system 8 `(R)` module was force unloaded 16 `(M)` processor reported a Machine Check Exception (MCE) 32 `(B)` bad page referenced or some unexpected page flags diff --git a/Documentation/admin-guide/tainted-kernels.rst b/Documentation/admin-guide/tainted-kernels.rst index f718a2eaf1f6..ceeed7b0798d 100644 --- a/Documentation/admin-guide/tainted-kernels.rst +++ b/Documentation/admin-guide/tainted-kernels.rst @@ -84,7 +84,7 @@ Bit Log Number Reason that got the kernel tainted === === ====== ======================================================== 0 G/P 1 proprietary module was loaded 1 _/F 2 module was force loaded - 2 _/S 4 SMP kernel oops on an officially SMP incapable processor + 2 _/S 4 kernel running on an out of specification system 3 _/R 8 module was force unloaded 4 _/M 16 processor reported a Machine Check Exception (MCE) 5 _/B 32 bad page referenced or some unexpected page flags @@ -116,10 +116,23 @@ More detailed explanation for tainting 1) ``F`` if any module was force loaded by ``insmod -f``, ``' '`` if all modules were loaded normally. - 2) ``S`` if the oops occurred on an SMP kernel running on hardware that - hasn't been certified as safe to run multiprocessor. - Currently this occurs only on various Athlons that are not - SMP capable. + 2) ``S`` if the kernel is running on a processor or system that is out of + specification: hardware has been put into an unsupported configuration, + therefore proper execution cannot be guaranteed. + Kernel will be tainted if, for example: + + - on x86: PAE is forced through forcepae on intel CPUs (such as Pentium M) + which do not report PAE but may have a functional implementation, an SMP + kernel is running on non officially capable SMP Athlon CPUs, MSRs are + being poked at from userspace. + - on arm: kernel running on certain CPUs (such as Keystone 2) without + having certain kernel features enabled. + - on arm64: there are mismatched hardware features between CPUs, the + bootloader has booted CPUs in different modes. + - certain drivers are being used on non supported architectures (such as + scsi/snic on something else than x86_64, scsi/ips on non + x86/x86_64/itanium, have broken firmware settings for the + irqchip/irq-gic on arm64 ...). 3) ``R`` if a module was force unloaded by ``rmmod -f``, ``' '`` if all modules were unloaded normally. diff --git a/tools/debugging/kernel-chktaint b/tools/debugging/kernel-chktaint index 2240cb56e6e5..607b2b280945 100755 --- a/tools/debugging/kernel-chktaint +++ b/tools/debugging/kernel-chktaint @@ -72,7 +72,7 @@ if [ `expr $T % 2` -eq 0 ]; then addout " " else addout "S" - echo " * SMP kernel oops on an officially SMP incapable processor (#2)" + echo " * kernel running on an out of specification system (#2)" fi T=`expr $T / 2` -- cgit v1.3.1 From 4cec85296c7c7a123890d9335b835f991b36e106 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 8 Dec 2020 11:22:52 +0200 Subject: selftests: forwarding: Add Q-in-VNI test Add test to check Q-in-VNI traffic. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/q_in_vni.sh | 347 +++++++++++++++++++++ 1 file changed, 347 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/q_in_vni.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/q_in_vni.sh b/tools/testing/selftests/net/forwarding/q_in_vni.sh new file mode 100755 index 000000000000..4c50c0234bce --- /dev/null +++ b/tools/testing/selftests/net/forwarding/q_in_vni.sh @@ -0,0 +1,347 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.10 | | + $h2.10 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | | | | | +# | | + $h1.20 | | | + $h2.20 | +# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 | +# | \| | | \| | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1ad) + $swp2 | | +# | | vid 100 pvid untagged vid 100 pvid | | +# | | untagged | | +# | | + vx100 (vxlan) | | +# | | local 192.0.2.17 | | +# | | remote 192.0.2.34 192.0.2.50 | | +# | | id 1000 dstport $VXPORT | | +# | | vid 100 pvid untagged | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 192.0.2.32/28 via 192.0.2.18 | +# | 192.0.2.48/28 via 192.0.2.18 | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 192.0.2.33/28 | 192.0.2.49/28 | +# +----|---------------------------------------|----------------+ +# | | +# +----|------------------------------+ +----|------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 192.0.2.34/28 | | 192.0.2.50/28 | +# | | | | +# | 192.0.2.16/28 via 192.0.2.33 | | 192.0.2.16/28 via 192.0.2.49 | +# | 192.0.2.50/32 via 192.0.2.33 | | 192.0.2.34/32 via 192.0.2.49 | +# | | | | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR2 (802.1ad) | | | | BR2 (802.1ad) | | +# | | + vx100 (vxlan) | | | | + vx100 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | | +# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | vid 100 pvid untagged | | | | vid 100 pvid untagged | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 100 pvid untagged | | | | | vid 100 pvid untagged | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | VW2 (vrf) | | | | | VW2 (vrf) | | +# | | + w2 (veth) | | | | + w2 (veth) | | +# | | |\ | | | | |\ | | +# | | | + w2.10 | | | | | + w2.10 | | +# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | | +# | | | | | | | | | | +# | | + w2.20 | | | | + w2.20 | | +# | | 198.51.100.3/24 | | | | 198.51.100.4/24 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-----------------------------------+ +-----------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv4 + "} + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + tc qdisc add dev $h1 clsact + vlan_create $h1 10 v$h1 192.0.2.1/28 + vlan_create $h1 20 v$h1 198.51.100.1/24 +} + +h1_destroy() +{ + vlan_destroy $h1 20 + vlan_destroy $h1 10 + tc qdisc del dev $h1 clsact + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + tc qdisc add dev $h2 clsact + vlan_create $h2 10 v$h2 192.0.2.2/28 + vlan_create $h2 20 v$h2 198.51.100.2/24 +} + +h2_destroy() +{ + vlan_destroy $h2 20 + vlan_destroy $h2 10 + tc qdisc del dev $h2 clsact + simple_if_fini $h2 +} + +rp1_set_addr() +{ + ip address add dev $rp1 192.0.2.17/28 + + ip route add 192.0.2.32/28 nexthop via 192.0.2.18 + ip route add 192.0.2.48/28 nexthop via 192.0.2.18 +} + +rp1_unset_addr() +{ + ip route del 192.0.2.48/28 nexthop via 192.0.2.18 + ip route del 192.0.2.32/28 nexthop via 192.0.2.18 + + ip address del dev $rp1 192.0.2.17/28 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_protocol 802.1ad \ + vlan_default_pvid 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + rp1_set_addr + + ip link add name vx100 type vxlan id 1000 \ + local 192.0.2.17 dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx100 up + + ip link set dev vx100 master br1 + bridge vlan add vid 100 dev vx100 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + bridge vlan add vid 100 dev $swp1 pvid untagged + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + bridge vlan add vid 100 dev $swp2 pvid untagged + + bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.50 self +} + +switch_destroy() +{ + bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self + + bridge vlan del vid 100 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 100 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev vx100 nomaster + ip link set dev vx100 down + ip link del dev vx100 + + rp1_unset_addr + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 192.0.2.18/28 + __simple_if_init v1 v$rp2 192.0.2.33/28 + __simple_if_init v3 v$rp2 192.0.2.49/28 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 192.0.2.49/28 + __simple_if_fini v1 192.0.2.33/28 + simple_if_fini $rp2 192.0.2.18/28 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local nh_addr=$1; shift + local host_addr1=$1; shift + local host_addr2=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/28 + tc qdisc add dev $in_if clsact + + ip link add name br2 type bridge vlan_filtering 1 vlan_protocol 802.1ad \ + vlan_default_pvid 0 + ip link set dev br2 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br2 + ip link set dev w1 up + bridge vlan add vid 100 dev w1 pvid untagged + + ip link add name vx100 type vxlan id 1000 local $in_addr \ + dstport "$VXPORT" + ip link set dev vx100 up + bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.17 self + bridge fdb append dev vx100 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx100 master br2 + tc qdisc add dev vx100 clsact + + bridge vlan add vid 100 dev vx100 pvid untagged + + simple_if_init w2 + vlan_create w2 10 vw2 $host_addr1/28 + vlan_create w2 20 vw2 $host_addr2/24 + + ip route add 192.0.2.16/28 nexthop via $nh_addr + ip route add $other_in_addr/32 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 192.0.2.34 192.0.2.50 192.0.2.33 \ + 192.0.2.3 198.51.100.3 +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 192.0.2.50 192.0.2.34 192.0.2.49 \ + 192.0.2.4 198.51.100.4 +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 ": local->local" + ping_test $h1 192.0.2.3 ": local->remote 1" + ping_test $h1 192.0.2.4 ": local->remote 2" +} + +test_all() +{ + echo "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS -- cgit v1.3.1 From 477ce6d971159910fb8ae76755c8027aa6a84dde Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Tue, 8 Dec 2020 11:22:53 +0200 Subject: selftests: mlxsw: Add Q-in-VNI veto tests Add tests to ensure that the forbidden and unsupported cases are indeed vetoed by mlxsw driver. Signed-off-by: Amit Cohen Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh | 77 ++++++++++++++++++++++ .../drivers/net/mlxsw/spectrum/q_in_vni_veto.sh | 66 +++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh create mode 100755 tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh new file mode 100755 index 000000000000..0231205a7147 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh @@ -0,0 +1,77 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../../net/forwarding + +VXPORT=4789 + +ALL_TESTS=" + create_dot1d_and_dot1ad_vxlans +" +NUM_NETIFS=2 +source $lib_dir/lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +create_dot1d_and_dot1ad_vxlans() +{ + RET=0 + + ip link add dev br0 type bridge vlan_filtering 1 vlan_protocol 802.1ad \ + vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 up + + ip link add name vx100 type vxlan id 1000 local 192.0.2.17 dstport \ + "$VXPORT" nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx100 up + + ip link set dev $swp1 master br0 + ip link set dev vx100 master br0 + bridge vlan add vid 100 dev vx100 pvid untagged + + ip link add dev br1 type bridge vlan_filtering 0 mcast_snooping 0 + ip link set dev br1 up + + ip link add name vx200 type vxlan id 2000 local 192.0.2.17 dstport \ + "$VXPORT" nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx200 up + + ip link set dev $swp2 master br1 + ip link set dev vx200 master br1 2>/dev/null + check_fail $? "802.1d and 802.1ad VxLANs at the same time not rejected" + + ip link set dev vx200 master br1 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "802.1d and 802.1ad VxLANs at the same time rejected without extack" + + log_test "create 802.1d and 802.1ad VxLANs" + + ip link del dev vx200 + ip link del dev br1 + ip link del dev vx100 + ip link del dev br0 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh new file mode 100755 index 000000000000..f0443b1b05b9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../../net/forwarding + +VXPORT=4789 + +ALL_TESTS=" + create_vxlan_on_top_of_8021ad_bridge +" +NUM_NETIFS=2 +source $lib_dir/lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +create_vxlan_on_top_of_8021ad_bridge() +{ + RET=0 + + ip link add dev br0 type bridge vlan_filtering 1 vlan_protocol 802.1ad \ + vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 up + + ip link add name vx100 type vxlan id 1000 local 192.0.2.17 dstport \ + "$VXPORT" nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx100 up + + ip link set dev $swp1 master br0 + ip link set dev vx100 master br0 + + bridge vlan add vid 100 dev vx100 pvid untagged 2>/dev/null + check_fail $? "802.1ad bridge with VxLAN in Spectrum-1 not rejected" + + bridge vlan add vid 100 dev vx100 pvid untagged 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "802.1ad bridge with VxLAN in Spectrum-1 rejected without extack" + + log_test "create VxLAN on top of 802.1ad bridge" + + ip link del dev vx100 + ip link del dev br0 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS -- cgit v1.3.1 From 0b5b6e747c86e57b7ebd64ccb84314a227ccfcc2 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Wed, 9 Dec 2020 14:57:38 +0100 Subject: selftests/bpf/test_offload.py: Remove check for program load flags match MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we just removed the xdp_attachment_flags_ok() callback, also remove the check for it in test_offload.py, and replace it with a test for the new ambiguity-avoid check when multiple programs are loaded. Fixes: 7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs in net_device") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/160752225858.110217.13036901876869496246.stgit@toke.dk --- tools/testing/selftests/bpf/test_offload.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 43c9cda199b8..becd27b2f4ba 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -716,13 +716,11 @@ def test_multi_prog(simdev, sim, obj, modename, modeid): fail(ret == 0, "Replaced one of programs without -force") check_extack(err, "XDP program already attached.", args) - if modename == "" or modename == "drv": - othermode = "" if modename == "drv" else "drv" - start_test("Test multi-attachment XDP - detach...") - ret, _, err = sim.unset_xdp(othermode, force=True, - fail=False, include_stderr=True) - fail(ret == 0, "Removed program with a bad mode") - check_extack(err, "program loaded with different flags.", args) + start_test("Test multi-attachment XDP - remove without mode...") + ret, _, err = sim.unset_xdp("", force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Removed program without a mode flag") + check_extack(err, "More than one program loaded, unset mode is ambiguous.", args) sim.unset_xdp("offload") xdp = sim.ip_link_show(xdp=True)["xdp"] @@ -1001,16 +999,6 @@ try: check_extack(err, "native and generic XDP can't be active at the same time.", args) - ret, _, err = sim.set_xdp(obj, "", force=True, - fail=False, include_stderr=True) - fail(ret == 0, "Replaced XDP program with a program in different mode") - check_extack(err, "program loaded with different flags.", args) - - start_test("Test XDP prog remove with bad flags...") - ret, _, err = sim.unset_xdp("", force=True, - fail=False, include_stderr=True) - fail(ret == 0, "Removed program with a bad mode") - check_extack(err, "program loaded with different flags.", args) start_test("Test MTU restrictions...") ret, _ = sim.set_mtu(9000, fail=False) -- cgit v1.3.1 From d8b5e76ae4e02908d000397597c6bc2868362fbb Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Wed, 9 Dec 2020 14:57:40 +0100 Subject: selftests/bpf/test_offload.py: Only check verifier log on verification fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 6f8a57ccf851 ("bpf: Make verifier log more relevant by default"), the verifier discards log messages for successfully-verified programs. This broke test_offload.py which is looking for a verification message from the driver callback. Change test_offload.py to use the toggle in netdevsim to make the verification fail before looking for the verification message. Fixes: 6f8a57ccf851 ("bpf: Make verifier log more relevant by default") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/160752226069.110217.12370824996153348073.stgit@toke.dk --- tools/testing/selftests/bpf/test_offload.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index becd27b2f4ba..61527b43f067 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -911,11 +911,18 @@ try: sim.tc_flush_filters() + start_test("Test TC offloads failure...") + sim.dfs["dev/bpf_bind_verifier_accept"] = 0 + ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "TC filter did not reject with TC offloads enabled") + check_verifier_log(err, "[netdevsim] Hello from netdevsim!") + sim.dfs["dev/bpf_bind_verifier_accept"] = 1 + start_test("Test TC offloads work...") ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True, fail=False, include_stderr=True) fail(ret != 0, "TC filter did not load with TC offloads enabled") - check_verifier_log(err, "[netdevsim] Hello from netdevsim!") start_test("Test TC offload basics...") dfs = simdev.dfs_get_bound_progs(expected=1) @@ -1032,6 +1039,15 @@ try: rm("/sys/fs/bpf/offload") sim.wait_for_flush() + start_test("Test XDP load failure...") + sim.dfs["dev/bpf_bind_verifier_accept"] = 0 + ret, _, err = bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/offload", + dev=sim['ifname'], fail=False, include_stderr=True) + fail(ret == 0, "verifier should fail on load") + check_verifier_log(err, "[netdevsim] Hello from netdevsim!") + sim.dfs["dev/bpf_bind_verifier_accept"] = 1 + sim.wait_for_flush() + start_test("Test XDP offload...") _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True) ipl = sim.ip_link_show(xdp=True) @@ -1039,7 +1055,6 @@ try: progs = bpftool_prog_list(expected=1) prog = progs[0] fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID") - check_verifier_log(err, "[netdevsim] Hello from netdevsim!") start_test("Test XDP offload is device bound...") dfs = simdev.dfs_get_bound_progs(expected=1) -- cgit v1.3.1 From 852c2ee338f0ac6026458615b624e1c496142cf6 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Wed, 9 Dec 2020 14:57:41 +0100 Subject: selftests/bpf/test_offload.py: Fix expected case of extack messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs in net_device") changed the case of some of the extack messages being returned when attaching of XDP programs failed. This broke test_offload.py, so let's fix the test to reflect this. Fixes: 7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs in net_device") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/160752226175.110217.11214100824416344952.stgit@toke.dk --- tools/testing/selftests/bpf/test_offload.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 61527b43f067..51a5e4d939cc 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -1004,7 +1004,7 @@ try: fail=False, include_stderr=True) fail(ret == 0, "Replaced XDP program with a program in different mode") check_extack(err, - "native and generic XDP can't be active at the same time.", + "Native and generic XDP can't be active at the same time.", args) start_test("Test MTU restrictions...") @@ -1035,7 +1035,7 @@ try: offload = bpf_pinned("/sys/fs/bpf/offload") ret, _, err = sim.set_xdp(offload, "drv", fail=False, include_stderr=True) fail(ret == 0, "attached offloaded XDP program to drv") - check_extack(err, "using device-bound program without HW_MODE flag is not supported.", args) + check_extack(err, "Using device-bound program without HW_MODE flag is not supported.", args) rm("/sys/fs/bpf/offload") sim.wait_for_flush() -- cgit v1.3.1 From 766e62b7fcd2cf1d43e6594ba37c659dc48f7ddb Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Wed, 9 Dec 2020 14:57:42 +0100 Subject: selftests/bpf/test_offload.py: Reset ethtool features after failed setting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When setting the ethtool feature flag fails (as expected for the test), the kernel now tracks that the feature was requested to be 'off' and refuses to subsequently disable it again. So reset it back to 'on' so a subsequent disable (that's not supposed to fail) can succeed. Fixes: 417ec26477a5 ("selftests/bpf: add offload test based on netdevsim") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/160752226280.110217.10696241563705667871.stgit@toke.dk --- tools/testing/selftests/bpf/test_offload.py | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 51a5e4d939cc..2128fbd8414b 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -946,6 +946,7 @@ try: start_test("Test disabling TC offloads is rejected while filters installed...") ret, _ = sim.set_ethtool_tc_offloads(False, fail=False) fail(ret == 0, "Driver should refuse to disable TC offloads with filters installed...") + sim.set_ethtool_tc_offloads(True) start_test("Test qdisc removal frees things...") sim.tc_flush_filters() -- cgit v1.3.1 From 8158cad13435639cd4962fb88970960f880ef6d9 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Wed, 9 Dec 2020 14:57:43 +0100 Subject: selftests/bpf/test_offload.py: Filter bpftool internal map when counting maps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A few of the tests in test_offload.py expects to see a certain number of maps created, and checks this by counting the number of maps returned by bpftool. There is already a filter that will remove any maps already there at the beginning of the test, but bpftool now creates a map for the PID iterator rodata on each invocation, which makes the map count wrong. Fix this by also filtering the pid_iter.rodata map by name when counting. Fixes: d53dee3fe013 ("tools/bpftool: Show info for processes holding BPF map/prog/link/btf FDs") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/160752226387.110217.9887866138149423444.stgit@toke.dk --- tools/testing/selftests/bpf/test_offload.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 2128fbd8414b..b99bb8ed3ed4 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -184,9 +184,7 @@ def bpftool_prog_list(expected=None, ns=""): def bpftool_map_list(expected=None, ns=""): _, maps = bpftool("map show", JSON=True, ns=ns, fail=True) # Remove the base maps - for m in base_maps: - if m in maps: - maps.remove(m) + maps = [m for m in maps if m not in base_maps and m.get('name') not in base_map_names] if expected is not None: if len(maps) != expected: fail(True, "%d BPF maps loaded, expected %d" % @@ -770,6 +768,9 @@ ret, progs = bpftool("prog", fail=False) skip(ret != 0, "bpftool not installed") base_progs = progs _, base_maps = bpftool("map") +base_map_names = [ + 'pid_iter.rodata' # created on each bpftool invocation +] # Check netdevsim ret, out = cmd("modprobe netdevsim", fail=False) -- cgit v1.3.1 From a89052572ebbf4bcee7c39390640e92b60eaa653 Mon Sep 17 00:00:00 2001 From: Weqaar Janjua Date: Mon, 7 Dec 2020 21:53:29 +0000 Subject: selftests/bpf: Xsk selftests framework MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds AF_XDP selftests framework under selftests/bpf. Topology: --------- ----------- ----------- | xskX | --------- | xskY | ----------- | ----------- | | | ----------- | ---------- | vethX | --------- | vethY | ----------- peer ---------- | | | namespaceX | namespaceY Prerequisites setup by script test_xsk.sh: Set up veth interfaces as per the topology shown ^^: * setup two veth interfaces and one namespace ** veth in root namespace ** veth in af_xdp namespace ** namespace af_xdp * create a spec file veth.spec that includes this run-time configuration *** xxxx and yyyy are randomly generated 4 digit numbers used to avoid conflict with any existing interface * tests the veth and xsk layers of the topology Signed-off-by: Weqaar Janjua Signed-off-by: Daniel Borkmann Tested-by: Yonghong Song Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20201207215333.11586-2-weqaar.a.janjua@intel.com --- tools/testing/selftests/bpf/Makefile | 4 +- tools/testing/selftests/bpf/test_xsk.sh | 152 +++++++++++++++++++++++++++++ tools/testing/selftests/bpf/xsk_prereqs.sh | 119 ++++++++++++++++++++++ 3 files changed, 274 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/bpf/test_xsk.sh create mode 100755 tools/testing/selftests/bpf/xsk_prereqs.sh (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index ac25ba5d0d6c..6a1ddfe68f15 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -46,7 +46,8 @@ endif TEST_GEN_FILES = TEST_FILES = test_lwt_ip_encap.o \ - test_tc_edt.o + test_tc_edt.o \ + xsk_prereqs.sh # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ @@ -70,6 +71,7 @@ TEST_PROGS := test_kmod.sh \ test_bpftool_build.sh \ test_bpftool.sh \ test_bpftool_metadata.sh \ + test_xsk.sh TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh \ diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh new file mode 100755 index 000000000000..cae4c5574c4c --- /dev/null +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -0,0 +1,152 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2020 Intel Corporation, Weqaar Janjua + +# AF_XDP selftests based on veth +# +# End-to-end AF_XDP over Veth test +# +# Topology: +# --------- +# ----------- ----------- +# | xskX | --------- | xskY | +# ----------- | ----------- +# | | | +# ----------- | ---------- +# | vethX | --------- | vethY | +# ----------- peer ---------- +# | | | +# namespaceX | namespaceY +# +# AF_XDP is an address family optimized for high performance packet processing, +# it is XDP’s user-space interface. +# +# An AF_XDP socket is linked to a single UMEM which is a region of virtual +# contiguous memory, divided into equal-sized frames. +# +# Refer to AF_XDP Kernel Documentation for detailed information: +# https://www.kernel.org/doc/html/latest/networking/af_xdp.html +# +# Prerequisites setup by script: +# +# Set up veth interfaces as per the topology shown ^^: +# * setup two veth interfaces and one namespace +# ** veth in root namespace +# ** veth in af_xdp namespace +# ** namespace af_xdp +# * create a spec file veth.spec that includes this run-time configuration +# *** xxxx and yyyy are randomly generated 4 digit numbers used to avoid +# conflict with any existing interface +# * tests the veth and xsk layers of the topology +# +# Kernel configuration: +# --------------------- +# See "config" file for recommended kernel config options. +# +# Turn on XDP sockets and veth support when compiling i.e. +# Networking support --> +# Networking options --> +# [ * ] XDP sockets +# +# Executing Tests: +# ---------------- +# Must run with CAP_NET_ADMIN capability. +# +# Run (full color-coded output): +# sudo ./test_xsk.sh -c +# +# If running from kselftests: +# sudo make colorconsole=1 run_tests +# +# Run (full output without color-coding): +# sudo ./test_xsk.sh + +. xsk_prereqs.sh + +while getopts c flag +do + case "${flag}" in + c) colorconsole=1;; + esac +done + +TEST_NAME="PREREQUISITES" + +URANDOM=/dev/urandom +[ ! -e "${URANDOM}" ] && { echo "${URANDOM} not found. Skipping tests."; test_exit 1 1; } + +VETH0_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head --bytes 4) +VETH0=ve${VETH0_POSTFIX} +VETH1_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head --bytes 4) +VETH1=ve${VETH1_POSTFIX} +NS0=root +NS1=af_xdp${VETH1_POSTFIX} +MTU=1500 + +setup_vethPairs() { + echo "setting up ${VETH0}: namespace: ${NS0}" + ip netns add ${NS1} + ip link add ${VETH0} type veth peer name ${VETH1} + if [ -f /proc/net/if_inet6 ]; then + echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6 + fi + echo "setting up ${VETH1}: namespace: ${NS1}" + ip link set ${VETH1} netns ${NS1} + ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU} + ip link set ${VETH0} mtu ${MTU} + ip netns exec ${NS1} ip link set ${VETH1} up + ip link set ${VETH0} up +} + +validate_root_exec +validate_veth_support ${VETH0} +validate_ip_utility +setup_vethPairs + +retval=$? +if [ $retval -ne 0 ]; then + test_status $retval "${TEST_NAME}" + cleanup_exit ${VETH0} ${VETH1} ${NS1} + exit $retval +fi + +echo "${VETH0}:${VETH1},${NS1}" > ${SPECFILE} + +validate_veth_spec_file + +echo "Spec file created: ${SPECFILE}" + +test_status $retval "${TEST_NAME}" + +## START TESTS + +statusList=() + +### TEST 1 +TEST_NAME="XSK KSELFTEST FRAMEWORK" + +echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Generic mode" +vethXDPgeneric ${VETH0} ${VETH1} ${NS1} + +retval=$? +if [ $retval -eq 0 ]; then + echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Native mode" + vethXDPnative ${VETH0} ${VETH1} ${NS1} +fi + +retval=$? +test_status $retval "${TEST_NAME}" +statusList+=($retval) + +## END TESTS + +cleanup_exit ${VETH0} ${VETH1} ${NS1} + +for _status in "${statusList[@]}" +do + if [ $_status -ne 0 ]; then + test_exit $ksft_fail 0 + fi +done + +test_exit $ksft_pass 0 diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh new file mode 100755 index 000000000000..29762739c21b --- /dev/null +++ b/tools/testing/selftests/bpf/xsk_prereqs.sh @@ -0,0 +1,119 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2020 Intel Corporation. + +ksft_pass=0 +ksft_fail=1 +ksft_xfail=2 +ksft_xpass=3 +ksft_skip=4 + +GREEN='\033[0;92m' +YELLOW='\033[0;93m' +RED='\033[0;31m' +NC='\033[0m' +STACK_LIM=131072 +SPECFILE=veth.spec + +validate_root_exec() +{ + msg="skip all tests:" + if [ $UID != 0 ]; then + echo $msg must be run as root >&2 + test_exit $ksft_fail 2 + else + return $ksft_pass + fi +} + +validate_veth_support() +{ + msg="skip all tests:" + if [ $(ip link add $1 type veth 2>/dev/null; echo $?;) != 0 ]; then + echo $msg veth kernel support not available >&2 + test_exit $ksft_skip 1 + else + ip link del $1 + return $ksft_pass + fi +} + +validate_veth_spec_file() +{ + if [ ! -f ${SPECFILE} ]; then + test_exit $ksft_skip 1 + fi +} + +test_status() +{ + statusval=$1 + if [ -n "${colorconsole+set}" ]; then + if [ $statusval -eq 2 ]; then + echo -e "${YELLOW}$2${NC}: [ ${RED}FAIL${NC} ]" + elif [ $statusval -eq 1 ]; then + echo -e "${YELLOW}$2${NC}: [ ${RED}SKIPPED${NC} ]" + elif [ $statusval -eq 0 ]; then + echo -e "${YELLOW}$2${NC}: [ ${GREEN}PASS${NC} ]" + fi + else + if [ $statusval -eq 2 ]; then + echo -e "$2: [ FAIL ]" + elif [ $statusval -eq 1 ]; then + echo -e "$2: [ SKIPPED ]" + elif [ $statusval -eq 0 ]; then + echo -e "$2: [ PASS ]" + fi + fi +} + +test_exit() +{ + retval=$1 + if [ $2 -ne 0 ]; then + test_status $2 $(basename $0) + fi + exit $retval +} + +clear_configs() +{ + if [ $(ip netns show | grep $3 &>/dev/null; echo $?;) == 0 ]; then + [ $(ip netns exec $3 ip link show $2 &>/dev/null; echo $?;) == 0 ] && + { echo "removing link $1:$2"; ip netns exec $3 ip link del $2; } + echo "removing ns $3" + ip netns del $3 + fi + #Once we delete a veth pair node, the entire veth pair is removed, + #this is just to be cautious just incase the NS does not exist then + #veth node inside NS won't get removed so we explicitly remove it + [ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] && + { echo "removing link $1"; ip link del $1; } + if [ -f ${SPECFILE} ]; then + echo "removing spec file:" ${SPECFILE} + rm -f ${SPECFILE} + fi +} + +cleanup_exit() +{ + echo "cleaning up..." + clear_configs $1 $2 $3 +} + +validate_ip_utility() +{ + [ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip 1; } +} + +vethXDPgeneric() +{ + ip link set dev $1 xdpdrv off + ip netns exec $3 ip link set dev $2 xdpdrv off +} + +vethXDPnative() +{ + ip link set dev $1 xdpgeneric off + ip netns exec $3 ip link set dev $2 xdpgeneric off +} -- cgit v1.3.1 From facb7cb2e909ad2d21ebbfdc051726d4cd8f1d35 Mon Sep 17 00:00:00 2001 From: Weqaar Janjua Date: Mon, 7 Dec 2020 21:53:30 +0000 Subject: selftests/bpf: Xsk selftests - SKB POLL, NOPOLL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds following tests: 1. AF_XDP SKB mode Generic mode XDP is driver independent, used when the driver does not have support for XDP. Works on any netdevice using sockets and generic XDP path. XDP hook from netif_receive_skb(). a. nopoll - soft-irq processing b. poll - using poll() syscall Signed-off-by: Weqaar Janjua Signed-off-by: Daniel Borkmann Tested-by: Yonghong Song Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20201207215333.11586-3-weqaar.a.janjua@intel.com --- tools/testing/selftests/bpf/Makefile | 3 +- tools/testing/selftests/bpf/test_xsk.sh | 39 +- tools/testing/selftests/bpf/xdpxceiver.c | 979 +++++++++++++++++++++++++++++ tools/testing/selftests/bpf/xdpxceiver.h | 153 +++++ tools/testing/selftests/bpf/xsk_prereqs.sh | 16 + 5 files changed, 1187 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/bpf/xdpxceiver.c create mode 100644 tools/testing/selftests/bpf/xdpxceiver.h (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 6a1ddfe68f15..944ae17a39ed 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -82,7 +82,8 @@ TEST_PROGS_EXTENDED := with_addr.sh \ # Compile but not part of 'make run_tests' TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ - test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko + test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ + xdpxceiver TEST_CUSTOM_PROGS = urandom_read diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index cae4c5574c4c..0b7bafb65f43 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -8,8 +8,17 @@ # # Topology: # --------- -# ----------- ----------- -# | xskX | --------- | xskY | +# ----------- +# _ | Process | _ +# / ----------- \ +# / | \ +# / | \ +# ----------- | ----------- +# | Thread1 | | | Thread2 | +# ----------- | ----------- +# | | | +# ----------- | ----------- +# | xskX | | | xskY | # ----------- | ----------- # | | | # ----------- | ---------- @@ -39,6 +48,8 @@ # conflict with any existing interface # * tests the veth and xsk layers of the topology # +# See the source xdpxceiver.c for information on each test +# # Kernel configuration: # --------------------- # See "config" file for recommended kernel config options. @@ -138,6 +149,30 @@ retval=$? test_status $retval "${TEST_NAME}" statusList+=($retval) +### TEST 2 +TEST_NAME="SKB NOPOLL" + +vethXDPgeneric ${VETH0} ${VETH1} ${NS1} + +params=("-S") +execxdpxceiver params + +retval=$? +test_status $retval "${TEST_NAME}" +statusList+=($retval) + +### TEST 3 +TEST_NAME="SKB POLL" + +vethXDPgeneric ${VETH0} ${VETH1} ${NS1} + +params=("-S" "-p") +execxdpxceiver params + +retval=$? +test_status $retval "${TEST_NAME}" +statusList+=($retval) + ## END TESTS cleanup_exit ${VETH0} ${VETH1} ${NS1} diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c new file mode 100644 index 000000000000..3f2a65b6a9f5 --- /dev/null +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -0,0 +1,979 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2020 Intel Corporation. */ + +/* + * Some functions in this program are taken from + * Linux kernel samples/bpf/xdpsock* and modified + * for use. + * + * See test_xsk.sh for detailed information on test topology + * and prerequisite network setup. + * + * This test program contains two threads, each thread is single socket with + * a unique UMEM. It validates in-order packet delivery and packet content + * by sending packets to each other. + * + * Tests Information: + * ------------------ + * These selftests test AF_XDP SKB and Native/DRV modes using veth + * Virtual Ethernet interfaces. + * + * The following tests are run: + * + * 1. AF_XDP SKB mode + * Generic mode XDP is driver independent, used when the driver does + * not have support for XDP. Works on any netdevice using sockets and + * generic XDP path. XDP hook from netif_receive_skb(). + * a. nopoll - soft-irq processing + * b. poll - using poll() syscall + * + * Total tests: 2 + * + * Flow: + * ----- + * - Single process spawns two threads: Tx and Rx + * - Each of these two threads attach to a veth interface within their assigned + * namespaces + * - Each thread Creates one AF_XDP socket connected to a unique umem for each + * veth interface + * - Tx thread Transmits 10k packets from veth to veth + * - Rx thread verifies if all 10k packets were received and delivered in-order, + * and have the right content + * + * Enable/disable debug mode: + * -------------------------- + * To enable L2 - L4 headers and payload dump of each packet on STDOUT, add + * parameter -D to params array in test_xsk.sh, i.e. params=("-S" "-D") + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +typedef __u16 __sum16; +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "xdpxceiver.h" +#include "../kselftest.h" + +static void __exit_with_error(int error, const char *file, const char *func, int line) +{ + ksft_test_result_fail + ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error)); + ksft_exit_xfail(); +} + +#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) + +#define print_ksft_result(void)\ + (ksft_test_result_pass("PASS: %s %s\n", uut ? "" : "SKB", opt_poll ? "POLL" : "NOPOLL")) + +static void pthread_init_mutex(void) +{ + pthread_mutex_init(&sync_mutex, NULL); + pthread_mutex_init(&sync_mutex_tx, NULL); + pthread_cond_init(&signal_rx_condition, NULL); + pthread_cond_init(&signal_tx_condition, NULL); +} + +static void pthread_destroy_mutex(void) +{ + pthread_mutex_destroy(&sync_mutex); + pthread_mutex_destroy(&sync_mutex_tx); + pthread_cond_destroy(&signal_rx_condition); + pthread_cond_destroy(&signal_tx_condition); +} + +static void *memset32_htonl(void *dest, u32 val, u32 size) +{ + u32 *ptr = (u32 *)dest; + int i; + + val = htonl(val); + + for (i = 0; i < (size & (~0x3)); i += 4) + ptr[i >> 2] = val; + + for (; i < size; i++) + ((char *)dest)[i] = ((char *)&val)[i & 3]; + + return dest; +} + +/* + * This function code has been taken from + * Linux kernel lib/checksum.c + */ +static inline unsigned short from32to16(unsigned int x) +{ + /* add up 16-bit and 16-bit for 16+c bit */ + x = (x & 0xffff) + (x >> 16); + /* add up carry.. */ + x = (x & 0xffff) + (x >> 16); + return x; +} + +/* + * Fold a partial checksum + * This function code has been taken from + * Linux kernel include/asm-generic/checksum.h + */ +static inline __u16 csum_fold(__u32 csum) +{ + u32 sum = (__force u32)csum; + + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return (__force __u16)~sum; +} + +/* + * This function code has been taken from + * Linux kernel lib/checksum.c + */ +static inline u32 from64to32(u64 x) +{ + /* add up 32-bit and 32-bit for 32+c bit */ + x = (x & 0xffffffff) + (x >> 32); + /* add up carry.. */ + x = (x & 0xffffffff) + (x >> 32); + return (u32)x; +} + +__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum); + +/* + * This function code has been taken from + * Linux kernel lib/checksum.c + */ +__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) +{ + unsigned long long s = (__force u32)sum; + + s += (__force u32)saddr; + s += (__force u32)daddr; +#ifdef __BIG_ENDIAN__ + s += proto + len; +#else + s += (proto + len) << 8; +#endif + return (__force __u32)from64to32(s); +} + +/* + * This function has been taken from + * Linux kernel include/asm-generic/checksum.h + */ +static inline __u16 +csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); +} + +static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt) +{ + u32 csum = 0; + u32 cnt = 0; + + /* udp hdr and data */ + for (; cnt < len; cnt += 2) + csum += udp_pkt[cnt >> 1]; + + return csum_tcpudp_magic(saddr, daddr, len, proto, csum); +} + +static void gen_eth_hdr(void *data, struct ethhdr *eth_hdr) +{ + memcpy(eth_hdr->h_dest, ((struct ifobject *)data)->dst_mac, ETH_ALEN); + memcpy(eth_hdr->h_source, ((struct ifobject *)data)->src_mac, ETH_ALEN); + eth_hdr->h_proto = htons(ETH_P_IP); +} + +static void gen_ip_hdr(void *data, struct iphdr *ip_hdr) +{ + ip_hdr->version = IP_PKT_VER; + ip_hdr->ihl = 0x5; + ip_hdr->tos = IP_PKT_TOS; + ip_hdr->tot_len = htons(IP_PKT_SIZE); + ip_hdr->id = 0; + ip_hdr->frag_off = 0; + ip_hdr->ttl = IPDEFTTL; + ip_hdr->protocol = IPPROTO_UDP; + ip_hdr->saddr = ((struct ifobject *)data)->src_ip; + ip_hdr->daddr = ((struct ifobject *)data)->dst_ip; + ip_hdr->check = 0; +} + +static void gen_udp_hdr(void *data, void *arg, struct udphdr *udp_hdr) +{ + udp_hdr->source = htons(((struct ifobject *)arg)->src_port); + udp_hdr->dest = htons(((struct ifobject *)arg)->dst_port); + udp_hdr->len = htons(UDP_PKT_SIZE); + memset32_htonl(pkt_data + PKT_HDR_SIZE, + htonl(((struct generic_data *)data)->seqnum), UDP_PKT_DATA_SIZE); +} + +static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr) +{ + udp_hdr->check = 0; + udp_hdr->check = + udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr); +} + +static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr) +{ + memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, PKT_SIZE); +} + +static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size) +{ + int ret; + + data->umem = calloc(1, sizeof(struct xsk_umem_info)); + if (!data->umem) + exit_with_error(errno); + + ret = xsk_umem__create(&data->umem->umem, buffer, size, + &data->umem->fq, &data->umem->cq, NULL); + if (ret) + exit_with_error(ret); + + data->umem->buffer = buffer; +} + +static void xsk_populate_fill_ring(struct xsk_umem_info *umem) +{ + int ret, i; + u32 idx; + + ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx); + if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS) + exit_with_error(ret); + for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++) + *xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * XSK_UMEM__DEFAULT_FRAME_SIZE; + xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS); +} + +static int xsk_configure_socket(struct ifobject *ifobject) +{ + struct xsk_socket_config cfg; + struct xsk_ring_cons *rxr; + struct xsk_ring_prod *txr; + int ret; + + ifobject->xsk = calloc(1, sizeof(struct xsk_socket_info)); + if (!ifobject->xsk) + exit_with_error(errno); + + ifobject->xsk->umem = ifobject->umem; + cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; + cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + cfg.libbpf_flags = 0; + cfg.xdp_flags = opt_xdp_flags; + cfg.bind_flags = opt_xdp_bind_flags; + + rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL; + txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL; + + ret = xsk_socket__create(&ifobject->xsk->xsk, ifobject->ifname, + opt_queue, ifobject->umem->umem, rxr, txr, &cfg); + + if (ret) + return 1; + + return 0; +} + +static struct option long_options[] = { + {"interface", required_argument, 0, 'i'}, + {"queue", optional_argument, 0, 'q'}, + {"poll", no_argument, 0, 'p'}, + {"xdp-skb", no_argument, 0, 'S'}, + {"copy", no_argument, 0, 'c'}, + {"debug", optional_argument, 0, 'D'}, + {"tx-pkt-count", optional_argument, 0, 'C'}, + {0, 0, 0, 0} +}; + +static void usage(const char *prog) +{ + const char *str = + " Usage: %s [OPTIONS]\n" + " Options:\n" + " -i, --interface Use interface\n" + " -q, --queue=n Use queue n (default 0)\n" + " -p, --poll Use poll syscall\n" + " -S, --xdp-skb=n Use XDP SKB mode\n" + " -c, --copy Force copy mode\n" + " -D, --debug Debug mode - dump packets L2 - L5\n" + " -C, --tx-pkt-count=n Number of packets to send\n"; + ksft_print_msg(str, prog); +} + +static bool switch_namespace(int idx) +{ + char fqns[26] = "/var/run/netns/"; + int nsfd; + + strncat(fqns, ifdict[idx]->nsname, sizeof(fqns) - strlen(fqns) - 1); + nsfd = open(fqns, O_RDONLY); + + if (nsfd == -1) + exit_with_error(errno); + + if (setns(nsfd, 0) == -1) + exit_with_error(errno); + + return true; +} + +static void *nsswitchthread(void *args) +{ + if (switch_namespace(((struct targs *)args)->idx)) { + ifdict[((struct targs *)args)->idx]->ifindex = + if_nametoindex(ifdict[((struct targs *)args)->idx]->ifname); + if (!ifdict[((struct targs *)args)->idx]->ifindex) { + ksft_test_result_fail + ("ERROR: [%s] interface \"%s\" does not exist\n", + __func__, ifdict[((struct targs *)args)->idx]->ifname); + ((struct targs *)args)->retptr = false; + } else { + ksft_print_msg("Interface found: %s\n", + ifdict[((struct targs *)args)->idx]->ifname); + ((struct targs *)args)->retptr = true; + } + } else { + ((struct targs *)args)->retptr = false; + } + pthread_exit(NULL); +} + +static int validate_interfaces(void) +{ + bool ret = true; + + for (int i = 0; i < MAX_INTERFACES; i++) { + if (!strcmp(ifdict[i]->ifname, "")) { + ret = false; + ksft_test_result_fail("ERROR: interfaces: -i , -i ,."); + } + if (strcmp(ifdict[i]->nsname, "")) { + struct targs *targs; + + targs = (struct targs *)malloc(sizeof(struct targs)); + if (!targs) + exit_with_error(errno); + + targs->idx = i; + if (pthread_create(&ns_thread, NULL, nsswitchthread, (void *)targs)) + exit_with_error(errno); + + pthread_join(ns_thread, NULL); + + if (targs->retptr) + ksft_print_msg("NS switched: %s\n", ifdict[i]->nsname); + + free(targs); + } else { + ifdict[i]->ifindex = if_nametoindex(ifdict[i]->ifname); + if (!ifdict[i]->ifindex) { + ksft_test_result_fail + ("ERROR: interface \"%s\" does not exist\n", ifdict[i]->ifname); + ret = false; + } else { + ksft_print_msg("Interface found: %s\n", ifdict[i]->ifname); + } + } + } + return ret; +} + +static void parse_command_line(int argc, char **argv) +{ + int option_index, interface_index = 0, c; + + opterr = 0; + + for (;;) { + c = getopt_long(argc, argv, "i:q:pScDC:", long_options, &option_index); + + if (c == -1) + break; + + switch (c) { + case 'i': + if (interface_index == MAX_INTERFACES) + break; + char *sptr, *token; + + sptr = strndupa(optarg, strlen(optarg)); + memcpy(ifdict[interface_index]->ifname, + strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS); + token = strsep(&sptr, ","); + if (token) + memcpy(ifdict[interface_index]->nsname, token, + MAX_INTERFACES_NAMESPACE_CHARS); + interface_index++; + break; + case 'q': + opt_queue = atoi(optarg); + break; + case 'p': + opt_poll = 1; + break; + case 'S': + opt_xdp_flags |= XDP_FLAGS_SKB_MODE; + opt_xdp_bind_flags |= XDP_COPY; + uut = ORDER_CONTENT_VALIDATE_XDP_SKB; + break; + case 'c': + opt_xdp_bind_flags |= XDP_COPY; + break; + case 'D': + debug_pkt_dump = 1; + break; + case 'C': + opt_pkt_count = atoi(optarg); + break; + default: + usage(basename(argv[0])); + ksft_exit_xfail(); + } + } + + if (!validate_interfaces()) { + usage(basename(argv[0])); + ksft_exit_xfail(); + } +} + +static void kick_tx(struct xsk_socket_info *xsk) +{ + int ret; + + ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) + return; + exit_with_error(errno); +} + +static inline void complete_tx_only(struct xsk_socket_info *xsk, int batch_size) +{ + unsigned int rcvd; + u32 idx; + + if (!xsk->outstanding_tx) + return; + + if (!NEED_WAKEUP || xsk_ring_prod__needs_wakeup(&xsk->tx)) + kick_tx(xsk); + + rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); + if (rcvd) { + xsk_ring_cons__release(&xsk->umem->cq, rcvd); + xsk->outstanding_tx -= rcvd; + xsk->tx_npkts += rcvd; + } +} + +static void rx_pkt(struct xsk_socket_info *xsk, struct pollfd *fds) +{ + unsigned int rcvd, i; + u32 idx_rx = 0, idx_fq = 0; + int ret; + + rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); + if (!rcvd) { + if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { + ret = poll(fds, 1, POLL_TMOUT); + if (ret < 0) + exit_with_error(ret); + } + return; + } + + ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); + while (ret != rcvd) { + if (ret < 0) + exit_with_error(ret); + if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { + ret = poll(fds, 1, POLL_TMOUT); + if (ret < 0) + exit_with_error(ret); + } + ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); + } + + for (i = 0; i < rcvd; i++) { + u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; + (void)xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len; + u64 orig = xsk_umem__extract_addr(addr); + + addr = xsk_umem__add_offset_to_addr(addr); + pkt_node_rx = malloc(sizeof(struct pkt) + PKT_SIZE); + if (!pkt_node_rx) + exit_with_error(errno); + + pkt_node_rx->pkt_frame = (char *)malloc(PKT_SIZE); + if (!pkt_node_rx->pkt_frame) + exit_with_error(errno); + + memcpy(pkt_node_rx->pkt_frame, xsk_umem__get_data(xsk->umem->buffer, addr), + PKT_SIZE); + + TAILQ_INSERT_HEAD(&head, pkt_node_rx, pkt_nodes); + + *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig; + } + + xsk_ring_prod__submit(&xsk->umem->fq, rcvd); + xsk_ring_cons__release(&xsk->rx, rcvd); + xsk->rx_npkts += rcvd; +} + +static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size) +{ + u32 idx; + unsigned int i; + + while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size) + complete_tx_only(xsk, batch_size); + + for (i = 0; i < batch_size; i++) { + struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); + + tx_desc->addr = (*frameptr + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT; + tx_desc->len = PKT_SIZE; + } + + xsk_ring_prod__submit(&xsk->tx, batch_size); + xsk->outstanding_tx += batch_size; + *frameptr += batch_size; + *frameptr %= num_frames; + complete_tx_only(xsk, batch_size); +} + +static inline int get_batch_size(int pkt_cnt) +{ + if (!opt_pkt_count) + return BATCH_SIZE; + + if (pkt_cnt + BATCH_SIZE <= opt_pkt_count) + return BATCH_SIZE; + + return opt_pkt_count - pkt_cnt; +} + +static void complete_tx_only_all(void *arg) +{ + bool pending; + + do { + pending = false; + if (((struct ifobject *)arg)->xsk->outstanding_tx) { + complete_tx_only(((struct ifobject *) + arg)->xsk, BATCH_SIZE); + pending = !!((struct ifobject *)arg)->xsk->outstanding_tx; + } + } while (pending); +} + +static void tx_only_all(void *arg) +{ + struct pollfd fds[MAX_SOCKS] = { }; + u32 frame_nb = 0; + int pkt_cnt = 0; + int ret; + + fds[0].fd = xsk_socket__fd(((struct ifobject *)arg)->xsk->xsk); + fds[0].events = POLLOUT; + + while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) { + int batch_size = get_batch_size(pkt_cnt); + + if (opt_poll) { + ret = poll(fds, 1, POLL_TMOUT); + if (ret <= 0) + continue; + + if (!(fds[0].revents & POLLOUT)) + continue; + } + + tx_only(((struct ifobject *)arg)->xsk, &frame_nb, batch_size); + pkt_cnt += batch_size; + } + + if (opt_pkt_count) + complete_tx_only_all(arg); +} + +static void worker_pkt_dump(void) +{ + struct in_addr ipaddr; + + fprintf(stdout, "---------------------------------------\n"); + for (int iter = 0; iter < num_frames - 1; iter++) { + /*extract L2 frame */ + fprintf(stdout, "DEBUG>> L2: dst mac: "); + for (int i = 0; i < ETH_ALEN; i++) + fprintf(stdout, "%02X", ((struct ethhdr *) + pkt_buf[iter]->payload)->h_dest[i]); + + fprintf(stdout, "\nDEBUG>> L2: src mac: "); + for (int i = 0; i < ETH_ALEN; i++) + fprintf(stdout, "%02X", ((struct ethhdr *) + pkt_buf[iter]->payload)->h_source[i]); + + /*extract L3 frame */ + fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", + ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->ihl); + + ipaddr.s_addr = + ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->saddr; + fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", inet_ntoa(ipaddr)); + + ipaddr.s_addr = + ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->daddr; + fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", inet_ntoa(ipaddr)); + + /*extract L4 frame */ + fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", + ntohs(((struct udphdr *)(pkt_buf[iter]->payload + + sizeof(struct ethhdr) + + sizeof(struct iphdr)))->source)); + + fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", + ntohs(((struct udphdr *)(pkt_buf[iter]->payload + + sizeof(struct ethhdr) + + sizeof(struct iphdr)))->dest)); + /*extract L5 frame */ + int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE)); + + if (payload == EOT) { + ksft_print_msg("End-of-tranmission frame received\n"); + fprintf(stdout, "---------------------------------------\n"); + break; + } + fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload); + fprintf(stdout, "---------------------------------------\n"); + } +} + +static void worker_pkt_validate(void) +{ + u32 payloadseqnum = -2; + + while (1) { + pkt_node_rx_q = malloc(sizeof(struct pkt)); + pkt_node_rx_q = TAILQ_LAST(&head, head_s); + if (!pkt_node_rx_q) + break; + /*do not increment pktcounter if !(tos=0x9 and ipv4) */ + if ((((struct iphdr *)(pkt_node_rx_q->pkt_frame + + sizeof(struct ethhdr)))->version == IP_PKT_VER) + && (((struct iphdr *)(pkt_node_rx_q->pkt_frame + sizeof(struct ethhdr)))->tos == + IP_PKT_TOS)) { + payloadseqnum = *((uint32_t *) (pkt_node_rx_q->pkt_frame + PKT_HDR_SIZE)); + if (debug_pkt_dump && payloadseqnum != EOT) { + pkt_obj = (struct pkt_frame *)malloc(sizeof(struct pkt_frame)); + pkt_obj->payload = (char *)malloc(PKT_SIZE); + memcpy(pkt_obj->payload, pkt_node_rx_q->pkt_frame, PKT_SIZE); + pkt_buf[payloadseqnum] = pkt_obj; + } + + if (payloadseqnum == EOT) { + ksft_print_msg("End-of-tranmission frame received: PASS\n"); + sigvar = 1; + break; + } + + if (prev_pkt + 1 != payloadseqnum) { + ksft_test_result_fail + ("ERROR: [%s] prev_pkt [%d], payloadseqnum [%d]\n", + __func__, prev_pkt, payloadseqnum); + ksft_exit_xfail(); + } + + TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes); + free(pkt_node_rx_q->pkt_frame); + free(pkt_node_rx_q); + pkt_node_rx_q = NULL; + prev_pkt = payloadseqnum; + pkt_counter++; + } else { + ksft_print_msg("Invalid frame received: "); + ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", + ((struct iphdr *)(pkt_node_rx_q->pkt_frame + + sizeof(struct ethhdr)))->version, + ((struct iphdr *)(pkt_node_rx_q->pkt_frame + + sizeof(struct ethhdr)))->tos); + TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes); + free(pkt_node_rx_q->pkt_frame); + free(pkt_node_rx_q); + pkt_node_rx_q = NULL; + } + } +} + +static void thread_common_ops(void *arg, void *bufs, pthread_mutex_t *mutexptr, + atomic_int *spinningptr) +{ + int ctr = 0; + int ret; + + xsk_configure_umem((struct ifobject *)arg, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE); + ret = xsk_configure_socket((struct ifobject *)arg); + + /* Retry Create Socket if it fails as xsk_socket__create() + * is asynchronous + * + * Essential to lock Mutex here to prevent Tx thread from + * entering before Rx and causing a deadlock + */ + pthread_mutex_lock(mutexptr); + while (ret && ctr < SOCK_RECONF_CTR) { + atomic_store(spinningptr, 1); + xsk_configure_umem((struct ifobject *)arg, + bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE); + ret = xsk_configure_socket((struct ifobject *)arg); + usleep(USLEEP_MAX); + ctr++; + } + atomic_store(spinningptr, 0); + pthread_mutex_unlock(mutexptr); + + if (ctr >= SOCK_RECONF_CTR) + exit_with_error(ret); +} + +static void *worker_testapp_validate(void *arg) +{ + struct udphdr *udp_hdr = + (struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr)); + struct generic_data *data = (struct generic_data *)malloc(sizeof(struct generic_data)); + struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr)); + struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data; + void *bufs; + + pthread_attr_setstacksize(&attr, THREAD_STACK); + + bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (bufs == MAP_FAILED) + exit_with_error(errno); + + if (strcmp(((struct ifobject *)arg)->nsname, "")) + switch_namespace(((struct ifobject *)arg)->ifdict_index); + + if (((struct ifobject *)arg)->fv.vector == tx) { + int spinningrxctr = 0; + + thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_tx); + + while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) { + spinningrxctr++; + usleep(USLEEP_MAX); + } + + ksft_print_msg("Interface [%s] vector [Tx]\n", ((struct ifobject *)arg)->ifname); + for (int i = 0; i < num_frames; i++) { + /*send EOT frame */ + if (i == (num_frames - 1)) + data->seqnum = -1; + else + data->seqnum = i; + gen_udp_hdr((void *)data, (void *)arg, udp_hdr); + gen_ip_hdr((void *)arg, ip_hdr); + gen_udp_csum(udp_hdr, ip_hdr); + gen_eth_hdr((void *)arg, eth_hdr); + gen_eth_frame(((struct ifobject *)arg)->umem, + i * XSK_UMEM__DEFAULT_FRAME_SIZE); + } + + free(data); + ksft_print_msg("Sending %d packets on interface %s\n", + (opt_pkt_count - 1), ((struct ifobject *)arg)->ifname); + tx_only_all(arg); + } else if (((struct ifobject *)arg)->fv.vector == rx) { + struct pollfd fds[MAX_SOCKS] = { }; + int ret; + + thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_rx); + + ksft_print_msg("Interface [%s] vector [Rx]\n", ((struct ifobject *)arg)->ifname); + xsk_populate_fill_ring(((struct ifobject *)arg)->umem); + + TAILQ_INIT(&head); + if (debug_pkt_dump) { + pkt_buf = malloc(sizeof(struct pkt_frame **) * num_frames); + if (!pkt_buf) + exit_with_error(errno); + } + + fds[0].fd = xsk_socket__fd(((struct ifobject *)arg)->xsk->xsk); + fds[0].events = POLLIN; + + pthread_mutex_lock(&sync_mutex); + pthread_cond_signal(&signal_rx_condition); + pthread_mutex_unlock(&sync_mutex); + + while (1) { + if (opt_poll) { + ret = poll(fds, 1, POLL_TMOUT); + if (ret <= 0) + continue; + } + rx_pkt(((struct ifobject *)arg)->xsk, fds); + worker_pkt_validate(); + + if (sigvar) + break; + } + + ksft_print_msg("Received %d packets on interface %s\n", + pkt_counter, ((struct ifobject *)arg)->ifname); + } + + xsk_socket__delete(((struct ifobject *)arg)->xsk->xsk); + (void)xsk_umem__delete(((struct ifobject *)arg)->umem->umem); + pthread_exit(NULL); +} + +static void testapp_validate(void) +{ + pthread_attr_init(&attr); + pthread_attr_setstacksize(&attr, THREAD_STACK); + + pthread_mutex_lock(&sync_mutex); + + /*Spawn RX thread */ + if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[1])) + exit_with_error(errno); + + struct timespec max_wait = { 0, 0 }; + + if (clock_gettime(CLOCK_REALTIME, &max_wait)) + exit_with_error(errno); + max_wait.tv_sec += TMOUT_SEC; + + if (pthread_cond_timedwait(&signal_rx_condition, &sync_mutex, &max_wait) == ETIMEDOUT) + exit_with_error(errno); + + pthread_mutex_unlock(&sync_mutex); + + /*Spawn TX thread */ + if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[0])) + exit_with_error(errno); + + pthread_join(t1, NULL); + pthread_join(t0, NULL); + + if (debug_pkt_dump) { + worker_pkt_dump(); + for (int iter = 0; iter < num_frames - 1; iter++) { + free(pkt_buf[iter]->payload); + free(pkt_buf[iter]); + } + free(pkt_buf); + } + + print_ksft_result(); +} + +static void init_iface_config(void *ifaceconfig) +{ + /*Init interface0 */ + ifdict[0]->fv.vector = tx; + memcpy(ifdict[0]->dst_mac, ((struct ifaceconfigobj *)ifaceconfig)->dst_mac, ETH_ALEN); + memcpy(ifdict[0]->src_mac, ((struct ifaceconfigobj *)ifaceconfig)->src_mac, ETH_ALEN); + ifdict[0]->dst_ip = ((struct ifaceconfigobj *)ifaceconfig)->dst_ip.s_addr; + ifdict[0]->src_ip = ((struct ifaceconfigobj *)ifaceconfig)->src_ip.s_addr; + ifdict[0]->dst_port = ((struct ifaceconfigobj *)ifaceconfig)->dst_port; + ifdict[0]->src_port = ((struct ifaceconfigobj *)ifaceconfig)->src_port; + + /*Init interface1 */ + ifdict[1]->fv.vector = rx; + memcpy(ifdict[1]->dst_mac, ((struct ifaceconfigobj *)ifaceconfig)->src_mac, ETH_ALEN); + memcpy(ifdict[1]->src_mac, ((struct ifaceconfigobj *)ifaceconfig)->dst_mac, ETH_ALEN); + ifdict[1]->dst_ip = ((struct ifaceconfigobj *)ifaceconfig)->src_ip.s_addr; + ifdict[1]->src_ip = ((struct ifaceconfigobj *)ifaceconfig)->dst_ip.s_addr; + ifdict[1]->dst_port = ((struct ifaceconfigobj *)ifaceconfig)->src_port; + ifdict[1]->src_port = ((struct ifaceconfigobj *)ifaceconfig)->dst_port; +} + +int main(int argc, char **argv) +{ + struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY }; + + if (setrlimit(RLIMIT_MEMLOCK, &_rlim)) + exit_with_error(errno); + + const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; + const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; + const char *IP1 = "192.168.100.162"; + const char *IP2 = "192.168.100.161"; + u16 UDP_DST_PORT = 2020; + u16 UDP_SRC_PORT = 2121; + + ifaceconfig = (struct ifaceconfigobj *)malloc(sizeof(struct ifaceconfigobj)); + memcpy(ifaceconfig->dst_mac, MAC1, ETH_ALEN); + memcpy(ifaceconfig->src_mac, MAC2, ETH_ALEN); + inet_aton(IP1, &ifaceconfig->dst_ip); + inet_aton(IP2, &ifaceconfig->src_ip); + ifaceconfig->dst_port = UDP_DST_PORT; + ifaceconfig->src_port = UDP_SRC_PORT; + + for (int i = 0; i < MAX_INTERFACES; i++) { + ifdict[i] = (struct ifobject *)malloc(sizeof(struct ifobject)); + if (!ifdict[i]) + exit_with_error(errno); + + ifdict[i]->ifdict_index = i; + } + + setlocale(LC_ALL, ""); + + parse_command_line(argc, argv); + + num_frames = ++opt_pkt_count; + + init_iface_config((void *)ifaceconfig); + + pthread_init_mutex(); + + ksft_set_plan(1); + + testapp_validate(); + + for (int i = 0; i < MAX_INTERFACES; i++) + free(ifdict[i]); + + pthread_destroy_mutex(); + + ksft_exit_pass(); + + return 0; +} diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h new file mode 100644 index 000000000000..5929f2fc1224 --- /dev/null +++ b/tools/testing/selftests/bpf/xdpxceiver.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright(c) 2020 Intel Corporation. + */ + +#ifndef XDPXCEIVER_H_ +#define XDPXCEIVER_H_ + +#ifndef SOL_XDP +#define SOL_XDP 283 +#endif + +#ifndef AF_XDP +#define AF_XDP 44 +#endif + +#ifndef PF_XDP +#define PF_XDP AF_XDP +#endif + +#define MAX_INTERFACES 2 +#define MAX_INTERFACE_NAME_CHARS 7 +#define MAX_INTERFACES_NAMESPACE_CHARS 10 +#define MAX_SOCKS 1 +#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ + sizeof(struct udphdr)) +#define MIN_PKT_SIZE 64 +#define ETH_FCS_SIZE 4 +#define PKT_SIZE (MIN_PKT_SIZE - ETH_FCS_SIZE) +#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) +#define IP_PKT_VER 0x4 +#define IP_PKT_TOS 0x9 +#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) +#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) +#define TMOUT_SEC (3) +#define EOT (-1) +#define USLEEP_MAX 200000 +#define THREAD_STACK 60000000 +#define SOCK_RECONF_CTR 10 +#define BATCH_SIZE 64 +#define POLL_TMOUT 1000 +#define NEED_WAKEUP 1 + +typedef __u32 u32; +typedef __u16 u16; +typedef __u8 u8; + +enum TESTS { + ORDER_CONTENT_VALIDATE_XDP_SKB = 0, +}; + +u8 uut; +u8 debug_pkt_dump; +u32 num_frames; + +static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +static int opt_queue; +static int opt_pkt_count; +static int opt_poll; +static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; +static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE]; +static u32 pkt_counter; +static u32 prev_pkt = -1; +static int sigvar; + +struct xsk_umem_info { + struct xsk_ring_prod fq; + struct xsk_ring_cons cq; + struct xsk_umem *umem; + void *buffer; +}; + +struct xsk_socket_info { + struct xsk_ring_cons rx; + struct xsk_ring_prod tx; + struct xsk_umem_info *umem; + struct xsk_socket *xsk; + unsigned long rx_npkts; + unsigned long tx_npkts; + unsigned long prev_rx_npkts; + unsigned long prev_tx_npkts; + u32 outstanding_tx; +}; + +struct flow_vector { + enum fvector { + tx, + rx, + bidi, + undef, + } vector; +}; + +struct generic_data { + u32 seqnum; +}; + +struct ifaceconfigobj { + u8 dst_mac[ETH_ALEN]; + u8 src_mac[ETH_ALEN]; + struct in_addr dst_ip; + struct in_addr src_ip; + u16 src_port; + u16 dst_port; +} *ifaceconfig; + +struct ifobject { + int ifindex; + int ifdict_index; + char ifname[MAX_INTERFACE_NAME_CHARS]; + char nsname[MAX_INTERFACES_NAMESPACE_CHARS]; + struct flow_vector fv; + struct xsk_socket_info *xsk; + struct xsk_umem_info *umem; + u8 dst_mac[ETH_ALEN]; + u8 src_mac[ETH_ALEN]; + u32 dst_ip; + u32 src_ip; + u16 src_port; + u16 dst_port; +}; + +static struct ifobject *ifdict[MAX_INTERFACES]; + +/*threads*/ +atomic_int spinning_tx; +atomic_int spinning_rx; +pthread_mutex_t sync_mutex; +pthread_mutex_t sync_mutex_tx; +pthread_cond_t signal_rx_condition; +pthread_cond_t signal_tx_condition; +pthread_t t0, t1, ns_thread; +pthread_attr_t attr; + +struct targs { + bool retptr; + int idx; +}; + +TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head); +struct head_s *head_p; +struct pkt { + char *pkt_frame; + + TAILQ_ENTRY(pkt) pkt_nodes; +} *pkt_node_rx, *pkt_node_rx_q; + +struct pkt_frame { + char *payload; +} *pkt_obj; + +struct pkt_frame **pkt_buf; + +#endif /* XDPXCEIVER_H */ diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh index 29762739c21b..9d54c4645127 100755 --- a/tools/testing/selftests/bpf/xsk_prereqs.sh +++ b/tools/testing/selftests/bpf/xsk_prereqs.sh @@ -14,6 +14,8 @@ RED='\033[0;31m' NC='\033[0m' STACK_LIM=131072 SPECFILE=veth.spec +XSKOBJ=xdpxceiver +NUMPKTS=10000 validate_root_exec() { @@ -117,3 +119,17 @@ vethXDPnative() ip link set dev $1 xdpgeneric off ip netns exec $3 ip link set dev $2 xdpgeneric off } + +execxdpxceiver() +{ + local -a 'paramkeys=("${!'"$1"'[@]}")' copy + paramkeysstr=${paramkeys[*]} + + for index in $paramkeysstr; + do + current=$1"[$index]" + copy[$index]=${!current} + done + + ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${copy[*]} -C ${NUMPKTS} +} -- cgit v1.3.1 From 9103a8594d9324d8e1512442ba580e4e91d42c2d Mon Sep 17 00:00:00 2001 From: Weqaar Janjua Date: Mon, 7 Dec 2020 21:53:31 +0000 Subject: selftests/bpf: Xsk selftests - DRV POLL, NOPOLL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds following tests: 2. AF_XDP DRV/Native mode Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes packets before SKB allocation. Provides better performance than SKB. Driver hook available just after DMA of buffer descriptor. a. nopoll b. poll * Only copy mode is supported because veth does not currently support zero-copy mode Signed-off-by: Weqaar Janjua Signed-off-by: Daniel Borkmann Tested-by: Yonghong Song Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20201207215333.11586-4-weqaar.a.janjua@intel.com --- tools/testing/selftests/bpf/test_xsk.sh | 24 ++++++++++++++++++++++++ tools/testing/selftests/bpf/xdpxceiver.c | 22 +++++++++++++++++++--- tools/testing/selftests/bpf/xdpxceiver.h | 3 ++- 3 files changed, 45 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index 0b7bafb65f43..aad8065637fd 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -173,6 +173,30 @@ retval=$? test_status $retval "${TEST_NAME}" statusList+=($retval) +### TEST 4 +TEST_NAME="DRV NOPOLL" + +vethXDPnative ${VETH0} ${VETH1} ${NS1} + +params=("-N") +execxdpxceiver params + +retval=$? +test_status $retval "${TEST_NAME}" +statusList+=($retval) + +### TEST 5 +TEST_NAME="DRV POLL" + +vethXDPnative ${VETH0} ${VETH1} ${NS1} + +params=("-N" "-p") +execxdpxceiver params + +retval=$? +test_status $retval "${TEST_NAME}" +statusList+=($retval) + ## END TESTS cleanup_exit ${VETH0} ${VETH1} ${NS1} diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 3f2a65b6a9f5..9fcd80a38b07 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -27,7 +27,16 @@ * a. nopoll - soft-irq processing * b. poll - using poll() syscall * - * Total tests: 2 + * 2. AF_XDP DRV/Native mode + * Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes + * packets before SKB allocation. Provides better performance than SKB. Driver + * hook available just after DMA of buffer descriptor. + * a. nopoll + * b. poll + * - Only copy mode is supported because veth does not currently support + * zero-copy mode + * + * Total tests: 4 * * Flow: * ----- @@ -88,7 +97,7 @@ static void __exit_with_error(int error, const char *file, const char *func, int #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) #define print_ksft_result(void)\ - (ksft_test_result_pass("PASS: %s %s\n", uut ? "" : "SKB", opt_poll ? "POLL" : "NOPOLL")) + (ksft_test_result_pass("PASS: %s %s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" : "NOPOLL")) static void pthread_init_mutex(void) { @@ -311,6 +320,7 @@ static struct option long_options[] = { {"queue", optional_argument, 0, 'q'}, {"poll", no_argument, 0, 'p'}, {"xdp-skb", no_argument, 0, 'S'}, + {"xdp-native", no_argument, 0, 'N'}, {"copy", no_argument, 0, 'c'}, {"debug", optional_argument, 0, 'D'}, {"tx-pkt-count", optional_argument, 0, 'C'}, @@ -326,6 +336,7 @@ static void usage(const char *prog) " -q, --queue=n Use queue n (default 0)\n" " -p, --poll Use poll syscall\n" " -S, --xdp-skb=n Use XDP SKB mode\n" + " -N, --xdp-native=n Enforce XDP DRV (native) mode\n" " -c, --copy Force copy mode\n" " -D, --debug Debug mode - dump packets L2 - L5\n" " -C, --tx-pkt-count=n Number of packets to send\n"; @@ -417,7 +428,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "i:q:pScDC:", long_options, &option_index); + c = getopt_long(argc, argv, "i:q:pSNcDC:", long_options, &option_index); if (c == -1) break; @@ -448,6 +459,11 @@ static void parse_command_line(int argc, char **argv) opt_xdp_bind_flags |= XDP_COPY; uut = ORDER_CONTENT_VALIDATE_XDP_SKB; break; + case 'N': + opt_xdp_flags |= XDP_FLAGS_DRV_MODE; + opt_xdp_bind_flags |= XDP_COPY; + uut = ORDER_CONTENT_VALIDATE_XDP_DRV; + break; case 'c': opt_xdp_bind_flags |= XDP_COPY; break; diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h index 5929f2fc1224..12070d66344b 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.h +++ b/tools/testing/selftests/bpf/xdpxceiver.h @@ -38,7 +38,7 @@ #define SOCK_RECONF_CTR 10 #define BATCH_SIZE 64 #define POLL_TMOUT 1000 -#define NEED_WAKEUP 1 +#define NEED_WAKEUP true typedef __u32 u32; typedef __u16 u16; @@ -46,6 +46,7 @@ typedef __u8 u8; enum TESTS { ORDER_CONTENT_VALIDATE_XDP_SKB = 0, + ORDER_CONTENT_VALIDATE_XDP_DRV = 1, }; u8 uut; -- cgit v1.3.1 From 6674bf66560a6c55aada1e3cd4fca7a3ed204075 Mon Sep 17 00:00:00 2001 From: Weqaar Janjua Date: Mon, 7 Dec 2020 21:53:32 +0000 Subject: selftests/bpf: Xsk selftests - Socket Teardown - SKB, DRV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds following tests: 1. AF_XDP SKB mode c. Socket Teardown Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy both sockets, then repeat multiple times. Only nopoll mode is used 2. AF_XDP DRV/Native mode c. Socket Teardown * Only copy mode is supported because veth does not currently support zero-copy mode Signed-off-by: Weqaar Janjua Signed-off-by: Daniel Borkmann Tested-by: Yonghong Song Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20201207215333.11586-5-weqaar.a.janjua@intel.com --- tools/testing/selftests/bpf/test_xsk.sh | 24 ++++++++++++++++++++++ tools/testing/selftests/bpf/xdpxceiver.c | 35 ++++++++++++++++++++++++++++---- tools/testing/selftests/bpf/xdpxceiver.h | 2 ++ 3 files changed, 57 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index aad8065637fd..9be9dff25560 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -197,6 +197,30 @@ retval=$? test_status $retval "${TEST_NAME}" statusList+=($retval) +### TEST 6 +TEST_NAME="SKB SOCKET TEARDOWN" + +vethXDPgeneric ${VETH0} ${VETH1} ${NS1} + +params=("-S" "-T") +execxdpxceiver params + +retval=$? +test_status $retval "${TEST_NAME}" +statusList+=($retval) + +### TEST 7 +TEST_NAME="DRV SOCKET TEARDOWN" + +vethXDPnative ${VETH0} ${VETH1} ${NS1} + +params=("-N" "-T") +execxdpxceiver params + +retval=$? +test_status $retval "${TEST_NAME}" +statusList+=($retval) + ## END TESTS cleanup_exit ${VETH0} ${VETH1} ${NS1} diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 9fcd80a38b07..e8907109782d 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -26,6 +26,9 @@ * generic XDP path. XDP hook from netif_receive_skb(). * a. nopoll - soft-irq processing * b. poll - using poll() syscall + * c. Socket Teardown + * Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy + * both sockets, then repeat multiple times. Only nopoll mode is used * * 2. AF_XDP DRV/Native mode * Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes @@ -33,10 +36,11 @@ * hook available just after DMA of buffer descriptor. * a. nopoll * b. poll + * c. Socket Teardown * - Only copy mode is supported because veth does not currently support * zero-copy mode * - * Total tests: 4 + * Total tests: 6 * * Flow: * ----- @@ -97,7 +101,8 @@ static void __exit_with_error(int error, const char *file, const char *func, int #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) #define print_ksft_result(void)\ - (ksft_test_result_pass("PASS: %s %s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" : "NOPOLL")) + (ksft_test_result_pass("PASS: %s %s %s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" :\ + "NOPOLL", opt_teardown ? "Socket Teardown" : "")) static void pthread_init_mutex(void) { @@ -322,6 +327,7 @@ static struct option long_options[] = { {"xdp-skb", no_argument, 0, 'S'}, {"xdp-native", no_argument, 0, 'N'}, {"copy", no_argument, 0, 'c'}, + {"tear-down", no_argument, 0, 'T'}, {"debug", optional_argument, 0, 'D'}, {"tx-pkt-count", optional_argument, 0, 'C'}, {0, 0, 0, 0} @@ -338,6 +344,7 @@ static void usage(const char *prog) " -S, --xdp-skb=n Use XDP SKB mode\n" " -N, --xdp-native=n Enforce XDP DRV (native) mode\n" " -c, --copy Force copy mode\n" + " -T, --tear-down Tear down sockets by repeatedly recreating them\n" " -D, --debug Debug mode - dump packets L2 - L5\n" " -C, --tx-pkt-count=n Number of packets to send\n"; ksft_print_msg(str, prog); @@ -428,7 +435,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "i:q:pSNcDC:", long_options, &option_index); + c = getopt_long(argc, argv, "i:q:pSNcTDC:", long_options, &option_index); if (c == -1) break; @@ -467,6 +474,9 @@ static void parse_command_line(int argc, char **argv) case 'c': opt_xdp_bind_flags |= XDP_COPY; break; + case 'T': + opt_teardown = 1; + break; case 'D': debug_pkt_dump = 1; break; @@ -871,6 +881,9 @@ static void *worker_testapp_validate(void *arg) ksft_print_msg("Received %d packets on interface %s\n", pkt_counter, ((struct ifobject *)arg)->ifname); + + if (opt_teardown) + ksft_print_msg("Destroying socket\n"); } xsk_socket__delete(((struct ifobject *)arg)->xsk->xsk); @@ -916,6 +929,20 @@ static void testapp_validate(void) free(pkt_buf); } + if (!opt_teardown) + print_ksft_result(); +} + +static void testapp_sockets(void) +{ + for (int i = 0; i < MAX_TEARDOWN_ITER; i++) { + pkt_counter = 0; + prev_pkt = -1; + sigvar = 0; + ksft_print_msg("Creating socket\n"); + testapp_validate(); + } + print_ksft_result(); } @@ -982,7 +1009,7 @@ int main(int argc, char **argv) ksft_set_plan(1); - testapp_validate(); + opt_teardown ? testapp_sockets() : testapp_validate(); for (int i = 0; i < MAX_INTERFACES; i++) free(ifdict[i]); diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h index 12070d66344b..58185b914f99 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.h +++ b/tools/testing/selftests/bpf/xdpxceiver.h @@ -21,6 +21,7 @@ #define MAX_INTERFACE_NAME_CHARS 7 #define MAX_INTERFACES_NAMESPACE_CHARS 10 #define MAX_SOCKS 1 +#define MAX_TEARDOWN_ITER 10 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ sizeof(struct udphdr)) #define MIN_PKT_SIZE 64 @@ -57,6 +58,7 @@ static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int opt_queue; static int opt_pkt_count; static int opt_poll; +static int opt_teardown; static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE]; static u32 pkt_counter; -- cgit v1.3.1 From 7d20441eb05ec6d8dc7b16381c53b3c0b3ad6e8a Mon Sep 17 00:00:00 2001 From: Weqaar Janjua Date: Mon, 7 Dec 2020 21:53:33 +0000 Subject: selftests/bpf: Xsk selftests - Bi-directional Sockets - SKB, DRV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds following tests: 1. AF_XDP SKB mode d. Bi-directional Sockets Configure sockets as bi-directional tx/rx sockets, sets up fill and completion rings on each socket, tx/rx in both directions. Only nopoll mode is used 2. AF_XDP DRV/Native mode d. Bi-directional Sockets * Only copy mode is supported because veth does not currently support zero-copy mode Signed-off-by: Weqaar Janjua Signed-off-by: Daniel Borkmann Tested-by: Yonghong Song Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20201207215333.11586-6-weqaar.a.janjua@intel.com --- tools/testing/selftests/bpf/test_xsk.sh | 24 ++++++++ tools/testing/selftests/bpf/xdpxceiver.c | 100 +++++++++++++++++++++++-------- tools/testing/selftests/bpf/xdpxceiver.h | 4 ++ 3 files changed, 104 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index 9be9dff25560..88a7483eaae4 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -221,6 +221,30 @@ retval=$? test_status $retval "${TEST_NAME}" statusList+=($retval) +### TEST 8 +TEST_NAME="SKB BIDIRECTIONAL SOCKETS" + +vethXDPgeneric ${VETH0} ${VETH1} ${NS1} + +params=("-S" "-B") +execxdpxceiver params + +retval=$? +test_status $retval "${TEST_NAME}" +statusList+=($retval) + +### TEST 9 +TEST_NAME="DRV BIDIRECTIONAL SOCKETS" + +vethXDPnative ${VETH0} ${VETH1} ${NS1} + +params=("-N" "-B") +execxdpxceiver params + +retval=$? +test_status $retval "${TEST_NAME}" +statusList+=($retval) + ## END TESTS cleanup_exit ${VETH0} ${VETH1} ${NS1} diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index e8907109782d..014dedaa4dd2 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -29,6 +29,10 @@ * c. Socket Teardown * Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy * both sockets, then repeat multiple times. Only nopoll mode is used + * d. Bi-directional sockets + * Configure sockets as bi-directional tx/rx sockets, sets up fill and + * completion rings on each socket, tx/rx in both directions. Only nopoll + * mode is used * * 2. AF_XDP DRV/Native mode * Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes @@ -37,10 +41,11 @@ * a. nopoll * b. poll * c. Socket Teardown + * d. Bi-directional sockets * - Only copy mode is supported because veth does not currently support * zero-copy mode * - * Total tests: 6 + * Total tests: 8 * * Flow: * ----- @@ -101,8 +106,9 @@ static void __exit_with_error(int error, const char *file, const char *func, int #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) #define print_ksft_result(void)\ - (ksft_test_result_pass("PASS: %s %s %s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" :\ - "NOPOLL", opt_teardown ? "Socket Teardown" : "")) + (ksft_test_result_pass("PASS: %s %s %s%s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" :\ + "NOPOLL", opt_teardown ? "Socket Teardown" : "",\ + opt_bidi ? "Bi-directional Sockets" : "")) static void pthread_init_mutex(void) { @@ -308,8 +314,13 @@ static int xsk_configure_socket(struct ifobject *ifobject) cfg.xdp_flags = opt_xdp_flags; cfg.bind_flags = opt_xdp_bind_flags; - rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL; - txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL; + if (!opt_bidi) { + rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL; + txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL; + } else { + rxr = &ifobject->xsk->rx; + txr = &ifobject->xsk->tx; + } ret = xsk_socket__create(&ifobject->xsk->xsk, ifobject->ifname, opt_queue, ifobject->umem->umem, rxr, txr, &cfg); @@ -328,6 +339,7 @@ static struct option long_options[] = { {"xdp-native", no_argument, 0, 'N'}, {"copy", no_argument, 0, 'c'}, {"tear-down", no_argument, 0, 'T'}, + {"bidi", optional_argument, 0, 'B'}, {"debug", optional_argument, 0, 'D'}, {"tx-pkt-count", optional_argument, 0, 'C'}, {0, 0, 0, 0} @@ -345,6 +357,7 @@ static void usage(const char *prog) " -N, --xdp-native=n Enforce XDP DRV (native) mode\n" " -c, --copy Force copy mode\n" " -T, --tear-down Tear down sockets by repeatedly recreating them\n" + " -B, --bidi Bi-directional sockets test\n" " -D, --debug Debug mode - dump packets L2 - L5\n" " -C, --tx-pkt-count=n Number of packets to send\n"; ksft_print_msg(str, prog); @@ -435,7 +448,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "i:q:pSNcTDC:", long_options, &option_index); + c = getopt_long(argc, argv, "i:q:pSNcTBDC:", long_options, &option_index); if (c == -1) break; @@ -477,6 +490,9 @@ static void parse_command_line(int argc, char **argv) case 'T': opt_teardown = 1; break; + case 'B': + opt_bidi = 1; + break; case 'D': debug_pkt_dump = 1; break; @@ -802,22 +818,25 @@ static void *worker_testapp_validate(void *arg) struct generic_data *data = (struct generic_data *)malloc(sizeof(struct generic_data)); struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr)); struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data; - void *bufs; + void *bufs = NULL; pthread_attr_setstacksize(&attr, THREAD_STACK); - bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE, - PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (bufs == MAP_FAILED) - exit_with_error(errno); + if (!bidi_pass) { + bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (bufs == MAP_FAILED) + exit_with_error(errno); - if (strcmp(((struct ifobject *)arg)->nsname, "")) - switch_namespace(((struct ifobject *)arg)->ifdict_index); + if (strcmp(((struct ifobject *)arg)->nsname, "")) + switch_namespace(((struct ifobject *)arg)->ifdict_index); + } if (((struct ifobject *)arg)->fv.vector == tx) { int spinningrxctr = 0; - thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_tx); + if (!bidi_pass) + thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_tx); while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) { spinningrxctr++; @@ -847,7 +866,8 @@ static void *worker_testapp_validate(void *arg) struct pollfd fds[MAX_SOCKS] = { }; int ret; - thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_rx); + if (!bidi_pass) + thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_rx); ksft_print_msg("Interface [%s] vector [Rx]\n", ((struct ifobject *)arg)->ifname); xsk_populate_fill_ring(((struct ifobject *)arg)->umem); @@ -886,8 +906,10 @@ static void *worker_testapp_validate(void *arg) ksft_print_msg("Destroying socket\n"); } - xsk_socket__delete(((struct ifobject *)arg)->xsk->xsk); - (void)xsk_umem__delete(((struct ifobject *)arg)->umem->umem); + if (!opt_bidi || (opt_bidi && bidi_pass)) { + xsk_socket__delete(((struct ifobject *)arg)->xsk->xsk); + (void)xsk_umem__delete(((struct ifobject *)arg)->umem->umem); + } pthread_exit(NULL); } @@ -896,11 +918,26 @@ static void testapp_validate(void) pthread_attr_init(&attr); pthread_attr_setstacksize(&attr, THREAD_STACK); + if (opt_bidi && bidi_pass) { + pthread_init_mutex(); + if (!switching_notify) { + ksft_print_msg("Switching Tx/Rx vectors\n"); + switching_notify++; + } + } + pthread_mutex_lock(&sync_mutex); /*Spawn RX thread */ - if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[1])) - exit_with_error(errno); + if (!opt_bidi || (opt_bidi && !bidi_pass)) { + if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[1])) + exit_with_error(errno); + } else if (opt_bidi && bidi_pass) { + /*switch Tx/Rx vectors */ + ifdict[0]->fv.vector = rx; + if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[0])) + exit_with_error(errno); + } struct timespec max_wait = { 0, 0 }; @@ -914,8 +951,15 @@ static void testapp_validate(void) pthread_mutex_unlock(&sync_mutex); /*Spawn TX thread */ - if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[0])) - exit_with_error(errno); + if (!opt_bidi || (opt_bidi && !bidi_pass)) { + if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[0])) + exit_with_error(errno); + } else if (opt_bidi && bidi_pass) { + /*switch Tx/Rx vectors */ + ifdict[1]->fv.vector = tx; + if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[1])) + exit_with_error(errno); + } pthread_join(t1, NULL); pthread_join(t0, NULL); @@ -929,18 +973,19 @@ static void testapp_validate(void) free(pkt_buf); } - if (!opt_teardown) + if (!opt_teardown && !opt_bidi) print_ksft_result(); } static void testapp_sockets(void) { - for (int i = 0; i < MAX_TEARDOWN_ITER; i++) { + for (int i = 0; i < (opt_teardown ? MAX_TEARDOWN_ITER : MAX_BIDI_ITER); i++) { pkt_counter = 0; prev_pkt = -1; sigvar = 0; ksft_print_msg("Creating socket\n"); testapp_validate(); + opt_bidi ? bidi_pass++ : bidi_pass; } print_ksft_result(); @@ -1009,7 +1054,14 @@ int main(int argc, char **argv) ksft_set_plan(1); - opt_teardown ? testapp_sockets() : testapp_validate(); + if (!opt_teardown && !opt_bidi) { + testapp_validate(); + } else if (opt_teardown && opt_bidi) { + ksft_test_result_fail("ERROR: parameters -T and -B cannot be used together\n"); + ksft_exit_xfail(); + } else { + testapp_sockets(); + } for (int i = 0; i < MAX_INTERFACES; i++) free(ifdict[i]); diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h index 58185b914f99..61f595b6f200 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.h +++ b/tools/testing/selftests/bpf/xdpxceiver.h @@ -22,6 +22,7 @@ #define MAX_INTERFACES_NAMESPACE_CHARS 10 #define MAX_SOCKS 1 #define MAX_TEARDOWN_ITER 10 +#define MAX_BIDI_ITER 2 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ sizeof(struct udphdr)) #define MIN_PKT_SIZE 64 @@ -53,12 +54,15 @@ enum TESTS { u8 uut; u8 debug_pkt_dump; u32 num_frames; +u8 switching_notify; +u8 bidi_pass; static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int opt_queue; static int opt_pkt_count; static int opt_poll; static int opt_teardown; +static int opt_bidi; static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE]; static u32 pkt_counter; -- cgit v1.3.1 From efaa83a3736d392c61499ee3aad8690a142675cd Mon Sep 17 00:00:00 2001 From: Collin Walling Date: Mon, 7 Dec 2020 10:41:25 -0500 Subject: KVM: selftests: sync_regs test for diag318 The DIAGNOSE 0x0318 instruction, unique to s390x, is a privileged call that must be intercepted via SIE, handled in userspace, and the information set by the instruction is communicated back to KVM. To test the instruction interception, an ad-hoc handler is defined which simply has a VM execute the instruction and then userspace will extract the necessary info. The handler is defined such that the instruction invocation occurs only once. It is up to the caller to determine how the info returned by this handler should be used. The diag318 info is communicated from userspace to KVM via a sync_regs call. This is tested during a sync_regs test, where the diag318 info is requested via the handler, then the info is stored in the appropriate register in KVM via a sync registers call. If KVM does not support diag318, then the tests will print a message stating that diag318 was skipped, and the asserts will simply test against a value of 0. Signed-off-by: Collin Walling Link: https://lore.kernel.org/r/20201207154125.10322-1-walling@linux.ibm.com Acked-by: Janosch Frank Acked-by: Cornelia Huck Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- tools/testing/selftests/kvm/Makefile | 2 +- .../kvm/include/s390x/diag318_test_handler.h | 13 ++++ .../selftests/kvm/lib/s390x/diag318_test_handler.c | 82 ++++++++++++++++++++++ tools/testing/selftests/kvm/s390x/sync_regs_test.c | 16 ++++- 4 files changed, 111 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h create mode 100644 tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 30afbad36cd5..1b9c257fca5e 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -36,7 +36,7 @@ endif LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c -LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c +LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test diff --git a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h new file mode 100644 index 000000000000..b0ed71302722 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * + * Test handler for the s390x DIAGNOSE 0x0318 instruction. + * + * Copyright (C) 2020, IBM + */ + +#ifndef SELFTEST_KVM_DIAG318_TEST_HANDLER +#define SELFTEST_KVM_DIAG318_TEST_HANDLER + +uint64_t get_diag318_info(void); + +#endif diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c new file mode 100644 index 000000000000..86b9e611ad87 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test handler for the s390x DIAGNOSE 0x0318 instruction. + * + * Copyright (C) 2020, IBM + */ + +#include "test_util.h" +#include "kvm_util.h" + +#define VCPU_ID 6 + +#define ICPT_INSTRUCTION 0x04 +#define IPA0_DIAG 0x8300 + +static void guest_code(void) +{ + uint64_t diag318_info = 0x12345678; + + asm volatile ("diag %0,0,0x318\n" : : "d" (diag318_info)); +} + +/* + * The DIAGNOSE 0x0318 instruction call must be handled via userspace. As such, + * we create an ad-hoc VM here to handle the instruction then extract the + * necessary data. It is up to the caller to decide what to do with that data. + */ +static uint64_t diag318_handler(void) +{ + struct kvm_vm *vm; + struct kvm_run *run; + uint64_t reg; + uint64_t diag318_info; + + vm = vm_create_default(VCPU_ID, 0, guest_code); + vcpu_run(vm, VCPU_ID); + run = vcpu_state(vm, VCPU_ID); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, + "DIAGNOSE 0x0318 instruction was not intercepted"); + TEST_ASSERT(run->s390_sieic.icptcode == ICPT_INSTRUCTION, + "Unexpected intercept code: 0x%x", run->s390_sieic.icptcode); + TEST_ASSERT((run->s390_sieic.ipa & 0xff00) == IPA0_DIAG, + "Unexpected IPA0 code: 0x%x", (run->s390_sieic.ipa & 0xff00)); + + reg = (run->s390_sieic.ipa & 0x00f0) >> 4; + diag318_info = run->s.regs.gprs[reg]; + + TEST_ASSERT(diag318_info != 0, "DIAGNOSE 0x0318 info not set"); + + kvm_vm_free(vm); + + return diag318_info; +} + +uint64_t get_diag318_info(void) +{ + static uint64_t diag318_info; + static bool printed_skip; + + /* + * If KVM does not support diag318, then return 0 to + * ensure tests do not break. + */ + if (!kvm_check_cap(KVM_CAP_S390_DIAG318)) { + if (!printed_skip) { + fprintf(stdout, "KVM_CAP_S390_DIAG318 not supported. " + "Skipping diag318 test.\n"); + printed_skip = true; + } + return 0; + } + + /* + * If a test has previously requested the diag318 info, + * then don't bother spinning up a temporary VM again. + */ + if (!diag318_info) + diag318_info = diag318_handler(); + + return diag318_info; +} diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c index 5731ccf34917..caf7b8859a94 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c @@ -20,6 +20,7 @@ #include "test_util.h" #include "kvm_util.h" +#include "diag318_test_handler.h" #define VCPU_ID 5 @@ -70,7 +71,7 @@ static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right) #undef REG_COMPARE -#define TEST_SYNC_FIELDS (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS) +#define TEST_SYNC_FIELDS (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS|KVM_SYNC_DIAG318) #define INVALID_SYNC_FIELD 0x80000000 int main(int argc, char *argv[]) @@ -152,6 +153,12 @@ int main(int argc, char *argv[]) run->kvm_valid_regs = TEST_SYNC_FIELDS; run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS; + + if (get_diag318_info() > 0) { + run->s.regs.diag318 = get_diag318_info(); + run->kvm_dirty_regs |= KVM_SYNC_DIAG318; + } + rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, @@ -164,6 +171,9 @@ int main(int argc, char *argv[]) TEST_ASSERT(run->s.regs.acrs[0] == 1 << 11, "acr0 sync regs value incorrect 0x%x.", run->s.regs.acrs[0]); + TEST_ASSERT(run->s.regs.diag318 == get_diag318_info(), + "diag318 sync regs value incorrect 0x%llx.", + run->s.regs.diag318); vcpu_regs_get(vm, VCPU_ID, ®s); compare_regs(®s, &run->s.regs); @@ -177,6 +187,7 @@ int main(int argc, char *argv[]) run->kvm_valid_regs = TEST_SYNC_FIELDS; run->kvm_dirty_regs = 0; run->s.regs.gprs[11] = 0xDEADBEEF; + run->s.regs.diag318 = 0x4B1D; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, @@ -186,6 +197,9 @@ int main(int argc, char *argv[]) TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF, "r11 sync regs value incorrect 0x%llx.", run->s.regs.gprs[11]); + TEST_ASSERT(run->s.regs.diag318 != 0x4B1D, + "diag318 sync regs value incorrect 0x%llx.", + run->s.regs.diag318); kvm_vm_free(vm); -- cgit v1.3.1 From a5b7b1194a57bc59f289f3e4433a1be81cc3e19d Mon Sep 17 00:00:00 2001 From: Veronika Kabatova Date: Thu, 10 Dec 2020 13:01:34 +0100 Subject: selftests/bpf: Drop tcp-{client,server}.py from Makefile The files don't exist anymore so this breaks generic kselftest builds when using "make install" or "make gen_tar". Fixes: 247f0ec361b7 ("selftests/bpf: Drop python client/server in favor of threads") Signed-off-by: Veronika Kabatova Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201210120134.2148482-1-vkabatov@redhat.com --- tools/testing/selftests/bpf/Makefile | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 944ae17a39ed..50b3495d7ddf 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -75,8 +75,6 @@ TEST_PROGS := test_kmod.sh \ TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh \ - tcp_client.py \ - tcp_server.py \ test_xdp_vlan.sh # Compile but not part of 'make run_tests' -- cgit v1.3.1 From 7535a3526dfe78db02a08ca2fa6bf69f393105dd Mon Sep 17 00:00:00 2001 From: Weqaar Janjua Date: Thu, 10 Dec 2020 11:54:35 +0000 Subject: selftests/bpf: Xsk selftests - adding xdpxceiver to .gitignore This patch adds *xdpxceiver* to selftests/bpf/.gitignore Reported-by: Yonghong Song Suggested-by: Yonghong Song Signed-off-by: Weqaar Janjua Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201210115435.3995-1-weqaar.a.janjua@intel.com --- tools/testing/selftests/bpf/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 752d8edddc66..f5b7ef93618c 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -36,3 +36,4 @@ test_cpp /runqslower /bench *.ko +xdpxceiver -- cgit v1.3.1 From 41003dd0241c2ceb2461a88a18ff461795f2af57 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 9 Dec 2020 15:29:12 +0100 Subject: selftests/bpf: Make selftest compilation work on clang 11 We can't compile test_core_reloc_module.c selftest with clang 11, compile fails with: CLNG-LLC [test_maps] test_core_reloc_module.o progs/test_core_reloc_module.c:57:21: error: use of unknown builtin \ '__builtin_preserve_type_info' [-Wimplicit-function-declaration] out->read_ctx_sz = bpf_core_type_size(struct bpf_testmod_test_read_ctx); Skipping these tests if __builtin_preserve_type_info() is not supported by compiler. Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201209142912.99145-1-jolsa@kernel.org --- tools/testing/selftests/bpf/progs/test_core_reloc_module.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c index 56363959f7b0..f59f175c7baf 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c @@ -40,6 +40,7 @@ int BPF_PROG(test_core_module_probed, struct task_struct *task, struct bpf_testmod_test_read_ctx *read_ctx) { +#if __has_builtin(__builtin_preserve_enum_value) struct core_reloc_module_output *out = (void *)&data.out; __u64 pid_tgid = bpf_get_current_pid_tgid(); __u32 real_tgid = (__u32)(pid_tgid >> 32); @@ -61,6 +62,9 @@ int BPF_PROG(test_core_module_probed, out->len_exists = bpf_core_field_exists(read_ctx->len); out->comm_len = BPF_CORE_READ_STR_INTO(&out->comm, task, comm); +#else + data.skip = true; +#endif return 0; } @@ -70,6 +74,7 @@ int BPF_PROG(test_core_module_direct, struct task_struct *task, struct bpf_testmod_test_read_ctx *read_ctx) { +#if __has_builtin(__builtin_preserve_enum_value) struct core_reloc_module_output *out = (void *)&data.out; __u64 pid_tgid = bpf_get_current_pid_tgid(); __u32 real_tgid = (__u32)(pid_tgid >> 32); @@ -91,6 +96,9 @@ int BPF_PROG(test_core_module_direct, out->len_exists = bpf_core_field_exists(read_ctx->len); out->comm_len = BPF_CORE_READ_STR_INTO(&out->comm, task, comm); +#else + data.skip = true; +#endif return 0; } -- cgit v1.3.1 From 511a76bcb0ce242a19153658b25437906cc6070e Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 8 Dec 2020 19:01:52 +0100 Subject: selftests/bpf: Add test for signed 32-bit bound check bug After a 32-bit load followed by a branch, the verifier would reduce the maximum bound of the register to 0x7fffffff, allowing a user to bypass bound checks. Ensure such a program is rejected. In the second test, the 64-bit compare should not sufficient to determine whether the signed 32-bit lower bound is 0, so the verifier should reject the second branch. Signed-off-by: Jean-Philippe Brucker Acked-by: John Fastabend Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/bounds.c | 41 +++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c index dac40de3f868..57ed67b86074 100644 --- a/tools/testing/selftests/bpf/verifier/bounds.c +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -703,3 +703,44 @@ .fixup_map_hash_8b = { 3 }, .result = ACCEPT, }, +{ + "bounds checks after 32-bit truncation. test 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + /* This used to reduce the max bound to 0x7fffffff */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0x7fffffff, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "bounds checks after 32-bit truncation. test 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_1, 1, 1), + BPF_JMP32_IMM(BPF_JSLT, BPF_REG_1, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, +}, -- cgit v1.3.1 From 77ce220c0549dcc3db8226c61c60e83fc59dfafc Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 8 Dec 2020 19:01:53 +0100 Subject: selftests/bpf: Fix array access with signed variable test The test fails because of a recent fix to the verifier, even though this program is valid. In details what happens is: 7: (61) r1 = *(u32 *)(r0 +0) Load a 32-bit value, with signed bounds [S32_MIN, S32_MAX]. The bounds of the 64-bit value are [0, U32_MAX]... 8: (65) if r1 s> 0xffffffff goto pc+1 ... therefore this is always true (the operand is sign-extended). 10: (b4) w2 = 11 11: (6d) if r2 s> r1 goto pc+1 When true, the 64-bit bounds become [0, 10]. The 32-bit bounds are still [S32_MIN, 10]. 13: (64) w1 <<= 2 Because this is a 32-bit operation, the verifier propagates the new 32-bit bounds to the 64-bit ones, and the knowledge gained from insn 11 is lost. 14: (0f) r0 += r1 15: (7a) *(u64 *)(r0 +0) = 4 Then the verifier considers r0 unbounded here, rejecting the test. To make the test work, change insn 8 to check the sign of the 32-bit value. Signed-off-by: Jean-Philippe Brucker Acked-by: John Fastabend Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/array_access.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c index 1c4b1939f5a8..bed53b561e04 100644 --- a/tools/testing/selftests/bpf/verifier/array_access.c +++ b/tools/testing/selftests/bpf/verifier/array_access.c @@ -68,7 +68,7 @@ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1), + BPF_JMP32_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1), BPF_MOV32_IMM(BPF_REG_1, 0), BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES), BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1), -- cgit v1.3.1 From 3615bdf6d9b19db12b1589861609b4f1c6a8d303 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 8 Dec 2020 19:01:54 +0100 Subject: selftests/bpf: Fix "dubious pointer arithmetic" test The verifier trace changed following a bugfix. After checking the 64-bit sign, only the upper bit mask is known, not bit 31. Update the test accordingly. Signed-off-by: Jean-Philippe Brucker Acked-by: John Fastabend Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/align.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 52414058a627..5861446d0777 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -456,10 +456,10 @@ static struct bpf_align_test tests[] = { */ {7, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>=0 */ - {9, "R5=inv(id=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"}, + {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, /* packet pointer + nonnegative (4n+2) */ - {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"}, - {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"}, + {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, + {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. * We checked the bounds, but it might have been able * to overflow if the packet pointer started in the @@ -467,7 +467,7 @@ static struct bpf_align_test tests[] = { * So we did not get a 'range' on R6, and the access * attempt will fail. */ - {15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"}, + {15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, } }, { -- cgit v1.3.1 From 38bf8cd821be292e7d8e6f6283d67c5d9708f887 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 9 Dec 2020 12:21:13 +0100 Subject: selftests: fix poll error in udpgro.sh The test program udpgso_bench_rx always invokes the poll() syscall with a timeout of 10ms. If a larger timeout is specified via the command line, udpgso_bench_rx is supposed to do multiple poll() calls till the timeout is expired or an event is received. Currently the poll() loop errors out after the first invocation with no events, and may causes self-tests failure alike: failed GRO with custom segment size ./udpgso_bench_rx: poll: 0x0 expected 0x1 This change addresses the issue allowing the poll() loop to consume all the configured timeout. Fixes: ada641ff6ed3 ("selftests: fixes for UDP GRO") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- tools/testing/selftests/net/udpgso_bench_rx.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c index db3d4a8b5a4c..76a24052f4b4 100644 --- a/tools/testing/selftests/net/udpgso_bench_rx.c +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -113,6 +113,9 @@ static void do_poll(int fd, int timeout_ms) interrupted = true; break; } + + /* no events and more time to wait, do poll again */ + continue; } if (pfd.revents != POLLIN) error(1, errno, "poll: 0x%x expected 0x%x\n", -- cgit v1.3.1 From a67079b03165a17f9aceab3dd26b1638af68e0fc Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 10 Dec 2020 17:59:46 -0800 Subject: selftests/bpf: fix bpf_testmod.ko recompilation logic bpf_testmod.ko build rule declared dependency on VMLINUX_BTF, but the variable itself was initialized after the rule was declared, which often caused bpf_testmod.ko to not be re-compiled. Fix by moving VMLINUX_BTF determination sooner. Also enforce bpf_testmod.ko recompilation when we detect that vmlinux image changed by removing bpf_testmod/bpf_testmod.ko. This is necessary to generate correct module's split BTF. Without it, Kbuild's module build logic might determine that nothing changed on the kernel side and thus bpf_testmod.ko shouldn't be rebuilt, so won't re-generate module BTF, which often leads to module's BTF with wrong string offsets against vmlinux BTF. Removing .ko file forces Kbuild to re-build the module. Reported-by: Alexei Starovoitov Fixes: 9f7fa225894c ("selftests/bpf: Add bpf_testmod kernel module for testing") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20201211015946.4062098-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 50b3495d7ddf..8b515a17f44b 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -116,6 +116,13 @@ INCLUDE_DIR := $(SCRATCH_DIR)/include BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a RESOLVE_BTFIDS := $(BUILD_DIR)/resolve_btfids/resolve_btfids +VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ + $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ + ../../../../vmlinux \ + /sys/kernel/btf/vmlinux \ + /boot/vmlinux-$(shell uname -r) +VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) + # Define simple and short `make test_progs`, `make test_sysctl`, etc targets # to build individual tests. # NOTE: Semicolon at the end is critical to override lib.mk's default static @@ -140,6 +147,7 @@ $(OUTPUT)/urandom_read: urandom_read.c $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) + $(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation $(Q)$(MAKE) $(submake_extras) -C bpf_testmod $(Q)cp bpf_testmod/bpf_testmod.ko $@ @@ -147,13 +155,6 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) $(call msg,CC,,$@) $(Q)$(CC) -c $(CFLAGS) -o $@ $< -VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ - $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ - ../../../../vmlinux \ - /sys/kernel/btf/vmlinux \ - /boot/vmlinux-$(shell uname -r) -VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) - DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) -- cgit v1.3.1 From 89ad7420b25c2b40a4d916f4fd43b9ccacd50500 Mon Sep 17 00:00:00 2001 From: Andrew Delgadillo Date: Fri, 11 Dec 2020 00:43:44 +0000 Subject: selftests/bpf: Drop the need for LLVM's llc LLC is meant for compiler development and debugging. Consequently, it exposes many low level options about its backend. To avoid future bugs introduced by using the raw LLC tool, use clang directly so that all appropriate options are passed to the back end. Additionally, simplify the Makefile by removing the CLANG_NATIVE_BPF_BUILD_RULE as it is not being use, stop passing dwarfris attr since elfutils/libdw now supports the bpf backend (which should work with any recent pahole), and stop passing alu32 since -mcpu=v3 implies alu32. Signed-off-by: Andrew Delgadillo Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201211004344.3355074-1-adelg@google.com --- tools/testing/selftests/bpf/Makefile | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 8b515a17f44b..8c33e999319a 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -19,7 +19,6 @@ ifneq ($(wildcard $(GENHDR)),) endif CLANG ?= clang -LLC ?= llc LLVM_OBJCOPY ?= llvm-objcopy BPF_GCC ?= $(shell command -v bpf-gcc;) SAN_CFLAGS ?= @@ -253,31 +252,19 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h # $1 - input .c file # $2 - output .o file # $3 - CFLAGS -# $4 - LDFLAGS define CLANG_BPF_BUILD_RULE - $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) - $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \ - -c $1 -o - || echo "BPF obj compilation failed") | \ - $(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 + $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) + $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -o $2 -mcpu=v3 endef # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 define CLANG_NOALU32_BPF_BUILD_RULE - $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) - $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \ - -c $1 -o - || echo "BPF obj compilation failed") | \ - $(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2 -endef -# Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC -define CLANG_NATIVE_BPF_BUILD_RULE $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) - $(Q)($(CLANG) $3 -O2 -emit-llvm \ - -c $1 -o - || echo "BPF obj compilation failed") | \ - $(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 + $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -o $2 -mcpu=v2 endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2) - $(Q)$(BPF_GCC) $3 $4 -O2 -c $1 -o $2 + $(Q)$(BPF_GCC) $3 -O2 -c $1 -o $2 endef SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c @@ -332,8 +319,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $$(INCLUDE_DIR)/vmlinux.h \ $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT) $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ - $(TRUNNER_BPF_CFLAGS), \ - $(TRUNNER_BPF_LDFLAGS)) + $(TRUNNER_BPF_CFLAGS)) $(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \ $(TRUNNER_OUTPUT)/%.o \ @@ -401,19 +387,16 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(wildcard progs/btf_dump_test_case_*.c) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -TRUNNER_BPF_LDFLAGS := -mattr=+alu32 $(eval $(call DEFINE_TEST_RUNNER,test_progs)) # Define test_progs-no_alu32 test runner. TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE -TRUNNER_BPF_LDFLAGS := $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32)) # Define test_progs BPF-GCC-flavored test runner. ifneq ($(BPF_GCC),) TRUNNER_BPF_BUILD_RULE := GCC_BPF_BUILD_RULE TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc) -TRUNNER_BPF_LDFLAGS := $(eval $(call DEFINE_TEST_RUNNER,test_progs,bpf_gcc)) endif @@ -424,7 +407,6 @@ TRUNNER_EXTRA_SOURCES := test_maps.c TRUNNER_EXTRA_FILES := TRUNNER_BPF_BUILD_RULE := $$(error no BPF objects should be built) TRUNNER_BPF_CFLAGS := -TRUNNER_BPF_LDFLAGS := $(eval $(call DEFINE_TEST_RUNNER,test_maps)) # Define test_verifier test runner. -- cgit v1.3.1 From b4fe9fec51ef48011f11c2da4099f0b530449c92 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 11 Dec 2020 01:07:11 +0000 Subject: selftests/bpf: Silence ima_setup.sh when not running in verbose mode. Currently, ima_setup.sh spews outputs from commands like mkfs and dd on the terminal without taking into account the verbosity level of the test framework. Update test_progs to set the environment variable SELFTESTS_VERBOSE=1 when a verbose output is requested. This environment variable is then used by ima_setup.sh (and can be used by other similar scripts) to obey the verbosity level of the test harness without needing to re-implement command line options for verbosity. In "silent" mode, the script saves the output to a temporary file, the contents of which are echoed back to stderr when the script encounters an error. Fixes: 34b82d3ac105 ("bpf: Add a selftest for bpf_ima_inode_hash") Reported-by: Andrii Nakryiko Suggested-by: Andrii Nakryiko Signed-off-by: KP Singh Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201211010711.3716917-1-kpsingh@kernel.org --- tools/testing/selftests/bpf/ima_setup.sh | 24 ++++++++++++++++++++++++ tools/testing/selftests/bpf/test_progs.c | 10 ++++++++++ 2 files changed, 34 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh index 2bfc646bc230..8e62581113a3 100755 --- a/tools/testing/selftests/bpf/ima_setup.sh +++ b/tools/testing/selftests/bpf/ima_setup.sh @@ -7,6 +7,8 @@ set -o pipefail IMA_POLICY_FILE="/sys/kernel/security/ima/policy" TEST_BINARY="/bin/true" +VERBOSE="${SELFTESTS_VERBOSE:=0}" +LOG_FILE="$(mktemp /tmp/ima_setup.XXXX.log)" usage() { @@ -75,6 +77,19 @@ run() exec "${copied_bin_path}" } +catch() +{ + local exit_code="$1" + local log_file="$2" + + if [[ "${exit_code}" -ne 0 ]]; then + cat "${log_file}" >&3 + fi + + rm -f "${log_file}" + exit ${exit_code} +} + main() { [[ $# -ne 2 ]] && usage @@ -96,4 +111,13 @@ main() fi } +trap 'catch "$?" "${LOG_FILE}"' EXIT + +if [[ "${VERBOSE}" -eq 0 ]]; then + # Save the stderr to 3 so that we can output back to + # it incase of an error. + exec 3>&2 1>"${LOG_FILE}" 2>&1 +fi + main "$@" +rm -f "${LOG_FILE}" diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 5ef081bdae4e..7d077d48cadd 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -587,6 +587,16 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) return -EINVAL; } } + + if (env->verbosity > VERBOSE_NONE) { + if (setenv("SELFTESTS_VERBOSE", "1", 1) == -1) { + fprintf(stderr, + "Unable to setenv SELFTESTS_VERBOSE=1 (errno=%d)", + errno); + return -1; + } + } + break; case ARG_GET_TEST_CNT: env->get_test_cnt = true; -- cgit v1.3.1 From b7906b70a2337e445b8dca3ce7ba8976b6ebd07d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 11 Dec 2020 22:36:25 +0100 Subject: bpf: Fix enum names for bpf_this_cpu_ptr() and bpf_per_cpu_ptr() helpers Remove bpf_ prefix, which causes these helpers to be reported in verifier dump as bpf_bpf_this_cpu_ptr() and bpf_bpf_per_cpu_ptr(), respectively. Lets fix it as long as it is still possible before UAPI freezes on these helpers. Fixes: eaa6bcb71ef6 ("bpf: Introduce bpf_per_cpu_ptr()") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Linus Torvalds --- include/uapi/linux/bpf.h | 4 ++-- kernel/bpf/helpers.c | 4 ++-- kernel/trace/bpf_trace.c | 4 ++-- tools/include/uapi/linux/bpf.h | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e6ceac3f7d62..556216dc9703 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3897,8 +3897,8 @@ union bpf_attr { FN(seq_printf_btf), \ FN(skb_cgroup_classid), \ FN(redirect_neigh), \ - FN(bpf_per_cpu_ptr), \ - FN(bpf_this_cpu_ptr), \ + FN(per_cpu_ptr), \ + FN(this_cpu_ptr), \ FN(redirect_peer), \ /* */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 25520f5eeaf6..deda1185237b 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -717,9 +717,9 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_snprintf_btf_proto; case BPF_FUNC_jiffies64: return &bpf_jiffies64_proto; - case BPF_FUNC_bpf_per_cpu_ptr: + case BPF_FUNC_per_cpu_ptr: return &bpf_per_cpu_ptr_proto; - case BPF_FUNC_bpf_this_cpu_ptr: + case BPF_FUNC_this_cpu_ptr: return &bpf_this_cpu_ptr_proto; default: break; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 048c655315f1..a125ea5e04cd 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1337,9 +1337,9 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL; case BPF_FUNC_snprintf_btf: return &bpf_snprintf_btf_proto; - case BPF_FUNC_bpf_per_cpu_ptr: + case BPF_FUNC_per_cpu_ptr: return &bpf_per_cpu_ptr_proto; - case BPF_FUNC_bpf_this_cpu_ptr: + case BPF_FUNC_this_cpu_ptr: return &bpf_this_cpu_ptr_proto; default: return NULL; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e6ceac3f7d62..556216dc9703 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3897,8 +3897,8 @@ union bpf_attr { FN(seq_printf_btf), \ FN(skb_cgroup_classid), \ FN(redirect_neigh), \ - FN(bpf_per_cpu_ptr), \ - FN(bpf_this_cpu_ptr), \ + FN(per_cpu_ptr), \ + FN(this_cpu_ptr), \ FN(redirect_peer), \ /* */ -- cgit v1.3.1 From 3cea1891748e0ed8e79fa5d9afe40750319751d1 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 12 Oct 2020 12:47:16 -0700 Subject: selftests: kvm: Test MSR exiting to userspace Add a selftest to test that when the ioctl KVM_X86_SET_MSR_FILTER is called with an MSR list, those MSRs exit to userspace. This test uses 3 MSRs to test this: 1. MSR_IA32_XSS, an MSR the kernel knows about. 2. MSR_IA32_FLUSH_CMD, an MSR the kernel does not know about. 3. MSR_NON_EXISTENT, an MSR invented in this test for the purposes of passing a fake MSR from the guest to userspace. KVM just acts as a pass through. Userspace is also able to inject a #GP. This is demonstrated when MSR_IA32_XSS and MSR_IA32_FLUSH_CMD are misused in the test. When this happens a #GP is initiated in userspace to be thrown in the guest which is handled gracefully by the exception handling framework introduced earlier in this series. Tests for the generic instruction emulator were also added. For this to work the module parameter kvm.force_emulation_prefix=1 has to be enabled. If it isn't enabled the tests will be skipped. A test was also added to ensure the MSR permission bitmap is being set correctly by executing reads and writes of MSR_FS_BASE and MSR_GS_BASE in the guest while alternating which MSR userspace should intercept. If the permission bitmap is being set correctly only one of the MSRs should be coming through at a time, and the guest should be able to read and write the other one directly. Signed-off-by: Aaron Lewis Reviewed-by: Alexander Graf Message-Id: <20201012194716.3950330-5-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 3 +- tools/testing/selftests/kvm/lib/kvm_util.c | 2 + .../selftests/kvm/x86_64/userspace_msr_exit_test.c | 560 +++++++++++++++++++++ 4 files changed, 565 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 5468db7dd674..5008bf90772b 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -18,6 +18,7 @@ /x86_64/sync_regs_test /x86_64/tsc_msrs_test /x86_64/user_msr_test +/x86_64/userspace_msr_exit_test /x86_64/vmx_apic_access_test /x86_64/vmx_close_while_nested_test /x86_64/vmx_dirty_log_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 4febf4d5ead9..d54870db21c4 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -50,6 +50,8 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test +TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test +TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test @@ -58,7 +60,6 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test TEST_GEN_PROGS_x86_64 += x86_64/debug_regs TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test -TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index b2c426adb87f..88ef7067f1e6 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1806,6 +1806,8 @@ static struct exit_reason { {KVM_EXIT_OSI, "OSI"}, {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"}, {KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"}, + {KVM_EXIT_X86_RDMSR, "RDMSR"}, + {KVM_EXIT_X86_WRMSR, "WRMSR"}, #ifdef KVM_EXIT_MEMORY_NOT_PRESENT {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"}, #endif diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c new file mode 100644 index 000000000000..e8b6918cdea0 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c @@ -0,0 +1,560 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Google LLC. + * + * Tests for exiting into userspace on registered MSRs + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "vmx.h" + +/* Forced emulation prefix, used to invoke the emulator unconditionally. */ +#define KVM_FEP "ud2; .byte 'k', 'v', 'm';" +#define KVM_FEP_LENGTH 5 +static int fep_available = 1; + +#define VCPU_ID 1 +#define MSR_NON_EXISTENT 0x474f4f00 + +u64 deny_bits = 0; +struct kvm_msr_filter filter = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, + .ranges = { + { + .flags = KVM_MSR_FILTER_READ | + KVM_MSR_FILTER_WRITE, + .nmsrs = 1, + /* Test an MSR the kernel knows about. */ + .base = MSR_IA32_XSS, + .bitmap = (uint8_t*)&deny_bits, + }, { + .flags = KVM_MSR_FILTER_READ | + KVM_MSR_FILTER_WRITE, + .nmsrs = 1, + /* Test an MSR the kernel doesn't know about. */ + .base = MSR_IA32_FLUSH_CMD, + .bitmap = (uint8_t*)&deny_bits, + }, { + .flags = KVM_MSR_FILTER_READ | + KVM_MSR_FILTER_WRITE, + .nmsrs = 1, + /* Test a fabricated MSR that no one knows about. */ + .base = MSR_NON_EXISTENT, + .bitmap = (uint8_t*)&deny_bits, + }, + }, +}; + +struct kvm_msr_filter filter_fs = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, + .ranges = { + { + .flags = KVM_MSR_FILTER_READ | + KVM_MSR_FILTER_WRITE, + .nmsrs = 1, + .base = MSR_FS_BASE, + .bitmap = (uint8_t*)&deny_bits, + }, + }, +}; + +struct kvm_msr_filter filter_gs = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, + .ranges = { + { + .flags = KVM_MSR_FILTER_READ | + KVM_MSR_FILTER_WRITE, + .nmsrs = 1, + .base = MSR_GS_BASE, + .bitmap = (uint8_t*)&deny_bits, + }, + }, +}; + +uint64_t msr_non_existent_data; +int guest_exception_count; + +/* + * Note: Force test_rdmsr() to not be inlined to prevent the labels, + * rdmsr_start and rdmsr_end, from being defined multiple times. + */ +static noinline uint64_t test_rdmsr(uint32_t msr) +{ + uint32_t a, d; + + guest_exception_count = 0; + + __asm__ __volatile__("rdmsr_start: rdmsr; rdmsr_end:" : + "=a"(a), "=d"(d) : "c"(msr) : "memory"); + + return a | ((uint64_t) d << 32); +} + +/* + * Note: Force test_wrmsr() to not be inlined to prevent the labels, + * wrmsr_start and wrmsr_end, from being defined multiple times. + */ +static noinline void test_wrmsr(uint32_t msr, uint64_t value) +{ + uint32_t a = value; + uint32_t d = value >> 32; + + guest_exception_count = 0; + + __asm__ __volatile__("wrmsr_start: wrmsr; wrmsr_end:" :: + "a"(a), "d"(d), "c"(msr) : "memory"); +} + +extern char rdmsr_start, rdmsr_end; +extern char wrmsr_start, wrmsr_end; + +/* + * Note: Force test_em_rdmsr() to not be inlined to prevent the labels, + * rdmsr_start and rdmsr_end, from being defined multiple times. + */ +static noinline uint64_t test_em_rdmsr(uint32_t msr) +{ + uint32_t a, d; + + guest_exception_count = 0; + + __asm__ __volatile__(KVM_FEP "em_rdmsr_start: rdmsr; em_rdmsr_end:" : + "=a"(a), "=d"(d) : "c"(msr) : "memory"); + + return a | ((uint64_t) d << 32); +} + +/* + * Note: Force test_em_wrmsr() to not be inlined to prevent the labels, + * wrmsr_start and wrmsr_end, from being defined multiple times. + */ +static noinline void test_em_wrmsr(uint32_t msr, uint64_t value) +{ + uint32_t a = value; + uint32_t d = value >> 32; + + guest_exception_count = 0; + + __asm__ __volatile__(KVM_FEP "em_wrmsr_start: wrmsr; em_wrmsr_end:" :: + "a"(a), "d"(d), "c"(msr) : "memory"); +} + +extern char em_rdmsr_start, em_rdmsr_end; +extern char em_wrmsr_start, em_wrmsr_end; + +static void guest_code(void) +{ + uint64_t data; + + /* + * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_XSS. + * + * A GP is thrown if anything other than 0 is written to + * MSR_IA32_XSS. + */ + data = test_rdmsr(MSR_IA32_XSS); + GUEST_ASSERT(data == 0); + GUEST_ASSERT(guest_exception_count == 0); + + test_wrmsr(MSR_IA32_XSS, 0); + GUEST_ASSERT(guest_exception_count == 0); + + test_wrmsr(MSR_IA32_XSS, 1); + GUEST_ASSERT(guest_exception_count == 1); + + /* + * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_FLUSH_CMD. + * + * A GP is thrown if MSR_IA32_FLUSH_CMD is read + * from or if a value other than 1 is written to it. + */ + test_rdmsr(MSR_IA32_FLUSH_CMD); + GUEST_ASSERT(guest_exception_count == 1); + + test_wrmsr(MSR_IA32_FLUSH_CMD, 0); + GUEST_ASSERT(guest_exception_count == 1); + + test_wrmsr(MSR_IA32_FLUSH_CMD, 1); + GUEST_ASSERT(guest_exception_count == 0); + + /* + * Test userspace intercepting rdmsr / wrmsr for MSR_NON_EXISTENT. + * + * Test that a fabricated MSR can pass through the kernel + * and be handled in userspace. + */ + test_wrmsr(MSR_NON_EXISTENT, 2); + GUEST_ASSERT(guest_exception_count == 0); + + data = test_rdmsr(MSR_NON_EXISTENT); + GUEST_ASSERT(data == 2); + GUEST_ASSERT(guest_exception_count == 0); + + /* + * Test to see if the instruction emulator is available (ie: the module + * parameter 'kvm.force_emulation_prefix=1' is set). This instruction + * will #UD if it isn't available. + */ + __asm__ __volatile__(KVM_FEP "nop"); + + if (fep_available) { + /* Let userspace know we aren't done. */ + GUEST_SYNC(0); + + /* + * Now run the same tests with the instruction emulator. + */ + data = test_em_rdmsr(MSR_IA32_XSS); + GUEST_ASSERT(data == 0); + GUEST_ASSERT(guest_exception_count == 0); + test_em_wrmsr(MSR_IA32_XSS, 0); + GUEST_ASSERT(guest_exception_count == 0); + test_em_wrmsr(MSR_IA32_XSS, 1); + GUEST_ASSERT(guest_exception_count == 1); + + test_em_rdmsr(MSR_IA32_FLUSH_CMD); + GUEST_ASSERT(guest_exception_count == 1); + test_em_wrmsr(MSR_IA32_FLUSH_CMD, 0); + GUEST_ASSERT(guest_exception_count == 1); + test_em_wrmsr(MSR_IA32_FLUSH_CMD, 1); + GUEST_ASSERT(guest_exception_count == 0); + + test_em_wrmsr(MSR_NON_EXISTENT, 2); + GUEST_ASSERT(guest_exception_count == 0); + data = test_em_rdmsr(MSR_NON_EXISTENT); + GUEST_ASSERT(data == 2); + GUEST_ASSERT(guest_exception_count == 0); + } + + GUEST_DONE(); +} + + +static void guest_code_permission_bitmap(void) +{ + uint64_t data; + + test_wrmsr(MSR_FS_BASE, 0); + data = test_rdmsr(MSR_FS_BASE); + GUEST_ASSERT(data == MSR_FS_BASE); + + test_wrmsr(MSR_GS_BASE, 0); + data = test_rdmsr(MSR_GS_BASE); + GUEST_ASSERT(data == 0); + + /* Let userspace know to switch the filter */ + GUEST_SYNC(0); + + test_wrmsr(MSR_FS_BASE, 0); + data = test_rdmsr(MSR_FS_BASE); + GUEST_ASSERT(data == 0); + + test_wrmsr(MSR_GS_BASE, 0); + data = test_rdmsr(MSR_GS_BASE); + GUEST_ASSERT(data == MSR_GS_BASE); + + GUEST_DONE(); +} + +static void __guest_gp_handler(struct ex_regs *regs, + char *r_start, char *r_end, + char *w_start, char *w_end) +{ + if (regs->rip == (uintptr_t)r_start) { + regs->rip = (uintptr_t)r_end; + regs->rax = 0; + regs->rdx = 0; + } else if (regs->rip == (uintptr_t)w_start) { + regs->rip = (uintptr_t)w_end; + } else { + GUEST_ASSERT(!"RIP is at an unknown location!"); + } + + ++guest_exception_count; +} + +static void guest_gp_handler(struct ex_regs *regs) +{ + __guest_gp_handler(regs, &rdmsr_start, &rdmsr_end, + &wrmsr_start, &wrmsr_end); +} + +static void guest_fep_gp_handler(struct ex_regs *regs) +{ + __guest_gp_handler(regs, &em_rdmsr_start, &em_rdmsr_end, + &em_wrmsr_start, &em_wrmsr_end); +} + +static void guest_ud_handler(struct ex_regs *regs) +{ + fep_available = 0; + regs->rip += KVM_FEP_LENGTH; +} + +static void run_guest(struct kvm_vm *vm) +{ + int rc; + + rc = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc); +} + +static void check_for_guest_assert(struct kvm_vm *vm) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc; + + if (run->exit_reason == KVM_EXIT_IO && + get_ucall(vm, VCPU_ID, &uc) == UCALL_ABORT) { + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], + __FILE__, uc.args[1]); + } +} + +static void process_rdmsr(struct kvm_vm *vm, uint32_t msr_index) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + + check_for_guest_assert(vm); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_X86_RDMSR, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->msr.index == msr_index, + "Unexpected msr (0x%04x), expected 0x%04x", + run->msr.index, msr_index); + + switch (run->msr.index) { + case MSR_IA32_XSS: + run->msr.data = 0; + break; + case MSR_IA32_FLUSH_CMD: + run->msr.error = 1; + break; + case MSR_NON_EXISTENT: + run->msr.data = msr_non_existent_data; + break; + case MSR_FS_BASE: + run->msr.data = MSR_FS_BASE; + break; + case MSR_GS_BASE: + run->msr.data = MSR_GS_BASE; + break; + default: + TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index); + } +} + +static void process_wrmsr(struct kvm_vm *vm, uint32_t msr_index) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + + check_for_guest_assert(vm); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_X86_WRMSR, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->msr.index == msr_index, + "Unexpected msr (0x%04x), expected 0x%04x", + run->msr.index, msr_index); + + switch (run->msr.index) { + case MSR_IA32_XSS: + if (run->msr.data != 0) + run->msr.error = 1; + break; + case MSR_IA32_FLUSH_CMD: + if (run->msr.data != 1) + run->msr.error = 1; + break; + case MSR_NON_EXISTENT: + msr_non_existent_data = run->msr.data; + break; + case MSR_FS_BASE: + case MSR_GS_BASE: + break; + default: + TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index); + } +} + +static void process_ucall_done(struct kvm_vm *vm) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc; + + check_for_guest_assert(vm); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s)", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + TEST_ASSERT(get_ucall(vm, VCPU_ID, &uc) == UCALL_DONE, + "Unexpected ucall command: %lu, expected UCALL_DONE (%d)", + uc.cmd, UCALL_DONE); +} + +static uint64_t process_ucall(struct kvm_vm *vm) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc = {}; + + check_for_guest_assert(vm); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s)", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_SYNC: + break; + case UCALL_ABORT: + check_for_guest_assert(vm); + break; + case UCALL_DONE: + process_ucall_done(vm); + break; + default: + TEST_ASSERT(false, "Unexpected ucall"); + } + + return uc.cmd; +} + +static void run_guest_then_process_rdmsr(struct kvm_vm *vm, uint32_t msr_index) +{ + run_guest(vm); + process_rdmsr(vm, msr_index); +} + +static void run_guest_then_process_wrmsr(struct kvm_vm *vm, uint32_t msr_index) +{ + run_guest(vm); + process_wrmsr(vm, msr_index); +} + +static uint64_t run_guest_then_process_ucall(struct kvm_vm *vm) +{ + run_guest(vm); + return process_ucall(vm); +} + +static void run_guest_then_process_ucall_done(struct kvm_vm *vm) +{ + run_guest(vm); + process_ucall_done(vm); +} + +static void test_msr_filter(void) { + struct kvm_enable_cap cap = { + .cap = KVM_CAP_X86_USER_SPACE_MSR, + .args[0] = KVM_MSR_EXIT_REASON_FILTER, + }; + struct kvm_vm *vm; + int rc; + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); + TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); + vm_enable_cap(vm, &cap); + + rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); + TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); + + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + + vm_handle_exception(vm, GP_VECTOR, guest_gp_handler); + + /* Process guest code userspace exits. */ + run_guest_then_process_rdmsr(vm, MSR_IA32_XSS); + run_guest_then_process_wrmsr(vm, MSR_IA32_XSS); + run_guest_then_process_wrmsr(vm, MSR_IA32_XSS); + + run_guest_then_process_rdmsr(vm, MSR_IA32_FLUSH_CMD); + run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD); + run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD); + + run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT); + run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT); + + vm_handle_exception(vm, UD_VECTOR, guest_ud_handler); + run_guest(vm); + vm_handle_exception(vm, UD_VECTOR, NULL); + + if (process_ucall(vm) != UCALL_DONE) { + vm_handle_exception(vm, GP_VECTOR, guest_fep_gp_handler); + + /* Process emulated rdmsr and wrmsr instructions. */ + run_guest_then_process_rdmsr(vm, MSR_IA32_XSS); + run_guest_then_process_wrmsr(vm, MSR_IA32_XSS); + run_guest_then_process_wrmsr(vm, MSR_IA32_XSS); + + run_guest_then_process_rdmsr(vm, MSR_IA32_FLUSH_CMD); + run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD); + run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD); + + run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT); + run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT); + + /* Confirm the guest completed without issues. */ + run_guest_then_process_ucall_done(vm); + } else { + printf("To run the instruction emulated tests set the module parameter 'kvm.force_emulation_prefix=1'\n"); + } + + kvm_vm_free(vm); +} + +static void test_msr_permission_bitmap(void) { + struct kvm_enable_cap cap = { + .cap = KVM_CAP_X86_USER_SPACE_MSR, + .args[0] = KVM_MSR_EXIT_REASON_FILTER, + }; + struct kvm_vm *vm; + int rc; + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code_permission_bitmap); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); + TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); + vm_enable_cap(vm, &cap); + + rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); + TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); + + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs); + run_guest_then_process_wrmsr(vm, MSR_FS_BASE); + run_guest_then_process_rdmsr(vm, MSR_FS_BASE); + TEST_ASSERT(run_guest_then_process_ucall(vm) == UCALL_SYNC, "Expected ucall state to be UCALL_SYNC."); + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs); + run_guest_then_process_wrmsr(vm, MSR_GS_BASE); + run_guest_then_process_rdmsr(vm, MSR_GS_BASE); + run_guest_then_process_ucall_done(vm); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + test_msr_filter(); + + test_msr_permission_bitmap(); + + return 0; +} -- cgit v1.3.1 From fb6360534ecc0a2703f7b6076cf1397385d23df8 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Fri, 4 Dec 2020 09:25:31 -0800 Subject: selftests: kvm: Merge user_msr_test into userspace_msr_exit_test Both user_msr_test and userspace_msr_exit_test tests the functionality of kvm_msr_filter. Instead of testing this feature in two tests, merge them together, so there is only one test for this feature. Signed-off-by: Aaron Lewis Message-Id: <20201204172530.2958493-1-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 - tools/testing/selftests/kvm/Makefile | 1 - tools/testing/selftests/kvm/x86_64/user_msr_test.c | 251 -------------------- .../selftests/kvm/x86_64/userspace_msr_exit_test.c | 262 +++++++++++++++++++-- 4 files changed, 236 insertions(+), 279 deletions(-) delete mode 100644 tools/testing/selftests/kvm/x86_64/user_msr_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 5008bf90772b..ce8f4ad39684 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -17,7 +17,6 @@ /x86_64/svm_vmcall_test /x86_64/sync_regs_test /x86_64/tsc_msrs_test -/x86_64/user_msr_test /x86_64/userspace_msr_exit_test /x86_64/vmx_apic_access_test /x86_64/vmx_close_while_nested_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index d54870db21c4..86ccca8ca89d 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -50,7 +50,6 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test -TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test diff --git a/tools/testing/selftests/kvm/x86_64/user_msr_test.c b/tools/testing/selftests/kvm/x86_64/user_msr_test.c deleted file mode 100644 index fe88b98908ee..000000000000 --- a/tools/testing/selftests/kvm/x86_64/user_msr_test.c +++ /dev/null @@ -1,251 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * tests for KVM_CAP_X86_USER_SPACE_MSR and KVM_X86_SET_MSR_FILTER - * - * Copyright (C) 2020, Amazon Inc. - * - * This is a functional test to verify that we can deflect MSR events - * into user space. - */ -#define _GNU_SOURCE /* for program_invocation_short_name */ -#include -#include -#include -#include -#include - -#include "test_util.h" - -#include "kvm_util.h" -#include "processor.h" - -#define VCPU_ID 5 - -static u32 msr_reads, msr_writes; - -static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; -static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; -static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; -static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; -static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; -static u8 bitmap_deadbeef[1] = { 0x1 }; - -static void deny_msr(uint8_t *bitmap, u32 msr) -{ - u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1); - - bitmap[idx / 8] &= ~(1 << (idx % 8)); -} - -static void prepare_bitmaps(void) -{ - memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000)); - memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write)); - memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000)); - memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000)); - memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read)); - - deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL); - deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK); - deny_msr(bitmap_c0000000_read, MSR_GS_BASE); -} - -struct kvm_msr_filter filter = { - .flags = KVM_MSR_FILTER_DEFAULT_DENY, - .ranges = { - { - .flags = KVM_MSR_FILTER_READ, - .base = 0x00000000, - .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, - .bitmap = bitmap_00000000, - }, { - .flags = KVM_MSR_FILTER_WRITE, - .base = 0x00000000, - .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, - .bitmap = bitmap_00000000_write, - }, { - .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE, - .base = 0x40000000, - .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, - .bitmap = bitmap_40000000, - }, { - .flags = KVM_MSR_FILTER_READ, - .base = 0xc0000000, - .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, - .bitmap = bitmap_c0000000_read, - }, { - .flags = KVM_MSR_FILTER_WRITE, - .base = 0xc0000000, - .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, - .bitmap = bitmap_c0000000, - }, { - .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ, - .base = 0xdeadbeef, - .nmsrs = 1, - .bitmap = bitmap_deadbeef, - }, - }, -}; - -struct kvm_msr_filter no_filter = { - .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, -}; - -static void guest_msr_calls(bool trapped) -{ - /* This goes into the in-kernel emulation */ - wrmsr(MSR_SYSCALL_MASK, 0); - - if (trapped) { - /* This goes into user space emulation */ - GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK); - GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE); - } else { - GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK); - GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE); - } - - /* If trapped == true, this goes into user space emulation */ - wrmsr(MSR_IA32_POWER_CTL, 0x1234); - - /* This goes into the in-kernel emulation */ - rdmsr(MSR_IA32_POWER_CTL); - - /* Invalid MSR, should always be handled by user space exit */ - GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef); - wrmsr(0xdeadbeef, 0x1234); -} - -static void guest_code(void) -{ - guest_msr_calls(true); - - /* - * Disable msr filtering, so that the kernel - * handles everything in the next round - */ - GUEST_SYNC(0); - - guest_msr_calls(false); - - GUEST_DONE(); -} - -static int handle_ucall(struct kvm_vm *vm) -{ - struct ucall uc; - - switch (get_ucall(vm, VCPU_ID, &uc)) { - case UCALL_ABORT: - TEST_FAIL("Guest assertion not met"); - break; - case UCALL_SYNC: - vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &no_filter); - break; - case UCALL_DONE: - return 1; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - - return 0; -} - -static void handle_rdmsr(struct kvm_run *run) -{ - run->msr.data = run->msr.index; - msr_reads++; - - if (run->msr.index == MSR_SYSCALL_MASK || - run->msr.index == MSR_GS_BASE) { - TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER, - "MSR read trap w/o access fault"); - } - - if (run->msr.index == 0xdeadbeef) { - TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN, - "MSR deadbeef read trap w/o inval fault"); - } -} - -static void handle_wrmsr(struct kvm_run *run) -{ - /* ignore */ - msr_writes++; - - if (run->msr.index == MSR_IA32_POWER_CTL) { - TEST_ASSERT(run->msr.data == 0x1234, - "MSR data for MSR_IA32_POWER_CTL incorrect"); - TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER, - "MSR_IA32_POWER_CTL trap w/o access fault"); - } - - if (run->msr.index == 0xdeadbeef) { - TEST_ASSERT(run->msr.data == 0x1234, - "MSR data for deadbeef incorrect"); - TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN, - "deadbeef trap w/o inval fault"); - } -} - -int main(int argc, char *argv[]) -{ - struct kvm_enable_cap cap = { - .cap = KVM_CAP_X86_USER_SPACE_MSR, - .args[0] = KVM_MSR_EXIT_REASON_INVAL | - KVM_MSR_EXIT_REASON_UNKNOWN | - KVM_MSR_EXIT_REASON_FILTER, - }; - struct kvm_vm *vm; - struct kvm_run *run; - int rc; - - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - - /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code); - run = vcpu_state(vm, VCPU_ID); - - rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); - if (!rc) { - print_skip("KVM_CAP_X86_USER_SPACE_MSR not supported"); - exit(KSFT_SKIP); - } - - vm_enable_cap(vm, &cap); - - rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); - TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); - - prepare_bitmaps(); - vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter); - - while (1) { - rc = _vcpu_run(vm, VCPU_ID); - - TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc); - - switch (run->exit_reason) { - case KVM_EXIT_X86_RDMSR: - handle_rdmsr(run); - break; - case KVM_EXIT_X86_WRMSR: - handle_wrmsr(run); - break; - case KVM_EXIT_IO: - if (handle_ucall(vm)) - goto done; - break; - } - - } - -done: - TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space"); - TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space"); - - kvm_vm_free(vm); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c index e8b6918cdea0..72c0d0797522 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c @@ -20,8 +20,8 @@ static int fep_available = 1; #define VCPU_ID 1 #define MSR_NON_EXISTENT 0x474f4f00 -u64 deny_bits = 0; -struct kvm_msr_filter filter = { +static u64 deny_bits = 0; +struct kvm_msr_filter filter_allow = { .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, .ranges = { { @@ -53,8 +53,7 @@ struct kvm_msr_filter filter_fs = { .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, .ranges = { { - .flags = KVM_MSR_FILTER_READ | - KVM_MSR_FILTER_WRITE, + .flags = KVM_MSR_FILTER_READ, .nmsrs = 1, .base = MSR_FS_BASE, .bitmap = (uint8_t*)&deny_bits, @@ -66,8 +65,7 @@ struct kvm_msr_filter filter_gs = { .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, .ranges = { { - .flags = KVM_MSR_FILTER_READ | - KVM_MSR_FILTER_WRITE, + .flags = KVM_MSR_FILTER_READ, .nmsrs = 1, .base = MSR_GS_BASE, .bitmap = (uint8_t*)&deny_bits, @@ -75,8 +73,77 @@ struct kvm_msr_filter filter_gs = { }, }; -uint64_t msr_non_existent_data; -int guest_exception_count; +static uint64_t msr_non_existent_data; +static int guest_exception_count; +static u32 msr_reads, msr_writes; + +static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_deadbeef[1] = { 0x1 }; + +static void deny_msr(uint8_t *bitmap, u32 msr) +{ + u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1); + + bitmap[idx / 8] &= ~(1 << (idx % 8)); +} + +static void prepare_bitmaps(void) +{ + memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000)); + memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write)); + memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000)); + memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000)); + memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read)); + + deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL); + deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK); + deny_msr(bitmap_c0000000_read, MSR_GS_BASE); +} + +struct kvm_msr_filter filter_deny = { + .flags = KVM_MSR_FILTER_DEFAULT_DENY, + .ranges = { + { + .flags = KVM_MSR_FILTER_READ, + .base = 0x00000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_00000000, + }, { + .flags = KVM_MSR_FILTER_WRITE, + .base = 0x00000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_00000000_write, + }, { + .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE, + .base = 0x40000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_40000000, + }, { + .flags = KVM_MSR_FILTER_READ, + .base = 0xc0000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_c0000000_read, + }, { + .flags = KVM_MSR_FILTER_WRITE, + .base = 0xc0000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_c0000000, + }, { + .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ, + .base = 0xdeadbeef, + .nmsrs = 1, + .bitmap = bitmap_deadbeef, + }, + }, +}; + +struct kvm_msr_filter no_filter_deny = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, +}; /* * Note: Force test_rdmsr() to not be inlined to prevent the labels, @@ -146,7 +213,7 @@ static noinline void test_em_wrmsr(uint32_t msr, uint64_t value) extern char em_rdmsr_start, em_rdmsr_end; extern char em_wrmsr_start, em_wrmsr_end; -static void guest_code(void) +static void guest_code_filter_allow(void) { uint64_t data; @@ -233,27 +300,60 @@ static void guest_code(void) GUEST_DONE(); } +static void guest_msr_calls(bool trapped) +{ + /* This goes into the in-kernel emulation */ + wrmsr(MSR_SYSCALL_MASK, 0); + + if (trapped) { + /* This goes into user space emulation */ + GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK); + GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE); + } else { + GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK); + GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE); + } + + /* If trapped == true, this goes into user space emulation */ + wrmsr(MSR_IA32_POWER_CTL, 0x1234); + + /* This goes into the in-kernel emulation */ + rdmsr(MSR_IA32_POWER_CTL); + + /* Invalid MSR, should always be handled by user space exit */ + GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef); + wrmsr(0xdeadbeef, 0x1234); +} + +static void guest_code_filter_deny(void) +{ + guest_msr_calls(true); + + /* + * Disable msr filtering, so that the kernel + * handles everything in the next round + */ + GUEST_SYNC(0); + + guest_msr_calls(false); + + GUEST_DONE(); +} static void guest_code_permission_bitmap(void) { uint64_t data; - test_wrmsr(MSR_FS_BASE, 0); data = test_rdmsr(MSR_FS_BASE); GUEST_ASSERT(data == MSR_FS_BASE); - - test_wrmsr(MSR_GS_BASE, 0); data = test_rdmsr(MSR_GS_BASE); - GUEST_ASSERT(data == 0); + GUEST_ASSERT(data != MSR_GS_BASE); /* Let userspace know to switch the filter */ GUEST_SYNC(0); - test_wrmsr(MSR_FS_BASE, 0); data = test_rdmsr(MSR_FS_BASE); - GUEST_ASSERT(data == 0); - - test_wrmsr(MSR_GS_BASE, 0); + GUEST_ASSERT(data != MSR_FS_BASE); data = test_rdmsr(MSR_GS_BASE); GUEST_ASSERT(data == MSR_GS_BASE); @@ -376,9 +476,6 @@ static void process_wrmsr(struct kvm_vm *vm, uint32_t msr_index) case MSR_NON_EXISTENT: msr_non_existent_data = run->msr.data; break; - case MSR_FS_BASE: - case MSR_GS_BASE: - break; default: TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index); } @@ -453,7 +550,7 @@ static void run_guest_then_process_ucall_done(struct kvm_vm *vm) process_ucall_done(vm); } -static void test_msr_filter(void) { +static void test_msr_filter_allow(void) { struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_USER_SPACE_MSR, .args[0] = KVM_MSR_EXIT_REASON_FILTER, @@ -462,7 +559,7 @@ static void test_msr_filter(void) { int rc; /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code); + vm = vm_create_default(VCPU_ID, 0, guest_code_filter_allow); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); @@ -472,7 +569,7 @@ static void test_msr_filter(void) { rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); - vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter); + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow); vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); @@ -519,6 +616,116 @@ static void test_msr_filter(void) { kvm_vm_free(vm); } +static int handle_ucall(struct kvm_vm *vm) +{ + struct ucall uc; + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_FAIL("Guest assertion not met"); + break; + case UCALL_SYNC: + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny); + break; + case UCALL_DONE: + return 1; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + return 0; +} + +static void handle_rdmsr(struct kvm_run *run) +{ + run->msr.data = run->msr.index; + msr_reads++; + + if (run->msr.index == MSR_SYSCALL_MASK || + run->msr.index == MSR_GS_BASE) { + TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER, + "MSR read trap w/o access fault"); + } + + if (run->msr.index == 0xdeadbeef) { + TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN, + "MSR deadbeef read trap w/o inval fault"); + } +} + +static void handle_wrmsr(struct kvm_run *run) +{ + /* ignore */ + msr_writes++; + + if (run->msr.index == MSR_IA32_POWER_CTL) { + TEST_ASSERT(run->msr.data == 0x1234, + "MSR data for MSR_IA32_POWER_CTL incorrect"); + TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER, + "MSR_IA32_POWER_CTL trap w/o access fault"); + } + + if (run->msr.index == 0xdeadbeef) { + TEST_ASSERT(run->msr.data == 0x1234, + "MSR data for deadbeef incorrect"); + TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN, + "deadbeef trap w/o inval fault"); + } +} + +static void test_msr_filter_deny(void) { + struct kvm_enable_cap cap = { + .cap = KVM_CAP_X86_USER_SPACE_MSR, + .args[0] = KVM_MSR_EXIT_REASON_INVAL | + KVM_MSR_EXIT_REASON_UNKNOWN | + KVM_MSR_EXIT_REASON_FILTER, + }; + struct kvm_vm *vm; + struct kvm_run *run; + int rc; + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code_filter_deny); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + run = vcpu_state(vm, VCPU_ID); + + rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); + TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); + vm_enable_cap(vm, &cap); + + rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); + TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); + + prepare_bitmaps(); + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_deny); + + while (1) { + rc = _vcpu_run(vm, VCPU_ID); + + TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc); + + switch (run->exit_reason) { + case KVM_EXIT_X86_RDMSR: + handle_rdmsr(run); + break; + case KVM_EXIT_X86_WRMSR: + handle_wrmsr(run); + break; + case KVM_EXIT_IO: + if (handle_ucall(vm)) + goto done; + break; + } + + } + +done: + TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space"); + TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space"); + + kvm_vm_free(vm); +} + static void test_msr_permission_bitmap(void) { struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_USER_SPACE_MSR, @@ -539,11 +746,9 @@ static void test_msr_permission_bitmap(void) { TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs); - run_guest_then_process_wrmsr(vm, MSR_FS_BASE); run_guest_then_process_rdmsr(vm, MSR_FS_BASE); TEST_ASSERT(run_guest_then_process_ucall(vm) == UCALL_SYNC, "Expected ucall state to be UCALL_SYNC."); vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs); - run_guest_then_process_wrmsr(vm, MSR_GS_BASE); run_guest_then_process_rdmsr(vm, MSR_GS_BASE); run_guest_then_process_ucall_done(vm); @@ -552,7 +757,12 @@ static void test_msr_permission_bitmap(void) { int main(int argc, char *argv[]) { - test_msr_filter(); + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + test_msr_filter_allow(); + + test_msr_filter_deny(); test_msr_permission_bitmap(); -- cgit v1.3.1 From 111d0bda8eeb4b54e0c63897b071effbf9fd9251 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Tue, 8 Dec 2020 22:08:29 +0100 Subject: tools/kvm_stat: Exempt time-based counters The new counters halt_poll_success_ns and halt_poll_fail_ns do not count events. Instead they provide a time, and mess up our statistics. Therefore, we should exclude them. Removal is currently implemented with an exempt list. If more counters like these appear, we can think about a more general rule like excluding all fields name "*_ns", in case that's a standing convention. Signed-off-by: Stefan Raspl Tested-and-reported-by: Christian Borntraeger Message-Id: <20201208210829.101324-1-raspl@linux.ibm.com> Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index d199a3694be8..b0bf56c5f120 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -742,7 +742,11 @@ class DebugfsProvider(Provider): The fields are all available KVM debugfs files """ - return self.walkdir(PATH_DEBUGFS_KVM)[2] + exempt_list = ['halt_poll_fail_ns', 'halt_poll_success_ns'] + fields = [field for field in self.walkdir(PATH_DEBUGFS_KVM)[2] + if field not in exempt_list] + + return fields def update_fields(self, fields_filter): """Refresh fields, applying fields_filter""" -- cgit v1.3.1 From fe62de310e2b563c0d303a09d06b020077fe86b4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 11 Dec 2020 13:58:24 -0800 Subject: libbpf: Support modules in bpf_program__set_attach_target() API Support finding kernel targets in kernel modules when using bpf_program__set_attach_target() API. This brings it up to par with what libbpf supports when doing declarative SEC()-based target determination. Some minor internal refactoring was needed to make sure vmlinux BTF can be loaded before bpf_object's load phase. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201211215825.3646154-2-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 64 +++++++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 9be88a90a4aa..6ae748f6ea11 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2518,7 +2518,7 @@ static int bpf_object__finalize_btf(struct bpf_object *obj) return 0; } -static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog) +static bool prog_needs_vmlinux_btf(struct bpf_program *prog) { if (prog->type == BPF_PROG_TYPE_STRUCT_OPS || prog->type == BPF_PROG_TYPE_LSM) @@ -2533,37 +2533,43 @@ static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog) return false; } -static int bpf_object__load_vmlinux_btf(struct bpf_object *obj) +static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) { - bool need_vmlinux_btf = false; struct bpf_program *prog; - int i, err; + int i; /* CO-RE relocations need kernel BTF */ if (obj->btf_ext && obj->btf_ext->core_relo_info.len) - need_vmlinux_btf = true; + return true; /* Support for typed ksyms needs kernel BTF */ for (i = 0; i < obj->nr_extern; i++) { const struct extern_desc *ext; ext = &obj->externs[i]; - if (ext->type == EXT_KSYM && ext->ksym.type_id) { - need_vmlinux_btf = true; - break; - } + if (ext->type == EXT_KSYM && ext->ksym.type_id) + return true; } bpf_object__for_each_program(prog, obj) { if (!prog->load) continue; - if (libbpf_prog_needs_vmlinux_btf(prog)) { - need_vmlinux_btf = true; - break; - } + if (prog_needs_vmlinux_btf(prog)) + return true; } - if (!need_vmlinux_btf) + return false; +} + +static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force) +{ + int err; + + /* btf_vmlinux could be loaded earlier */ + if (obj->btf_vmlinux) + return 0; + + if (!force && !obj_needs_vmlinux_btf(obj)) return 0; obj->btf_vmlinux = libbpf_find_kernel_btf(); @@ -7475,7 +7481,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) } err = bpf_object__probe_loading(obj); - err = err ? : bpf_object__load_vmlinux_btf(obj); + err = err ? : bpf_object__load_vmlinux_btf(obj, false); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); err = err ? : bpf_object__sanitize_and_load_btf(obj); err = err ? : bpf_object__sanitize_maps(obj); @@ -10870,23 +10876,33 @@ int bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, const char *attach_func_name) { - int btf_id; + int btf_obj_fd = 0, btf_id = 0, err; if (!prog || attach_prog_fd < 0 || !attach_func_name) return -EINVAL; - if (attach_prog_fd) + if (prog->obj->loaded) + return -EINVAL; + + if (attach_prog_fd) { btf_id = libbpf_find_prog_btf_id(attach_func_name, attach_prog_fd); - else - btf_id = libbpf_find_vmlinux_btf_id(attach_func_name, - prog->expected_attach_type); - - if (btf_id < 0) - return btf_id; + if (btf_id < 0) + return btf_id; + } else { + /* load btf_vmlinux, if not yet */ + err = bpf_object__load_vmlinux_btf(prog->obj, true); + if (err) + return err; + err = find_kernel_btf_id(prog->obj, attach_func_name, + prog->expected_attach_type, + &btf_obj_fd, &btf_id); + if (err) + return err; + } prog->attach_btf_id = btf_id; - prog->attach_btf_obj_fd = 0; + prog->attach_btf_obj_fd = btf_obj_fd; prog->attach_prog_fd = attach_prog_fd; return 0; } -- cgit v1.3.1 From 2e33f831fccd2df83836a8e255755f85d364aaeb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 11 Dec 2020 13:58:25 -0800 Subject: selftests/bpf: Add set_attach_target() API selftest for module target Add test for bpf_program__set_attach_target() API, validating it can find kernel module fentry target. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201211215825.3646154-3-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/module_attach.c | 11 ++++++++++- tools/testing/selftests/bpf/progs/test_module_attach.c | 11 +++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c index 4b65e9918764..50796b651f72 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -28,10 +28,18 @@ void test_module_attach(void) struct test_module_attach__bss *bss; int err; - skel = test_module_attach__open_and_load(); + skel = test_module_attach__open(); if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) return; + err = bpf_program__set_attach_target(skel->progs.handle_fentry_manual, + 0, "bpf_testmod_test_read"); + ASSERT_OK(err, "set_attach_target"); + + err = test_module_attach__load(skel); + if (CHECK(err, "skel_load", "failed to load skeleton\n")) + return; + bss = skel->bss; err = test_module_attach__attach(skel); @@ -44,6 +52,7 @@ void test_module_attach(void) ASSERT_EQ(bss->raw_tp_read_sz, READ_SZ, "raw_tp"); ASSERT_EQ(bss->tp_btf_read_sz, READ_SZ, "tp_btf"); ASSERT_EQ(bss->fentry_read_sz, READ_SZ, "fentry"); + ASSERT_EQ(bss->fentry_manual_read_sz, READ_SZ, "fentry_manual"); ASSERT_EQ(bss->fexit_read_sz, READ_SZ, "fexit"); ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet"); ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret"); diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index b563563df172..efd1e287ac17 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -38,6 +38,17 @@ int BPF_PROG(handle_fentry, return 0; } +__u32 fentry_manual_read_sz = 0; + +SEC("fentry/placeholder") +int BPF_PROG(handle_fentry_manual, + struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) +{ + fentry_manual_read_sz = len; + return 0; +} + __u32 fexit_read_sz = 0; int fexit_ret = 0; -- cgit v1.3.1 From a4d2a7ad86834092b327082004ead755d2603376 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Mon, 14 Dec 2020 11:38:12 +0000 Subject: libbpf: Expose libbpf ring_buffer epoll_fd This provides a convenient perf ringbuf -> libbpf ringbuf migration path for users of external polling systems. It is analogous to perf_buffer__epoll_fd. Signed-off-by: Brendan Jackman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201214113812.305274-1-jackmanb@google.com --- tools/lib/bpf/libbpf.h | 1 + tools/lib/bpf/libbpf.map | 1 + tools/lib/bpf/ringbuf.c | 6 ++++++ 3 files changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 6909ee81113a..3c35eb401931 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -536,6 +536,7 @@ LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd, ring_buffer_sample_fn sample_cb, void *ctx); LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms); LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb); +LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb); /* Perf buffer APIs */ struct perf_buffer; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 7c4126542e2b..1c0fd2dd233a 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -346,6 +346,7 @@ LIBBPF_0.3.0 { btf__parse_split; btf__new_empty_split; btf__new_split; + ring_buffer__epoll_fd; xsk_setup_xdp_prog; xsk_socket__update_xskmap; } LIBBPF_0.2.0; diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 5c6522c89af1..a8f997d47adf 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -282,3 +282,9 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms) } return cnt < 0 ? -errno : res; } + +/* Get an fd that can be used to sleep until data is available in the ring(s) */ +int ring_buffer__epoll_fd(const struct ring_buffer *rb) +{ + return rb->epoll_fd; +} -- cgit v1.3.1 From b4b638c36b7e7acd847b9c4b9c80f268e45ea30c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 9 Dec 2020 17:33:50 -0800 Subject: selftests/bpf: Add a test for ptr_to_map_value on stack for helper access Change bpf_iter_task.c such that pointer to map_value may appear on the stack for bpf_seq_printf() to access. Without previous verifier patch, the bpf_iter test will fail. Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201210013350.943985-1-yhs@fb.com --- tools/testing/selftests/bpf/progs/bpf_iter_task.c | 3 ++- tools/testing/selftests/bpf/verifier/unpriv.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c index 4983087852a0..b7f32c160f4e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c @@ -11,9 +11,10 @@ int dump_task(struct bpf_iter__task *ctx) { struct seq_file *seq = ctx->meta->seq; struct task_struct *task = ctx->task; + static char info[] = " === END ==="; if (task == (void *)0) { - BPF_SEQ_PRINTF(seq, " === END ===\n"); + BPF_SEQ_PRINTF(seq, "%s\n", info); return 0; } diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index 91bb77c24a2e..a3fe0fbaed41 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -108,8 +108,9 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, - .errstr = "invalid indirect read from stack off -8+0 size 8", - .result = REJECT, + .errstr_unpriv = "invalid indirect read from stack off -8+0 size 8", + .result_unpriv = REJECT, + .result = ACCEPT, }, { "unpriv: mangle pointer on stack 1", -- cgit v1.3.1 From 6fe4ccdc3dabe3de573e27fb2684d925bd611458 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 10 Dec 2020 14:25:00 -0800 Subject: selftests: mptcp: add the flush addrs testcase This patch added the flush addrs testcase. In do_transfer, if the number of removing addresses is less than 8, use the del addr command to remove the addresses one by one. If the number is more than 8, use the flush addrs command to remove the addresses. Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 50 ++++++++++++++++++------- 1 file changed, 36 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 0eae628d1ffd..9aa9624cff97 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -264,27 +264,37 @@ do_transfer() cpid=$! if [ $rm_nr_ns1 -gt 0 ]; then - counter=1 - sleep 1 + if [ $rm_nr_ns1 -lt 8 ]; then + counter=1 + sleep 1 - while [ $counter -le $rm_nr_ns1 ] - do - ip netns exec ${listener_ns} ./pm_nl_ctl del $counter + while [ $counter -le $rm_nr_ns1 ] + do + ip netns exec ${listener_ns} ./pm_nl_ctl del $counter + sleep 1 + let counter+=1 + done + else sleep 1 - let counter+=1 - done + ip netns exec ${listener_ns} ./pm_nl_ctl flush + fi fi if [ $rm_nr_ns2 -gt 0 ]; then - counter=1 - sleep 1 + if [ $rm_nr_ns2 -lt 8 ]; then + counter=1 + sleep 1 - while [ $counter -le $rm_nr_ns2 ] - do - ip netns exec ${connector_ns} ./pm_nl_ctl del $counter + while [ $counter -le $rm_nr_ns2 ] + do + ip netns exec ${connector_ns} ./pm_nl_ctl del $counter + sleep 1 + let counter+=1 + done + else sleep 1 - let counter+=1 - done + ip netns exec ${connector_ns} ./pm_nl_ctl flush + fi fi wait $cpid @@ -663,6 +673,18 @@ chk_join_nr "remove subflows and signal" 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 +# subflows and signal, flush +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 3 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 3 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 0 8 8 slow +chk_join_nr "flush subflows and signal" 3 3 3 +chk_add_nr 1 1 +chk_rm_nr 2 2 + # subflow IPv6 reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -- cgit v1.3.1 From 0e12c0271887f1b00b79b7612c1d4f0d3d34e8a8 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Fri, 11 Dec 2020 12:24:20 +0800 Subject: selftests: test_vxlan_under_vrf: mute unnecessary error message The cleanup function in this script that tries to delete hv-1 / hv-2 vm-1 / vm-2 netns will generate some uncessary error messages: Cannot remove namespace file "/run/netns/hv-2": No such file or directory Cannot remove namespace file "/run/netns/vm-1": No such file or directory Cannot remove namespace file "/run/netns/vm-2": No such file or directory Redirect it to /dev/null like other commands in the cleanup function to reduce confusion. Signed-off-by: Po-Hsu Lin Link: https://lore.kernel.org/r/20201211042420.16411-1-po-hsu.lin@canonical.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/test_vxlan_under_vrf.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh index 09f9ed92cbe4..534c8b7699ab 100755 --- a/tools/testing/selftests/net/test_vxlan_under_vrf.sh +++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh @@ -50,7 +50,7 @@ cleanup() { ip link del veth-tap 2>/dev/null || true for ns in hv-1 hv-2 vm-1 vm-2; do - ip netns del $ns || true + ip netns del $ns 2>/dev/null || true done } -- cgit v1.3.1 From b2ce5dbc15819ea4bef47dbd368239cb1e965158 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Thu, 19 Nov 2020 20:54:11 +0530 Subject: perf test: Fix metric parsing test Commit e1c92a7fbbc5 ("perf tests: Add another metric parsing test") add another test for metric parsing. The test goes through all metrics compiled for arch within pmu events and try to parse them. Right now this test is failing in powerpc machine. Result in power9 platform: [command]# ./perf test 10 10: PMU events : 10.1: PMU event table sanity : Ok 10.2: PMU event map aliases : Ok 10.3: Parsing of PMU event table metrics : Skip (some metrics failed) 10.4: Parsing of PMU event table metrics with fake PMUs : FAILED! Issue is we are passing different runtime parameter value in "expr__find_other" and "expr__parse" function which is called from function `metric_parse_fake`. And because of this parsing of hv-24x7 metrics is failing. [command]# ./perf test 10 -vv ..... hv_24x7/pm_mcs01_128b_rd_disp_port01,chip=1/ not found expr__parse failed test child finished with -1 ---- end ---- PMU events subtest 4: FAILED! This patch fix this issue and change runtime parameter value to '0' in expr__parse function. Result in power9 platform after this patch: [command]# ./perf test 10 10: PMU events : 10.1: PMU event table sanity : Ok 10.2: PMU event map aliases : Ok 10.3: Parsing of PMU event table metrics : Skip (some metrics failed) 10.4: Parsing of PMU event table metrics with fake PMUs : Ok Fixes: e1c92a7fbbc5 ("perf tests: Add another metric parsing test") Signed-off-by: Kajol Jain Acked-by: Ian Rogers Acked-by: Jiri Olsa Cc: Madhavan Srinivasan Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20201119152411.46041-1-kjain@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/pmu-events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index ad2b21591275..0ca6a5a53523 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -575,7 +575,7 @@ static int metric_parse_fake(const char *str) } } - if (expr__parse(&result, &ctx, str, 1)) + if (expr__parse(&result, &ctx, str, 0)) pr_err("expr__parse failed\n"); else ret = 0; -- cgit v1.3.1 From 9c84f229268fa229e250b7225611d0eb7094fea0 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Mon, 14 Dec 2020 19:05:05 -0800 Subject: mm/gup_benchmark: rename to mm/gup_test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "selftests/vm: gup_test, hmm-tests, assorted improvements", v3. Summary: This series provides two main things, and a number of smaller supporting goodies. The two main points are: 1) Add a new sub-test to gup_test, which in turn is a renamed version of gup_benchmark. This sub-test allows nicer testing of dump_pages(), at least on user-space pages. For quite a while, I was doing a quick hack to gup_test.c whenever I wanted to try out changes to dump_page(). Then Matthew Wilcox asked me what I meant when I said "I used my dump_page() unit test", and I realized that it might be nice to check in a polished up version of that. Details about how it works and how to use it are in the commit description for patch #6 ("selftests/vm: gup_test: introduce the dump_pages() sub-test"). 2) Fixes a limitation of hmm-tests: these tests are incredibly useful, but only if people actually build and run them. And it turns out that libhugetlbfs is a little too effective at throwing a wrench in the works, there. So I've added a little configuration check that removes just two of the 21 hmm-tests, if libhugetlbfs is not available. Further details in the commit description of patch #8 ("selftests/vm: hmm-tests: remove the libhugetlbfs dependency"). Other smaller things that this series does: a) Remove code duplication by creating gup_test.h. b) Clear up the sub-test organization, and their invocation within run_vmtests.sh. c) Other minor assorted improvements. [1] v2 is here: https://lore.kernel.org/linux-doc/20200929212747.251804-1-jhubbard@nvidia.com/ [2] https://lore.kernel.org/r/CAHk-=wgh-TMPHLY3jueHX7Y2fWh3D+nMBqVS__AZm6-oorquWA@mail.gmail.com This patch (of 9): Rename nearly every "gup_benchmark" reference and file name to "gup_test". The one exception is for the actual gup benchmark test itself. The current code already does a *little* bit more than benchmarking, and definitely covers more than get_user_pages_fast(). More importantly, however, subsequent patches are about to add some functionality that is non-benchmark related. Closely related changes: * Kconfig: in addition to renaming the options from GUP_BENCHMARK to GUP_TEST, update the help text to reflect that it's no longer a benchmark-only test. Link: https://lkml.kernel.org/r/20201026064021.3545418-1-jhubbard@nvidia.com Link: https://lkml.kernel.org/r/20201026064021.3545418-2-jhubbard@nvidia.com Signed-off-by: John Hubbard Cc: Jonathan Corbet Cc: Jérôme Glisse Cc: Ralph Campbell Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/core-api/pin_user_pages.rst | 6 +- arch/s390/configs/debug_defconfig | 2 +- arch/s390/configs/defconfig | 2 +- mm/Kconfig | 15 ++- mm/Makefile | 2 +- mm/gup_benchmark.c | 210 ----------------------------- mm/gup_test.c | 210 +++++++++++++++++++++++++++++ tools/testing/selftests/vm/.gitignore | 2 +- tools/testing/selftests/vm/Makefile | 2 +- tools/testing/selftests/vm/config | 2 +- tools/testing/selftests/vm/gup_benchmark.c | 143 -------------------- tools/testing/selftests/vm/gup_test.c | 143 ++++++++++++++++++++ tools/testing/selftests/vm/run_vmtests | 8 +- 13 files changed, 376 insertions(+), 371 deletions(-) delete mode 100644 mm/gup_benchmark.c create mode 100644 mm/gup_test.c delete mode 100644 tools/testing/selftests/vm/gup_benchmark.c create mode 100644 tools/testing/selftests/vm/gup_test.c (limited to 'tools') diff --git a/Documentation/core-api/pin_user_pages.rst b/Documentation/core-api/pin_user_pages.rst index 7ca8c7bac650..eae972b23224 100644 --- a/Documentation/core-api/pin_user_pages.rst +++ b/Documentation/core-api/pin_user_pages.rst @@ -221,12 +221,12 @@ Unit testing ============ This file:: - tools/testing/selftests/vm/gup_benchmark.c + tools/testing/selftests/vm/gup_test.c has the following new calls to exercise the new pin*() wrapper functions: -* PIN_FAST_BENCHMARK (./gup_benchmark -a) -* PIN_BENCHMARK (./gup_benchmark -b) +* PIN_FAST_BENCHMARK (./gup_test -a) +* PIN_BENCHMARK (./gup_test -b) You can monitor how many total dma-pinned pages have been acquired and released since the system was booted, via two new /proc/vmstat entries: :: diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index fe6f529ac82c..3601c28d1526 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -102,7 +102,7 @@ CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PERCPU_STATS=y -CONFIG_GUP_BENCHMARK=y +CONFIG_GUP_TEST=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 17d5df2c1eff..e2171a008809 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -95,7 +95,7 @@ CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PERCPU_STATS=y -CONFIG_GUP_BENCHMARK=y +CONFIG_GUP_TEST=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m diff --git a/mm/Kconfig b/mm/Kconfig index 390165ffbb0f..e25e5cb2989f 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -821,13 +821,18 @@ config PERCPU_STATS information includes global and per chunk statistics, which can be used to help understand percpu memory usage. -config GUP_BENCHMARK - bool "Enable infrastructure for get_user_pages() and related calls benchmarking" +config GUP_TEST + bool "Enable infrastructure for get_user_pages()-related unit tests" help - Provides /sys/kernel/debug/gup_benchmark that helps with testing - performance of get_user_pages() and related calls. + Provides /sys/kernel/debug/gup_test, which in turn provides a way + to make ioctl calls that can launch kernel-based unit tests for + the get_user_pages*() and pin_user_pages*() family of API calls. - See tools/testing/selftests/vm/gup_benchmark.c + These tests include benchmark testing of the _fast variants of + get_user_pages*() and pin_user_pages*(), as well as smoke tests of + the non-_fast variants. + + See tools/testing/selftests/vm/gup_test.c config GUP_GET_PTE_LOW_HIGH bool diff --git a/mm/Makefile b/mm/Makefile index d73aed0fc99c..069f216e109e 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -90,7 +90,7 @@ obj-$(CONFIG_PAGE_COUNTER) += page_counter.o obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o obj-$(CONFIG_MEMCG_SWAP) += swap_cgroup.o obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o -obj-$(CONFIG_GUP_BENCHMARK) += gup_benchmark.o +obj-$(CONFIG_GUP_TEST) += gup_test.o obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c deleted file mode 100644 index 8b3e5b5cd8fa..000000000000 --- a/mm/gup_benchmark.c +++ /dev/null @@ -1,210 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark) -#define GUP_BENCHMARK _IOWR('g', 2, struct gup_benchmark) -#define PIN_FAST_BENCHMARK _IOWR('g', 3, struct gup_benchmark) -#define PIN_BENCHMARK _IOWR('g', 4, struct gup_benchmark) -#define PIN_LONGTERM_BENCHMARK _IOWR('g', 5, struct gup_benchmark) - -struct gup_benchmark { - __u64 get_delta_usec; - __u64 put_delta_usec; - __u64 addr; - __u64 size; - __u32 nr_pages_per_call; - __u32 flags; - __u64 expansion[10]; /* For future use */ -}; - -static void put_back_pages(unsigned int cmd, struct page **pages, - unsigned long nr_pages) -{ - unsigned long i; - - switch (cmd) { - case GUP_FAST_BENCHMARK: - case GUP_BENCHMARK: - for (i = 0; i < nr_pages; i++) - put_page(pages[i]); - break; - - case PIN_FAST_BENCHMARK: - case PIN_BENCHMARK: - case PIN_LONGTERM_BENCHMARK: - unpin_user_pages(pages, nr_pages); - break; - } -} - -static void verify_dma_pinned(unsigned int cmd, struct page **pages, - unsigned long nr_pages) -{ - unsigned long i; - struct page *page; - - switch (cmd) { - case PIN_FAST_BENCHMARK: - case PIN_BENCHMARK: - case PIN_LONGTERM_BENCHMARK: - for (i = 0; i < nr_pages; i++) { - page = pages[i]; - if (WARN(!page_maybe_dma_pinned(page), - "pages[%lu] is NOT dma-pinned\n", i)) { - - dump_page(page, "gup_benchmark failure"); - break; - } - } - break; - } -} - -static int __gup_benchmark_ioctl(unsigned int cmd, - struct gup_benchmark *gup) -{ - ktime_t start_time, end_time; - unsigned long i, nr_pages, addr, next; - int nr; - struct page **pages; - int ret = 0; - bool needs_mmap_lock = - cmd != GUP_FAST_BENCHMARK && cmd != PIN_FAST_BENCHMARK; - - if (gup->size > ULONG_MAX) - return -EINVAL; - - nr_pages = gup->size / PAGE_SIZE; - pages = kvcalloc(nr_pages, sizeof(void *), GFP_KERNEL); - if (!pages) - return -ENOMEM; - - if (needs_mmap_lock && mmap_read_lock_killable(current->mm)) { - ret = -EINTR; - goto free_pages; - } - - i = 0; - nr = gup->nr_pages_per_call; - start_time = ktime_get(); - for (addr = gup->addr; addr < gup->addr + gup->size; addr = next) { - if (nr != gup->nr_pages_per_call) - break; - - next = addr + nr * PAGE_SIZE; - if (next > gup->addr + gup->size) { - next = gup->addr + gup->size; - nr = (next - addr) / PAGE_SIZE; - } - - /* Filter out most gup flags: only allow a tiny subset here: */ - gup->flags &= FOLL_WRITE; - - switch (cmd) { - case GUP_FAST_BENCHMARK: - nr = get_user_pages_fast(addr, nr, gup->flags, - pages + i); - break; - case GUP_BENCHMARK: - nr = get_user_pages(addr, nr, gup->flags, pages + i, - NULL); - break; - case PIN_FAST_BENCHMARK: - nr = pin_user_pages_fast(addr, nr, gup->flags, - pages + i); - break; - case PIN_BENCHMARK: - nr = pin_user_pages(addr, nr, gup->flags, pages + i, - NULL); - break; - case PIN_LONGTERM_BENCHMARK: - nr = pin_user_pages(addr, nr, - gup->flags | FOLL_LONGTERM, - pages + i, NULL); - break; - default: - ret = -EINVAL; - goto unlock; - } - - if (nr <= 0) - break; - i += nr; - } - end_time = ktime_get(); - - /* Shifting the meaning of nr_pages: now it is actual number pinned: */ - nr_pages = i; - - gup->get_delta_usec = ktime_us_delta(end_time, start_time); - gup->size = addr - gup->addr; - - /* - * Take an un-benchmark-timed moment to verify DMA pinned - * state: print a warning if any non-dma-pinned pages are found: - */ - verify_dma_pinned(cmd, pages, nr_pages); - - start_time = ktime_get(); - - put_back_pages(cmd, pages, nr_pages); - - end_time = ktime_get(); - gup->put_delta_usec = ktime_us_delta(end_time, start_time); - -unlock: - if (needs_mmap_lock) - mmap_read_unlock(current->mm); -free_pages: - kvfree(pages); - return ret; -} - -static long gup_benchmark_ioctl(struct file *filep, unsigned int cmd, - unsigned long arg) -{ - struct gup_benchmark gup; - int ret; - - switch (cmd) { - case GUP_FAST_BENCHMARK: - case GUP_BENCHMARK: - case PIN_FAST_BENCHMARK: - case PIN_BENCHMARK: - case PIN_LONGTERM_BENCHMARK: - break; - default: - return -EINVAL; - } - - if (copy_from_user(&gup, (void __user *)arg, sizeof(gup))) - return -EFAULT; - - ret = __gup_benchmark_ioctl(cmd, &gup); - if (ret) - return ret; - - if (copy_to_user((void __user *)arg, &gup, sizeof(gup))) - return -EFAULT; - - return 0; -} - -static const struct file_operations gup_benchmark_fops = { - .open = nonseekable_open, - .unlocked_ioctl = gup_benchmark_ioctl, -}; - -static int gup_benchmark_init(void) -{ - debugfs_create_file_unsafe("gup_benchmark", 0600, NULL, NULL, - &gup_benchmark_fops); - - return 0; -} - -late_initcall(gup_benchmark_init); diff --git a/mm/gup_test.c b/mm/gup_test.c new file mode 100644 index 000000000000..59472ea6aa39 --- /dev/null +++ b/mm/gup_test.c @@ -0,0 +1,210 @@ +#include +#include +#include +#include +#include +#include + +#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_test) +#define GUP_BENCHMARK _IOWR('g', 2, struct gup_test) +#define PIN_FAST_BENCHMARK _IOWR('g', 3, struct gup_test) +#define PIN_BENCHMARK _IOWR('g', 4, struct gup_test) +#define PIN_LONGTERM_BENCHMARK _IOWR('g', 5, struct gup_test) + +struct gup_test { + __u64 get_delta_usec; + __u64 put_delta_usec; + __u64 addr; + __u64 size; + __u32 nr_pages_per_call; + __u32 flags; + __u64 expansion[10]; /* For future use */ +}; + +static void put_back_pages(unsigned int cmd, struct page **pages, + unsigned long nr_pages) +{ + unsigned long i; + + switch (cmd) { + case GUP_FAST_BENCHMARK: + case GUP_BENCHMARK: + for (i = 0; i < nr_pages; i++) + put_page(pages[i]); + break; + + case PIN_FAST_BENCHMARK: + case PIN_BENCHMARK: + case PIN_LONGTERM_BENCHMARK: + unpin_user_pages(pages, nr_pages); + break; + } +} + +static void verify_dma_pinned(unsigned int cmd, struct page **pages, + unsigned long nr_pages) +{ + unsigned long i; + struct page *page; + + switch (cmd) { + case PIN_FAST_BENCHMARK: + case PIN_BENCHMARK: + case PIN_LONGTERM_BENCHMARK: + for (i = 0; i < nr_pages; i++) { + page = pages[i]; + if (WARN(!page_maybe_dma_pinned(page), + "pages[%lu] is NOT dma-pinned\n", i)) { + + dump_page(page, "gup_test failure"); + break; + } + } + break; + } +} + +static int __gup_test_ioctl(unsigned int cmd, + struct gup_test *gup) +{ + ktime_t start_time, end_time; + unsigned long i, nr_pages, addr, next; + int nr; + struct page **pages; + int ret = 0; + bool needs_mmap_lock = + cmd != GUP_FAST_BENCHMARK && cmd != PIN_FAST_BENCHMARK; + + if (gup->size > ULONG_MAX) + return -EINVAL; + + nr_pages = gup->size / PAGE_SIZE; + pages = kvcalloc(nr_pages, sizeof(void *), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + if (needs_mmap_lock && mmap_read_lock_killable(current->mm)) { + ret = -EINTR; + goto free_pages; + } + + i = 0; + nr = gup->nr_pages_per_call; + start_time = ktime_get(); + for (addr = gup->addr; addr < gup->addr + gup->size; addr = next) { + if (nr != gup->nr_pages_per_call) + break; + + next = addr + nr * PAGE_SIZE; + if (next > gup->addr + gup->size) { + next = gup->addr + gup->size; + nr = (next - addr) / PAGE_SIZE; + } + + /* Filter out most gup flags: only allow a tiny subset here: */ + gup->flags &= FOLL_WRITE; + + switch (cmd) { + case GUP_FAST_BENCHMARK: + nr = get_user_pages_fast(addr, nr, gup->flags, + pages + i); + break; + case GUP_BENCHMARK: + nr = get_user_pages(addr, nr, gup->flags, pages + i, + NULL); + break; + case PIN_FAST_BENCHMARK: + nr = pin_user_pages_fast(addr, nr, gup->flags, + pages + i); + break; + case PIN_BENCHMARK: + nr = pin_user_pages(addr, nr, gup->flags, pages + i, + NULL); + break; + case PIN_LONGTERM_BENCHMARK: + nr = pin_user_pages(addr, nr, + gup->flags | FOLL_LONGTERM, + pages + i, NULL); + break; + default: + ret = -EINVAL; + goto unlock; + } + + if (nr <= 0) + break; + i += nr; + } + end_time = ktime_get(); + + /* Shifting the meaning of nr_pages: now it is actual number pinned: */ + nr_pages = i; + + gup->get_delta_usec = ktime_us_delta(end_time, start_time); + gup->size = addr - gup->addr; + + /* + * Take an un-benchmark-timed moment to verify DMA pinned + * state: print a warning if any non-dma-pinned pages are found: + */ + verify_dma_pinned(cmd, pages, nr_pages); + + start_time = ktime_get(); + + put_back_pages(cmd, pages, nr_pages); + + end_time = ktime_get(); + gup->put_delta_usec = ktime_us_delta(end_time, start_time); + +unlock: + if (needs_mmap_lock) + mmap_read_unlock(current->mm); +free_pages: + kvfree(pages); + return ret; +} + +static long gup_test_ioctl(struct file *filep, unsigned int cmd, + unsigned long arg) +{ + struct gup_test gup; + int ret; + + switch (cmd) { + case GUP_FAST_BENCHMARK: + case GUP_BENCHMARK: + case PIN_FAST_BENCHMARK: + case PIN_BENCHMARK: + case PIN_LONGTERM_BENCHMARK: + break; + default: + return -EINVAL; + } + + if (copy_from_user(&gup, (void __user *)arg, sizeof(gup))) + return -EFAULT; + + ret = __gup_test_ioctl(cmd, &gup); + if (ret) + return ret; + + if (copy_to_user((void __user *)arg, &gup, sizeof(gup))) + return -EFAULT; + + return 0; +} + +static const struct file_operations gup_test_fops = { + .open = nonseekable_open, + .unlocked_ioctl = gup_test_ioctl, +}; + +static int gup_test_init(void) +{ + debugfs_create_file_unsafe("gup_test", 0600, NULL, NULL, + &gup_test_fops); + + return 0; +} + +late_initcall(gup_test_init); diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 849e8226395a..2c8ddcf41c0e 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -15,7 +15,7 @@ userfaultfd mlock-intersect-test mlock-random-test virtual_address_range -gup_benchmark +gup_test va_128TBswitch map_fixed_noreplace write_to_hugetlbfs diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 691893afc15d..43723df2c6c4 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -23,7 +23,7 @@ MAKEFLAGS += --no-builtin-rules CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) LDLIBS = -lrt TEST_GEN_FILES = compaction_test -TEST_GEN_FILES += gup_benchmark +TEST_GEN_FILES += gup_test TEST_GEN_FILES += hmm-tests TEST_GEN_FILES += hugepage-mmap TEST_GEN_FILES += hugepage-shm diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config index 69dd0d1aa30b..60e82da0de85 100644 --- a/tools/testing/selftests/vm/config +++ b/tools/testing/selftests/vm/config @@ -3,4 +3,4 @@ CONFIG_USERFAULTFD=y CONFIG_TEST_VMALLOC=m CONFIG_DEVICE_PRIVATE=y CONFIG_TEST_HMM=m -CONFIG_GUP_BENCHMARK=y +CONFIG_GUP_TEST=y diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c deleted file mode 100644 index 1d4359341e44..000000000000 --- a/tools/testing/selftests/vm/gup_benchmark.c +++ /dev/null @@ -1,143 +0,0 @@ -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include - -#define MB (1UL << 20) -#define PAGE_SIZE sysconf(_SC_PAGESIZE) - -#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark) -#define GUP_BENCHMARK _IOWR('g', 2, struct gup_benchmark) - -/* Similar to above, but use FOLL_PIN instead of FOLL_GET. */ -#define PIN_FAST_BENCHMARK _IOWR('g', 3, struct gup_benchmark) -#define PIN_BENCHMARK _IOWR('g', 4, struct gup_benchmark) -#define PIN_LONGTERM_BENCHMARK _IOWR('g', 5, struct gup_benchmark) - -/* Just the flags we need, copied from mm.h: */ -#define FOLL_WRITE 0x01 /* check pte is writable */ - -struct gup_benchmark { - __u64 get_delta_usec; - __u64 put_delta_usec; - __u64 addr; - __u64 size; - __u32 nr_pages_per_call; - __u32 flags; - __u64 expansion[10]; /* For future use */ -}; - -int main(int argc, char **argv) -{ - struct gup_benchmark gup; - unsigned long size = 128 * MB; - int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0; - int cmd = GUP_FAST_BENCHMARK, flags = MAP_PRIVATE; - char *file = "/dev/zero"; - char *p; - - while ((opt = getopt(argc, argv, "m:r:n:f:abtTLUuwSH")) != -1) { - switch (opt) { - case 'a': - cmd = PIN_FAST_BENCHMARK; - break; - case 'b': - cmd = PIN_BENCHMARK; - break; - case 'L': - cmd = PIN_LONGTERM_BENCHMARK; - break; - case 'm': - size = atoi(optarg) * MB; - break; - case 'r': - repeats = atoi(optarg); - break; - case 'n': - nr_pages = atoi(optarg); - break; - case 't': - thp = 1; - break; - case 'T': - thp = 0; - break; - case 'U': - cmd = GUP_BENCHMARK; - break; - case 'u': - cmd = GUP_FAST_BENCHMARK; - break; - case 'w': - write = 1; - break; - case 'f': - file = optarg; - break; - case 'S': - flags &= ~MAP_PRIVATE; - flags |= MAP_SHARED; - break; - case 'H': - flags |= (MAP_HUGETLB | MAP_ANONYMOUS); - break; - default: - return -1; - } - } - - filed = open(file, O_RDWR|O_CREAT); - if (filed < 0) { - perror("open"); - exit(filed); - } - - gup.nr_pages_per_call = nr_pages; - if (write) - gup.flags |= FOLL_WRITE; - - fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR); - if (fd == -1) { - perror("open"); - exit(1); - } - - p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0); - if (p == MAP_FAILED) { - perror("mmap"); - exit(1); - } - gup.addr = (unsigned long)p; - - if (thp == 1) - madvise(p, size, MADV_HUGEPAGE); - else if (thp == 0) - madvise(p, size, MADV_NOHUGEPAGE); - - for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE) - p[0] = 0; - - for (i = 0; i < repeats; i++) { - gup.size = size; - if (ioctl(fd, cmd, &gup)) { - perror("ioctl"); - exit(1); - } - - printf("Time: get:%lld put:%lld us", gup.get_delta_usec, - gup.put_delta_usec); - if (gup.size != size) - printf(", truncated (size: %lld)", gup.size); - printf("\n"); - } - - return 0; -} diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c new file mode 100644 index 000000000000..00b4731f535e --- /dev/null +++ b/tools/testing/selftests/vm/gup_test.c @@ -0,0 +1,143 @@ +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#define MB (1UL << 20) +#define PAGE_SIZE sysconf(_SC_PAGESIZE) + +#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_test) +#define GUP_BENCHMARK _IOWR('g', 2, struct gup_test) + +/* Similar to above, but use FOLL_PIN instead of FOLL_GET. */ +#define PIN_FAST_BENCHMARK _IOWR('g', 3, struct gup_test) +#define PIN_BENCHMARK _IOWR('g', 4, struct gup_test) +#define PIN_LONGTERM_BENCHMARK _IOWR('g', 5, struct gup_test) + +/* Just the flags we need, copied from mm.h: */ +#define FOLL_WRITE 0x01 /* check pte is writable */ + +struct gup_test { + __u64 get_delta_usec; + __u64 put_delta_usec; + __u64 addr; + __u64 size; + __u32 nr_pages_per_call; + __u32 flags; + __u64 expansion[10]; /* For future use */ +}; + +int main(int argc, char **argv) +{ + struct gup_test gup; + unsigned long size = 128 * MB; + int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0; + int cmd = GUP_FAST_BENCHMARK, flags = MAP_PRIVATE; + char *file = "/dev/zero"; + char *p; + + while ((opt = getopt(argc, argv, "m:r:n:f:abtTLUuwSH")) != -1) { + switch (opt) { + case 'a': + cmd = PIN_FAST_BENCHMARK; + break; + case 'b': + cmd = PIN_BENCHMARK; + break; + case 'L': + cmd = PIN_LONGTERM_BENCHMARK; + break; + case 'm': + size = atoi(optarg) * MB; + break; + case 'r': + repeats = atoi(optarg); + break; + case 'n': + nr_pages = atoi(optarg); + break; + case 't': + thp = 1; + break; + case 'T': + thp = 0; + break; + case 'U': + cmd = GUP_BENCHMARK; + break; + case 'u': + cmd = GUP_FAST_BENCHMARK; + break; + case 'w': + write = 1; + break; + case 'f': + file = optarg; + break; + case 'S': + flags &= ~MAP_PRIVATE; + flags |= MAP_SHARED; + break; + case 'H': + flags |= (MAP_HUGETLB | MAP_ANONYMOUS); + break; + default: + return -1; + } + } + + filed = open(file, O_RDWR|O_CREAT); + if (filed < 0) { + perror("open"); + exit(filed); + } + + gup.nr_pages_per_call = nr_pages; + if (write) + gup.flags |= FOLL_WRITE; + + fd = open("/sys/kernel/debug/gup_test", O_RDWR); + if (fd == -1) { + perror("open"); + exit(1); + } + + p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0); + if (p == MAP_FAILED) { + perror("mmap"); + exit(1); + } + gup.addr = (unsigned long)p; + + if (thp == 1) + madvise(p, size, MADV_HUGEPAGE); + else if (thp == 0) + madvise(p, size, MADV_NOHUGEPAGE); + + for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE) + p[0] = 0; + + for (i = 0; i < repeats; i++) { + gup.size = size; + if (ioctl(fd, cmd, &gup)) { + perror("ioctl"); + exit(1); + } + + printf("Time: get:%lld put:%lld us", gup.get_delta_usec, + gup.put_delta_usec); + if (gup.size != size) + printf(", truncated (size: %lld)", gup.size); + printf("\n"); + } + + return 0; +} diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index a3f4f30f0a2e..d1843d5f3c30 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -124,9 +124,9 @@ else fi echo "--------------------------------------------" -echo "running 'gup_benchmark -U' (normal/slow gup)" +echo "running 'gup_test -U' (normal/slow gup)" echo "--------------------------------------------" -./gup_benchmark -U +./gup_test -U if [ $? -ne 0 ]; then echo "[FAIL]" exitcode=1 @@ -135,9 +135,9 @@ else fi echo "------------------------------------------" -echo "running gup_benchmark -b (pin_user_pages)" +echo "running gup_test -b (pin_user_pages)" echo "------------------------------------------" -./gup_benchmark -b +./gup_test -b if [ $? -ne 0 ]; then echo "[FAIL]" exitcode=1 -- cgit v1.3.1 From b9dcfdff8b4b223280015281b5050976c484c80a Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Mon, 14 Dec 2020 19:05:08 -0800 Subject: selftests/vm: use a common gup_test.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid the need to copy-paste the gup_test ioctl commands and the struct gup_test definition, between the kernel and the user space application, by providing a new header file for these. This allows easier and safer adding of new ioctl calls, as well as reducing the overall line count. Details: The header file has to be able to compile independently, because of the arguably unfortunate way that the Makefile is written: the Makefile tries to build all of its prerequisites, when really it should be only building the .c files, and leaving the other prerequisites (LOCAL_HDRS) as pure dependencies. That Makefile limitation is probably not worth fixing, but it explains why one of the includes had to be moved into the new header file. Also: simplify the ioctl struct (struct gup_test), by deleting the unused __expansion[10] field. This sort of thing is what you might see in a stable ABI, but this low-level, kernel-developer-oriented selftests/vm system is very much not subject to ABI stability. So "expansion" and "reserved" fields are unnecessary here. Link: https://lkml.kernel.org/r/20201026064021.3545418-3-jhubbard@nvidia.com Signed-off-by: John Hubbard Cc: Jérôme Glisse Cc: Jonathan Corbet Cc: Ralph Campbell Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/gup_test.c | 17 +---------------- mm/gup_test.h | 22 ++++++++++++++++++++++ tools/testing/selftests/vm/Makefile | 2 ++ tools/testing/selftests/vm/gup_test.c | 22 +--------------------- 4 files changed, 26 insertions(+), 37 deletions(-) create mode 100644 mm/gup_test.h (limited to 'tools') diff --git a/mm/gup_test.c b/mm/gup_test.c index 59472ea6aa39..4c2d70d88f24 100644 --- a/mm/gup_test.c +++ b/mm/gup_test.c @@ -4,22 +4,7 @@ #include #include #include - -#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_test) -#define GUP_BENCHMARK _IOWR('g', 2, struct gup_test) -#define PIN_FAST_BENCHMARK _IOWR('g', 3, struct gup_test) -#define PIN_BENCHMARK _IOWR('g', 4, struct gup_test) -#define PIN_LONGTERM_BENCHMARK _IOWR('g', 5, struct gup_test) - -struct gup_test { - __u64 get_delta_usec; - __u64 put_delta_usec; - __u64 addr; - __u64 size; - __u32 nr_pages_per_call; - __u32 flags; - __u64 expansion[10]; /* For future use */ -}; +#include "gup_test.h" static void put_back_pages(unsigned int cmd, struct page **pages, unsigned long nr_pages) diff --git a/mm/gup_test.h b/mm/gup_test.h new file mode 100644 index 000000000000..931c2f3f477a --- /dev/null +++ b/mm/gup_test.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef __GUP_TEST_H +#define __GUP_TEST_H + +#include + +#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_test) +#define GUP_BENCHMARK _IOWR('g', 2, struct gup_test) +#define PIN_FAST_BENCHMARK _IOWR('g', 3, struct gup_test) +#define PIN_BENCHMARK _IOWR('g', 4, struct gup_test) +#define PIN_LONGTERM_BENCHMARK _IOWR('g', 5, struct gup_test) + +struct gup_test { + __u64 get_delta_usec; + __u64 put_delta_usec; + __u64 addr; + __u64 size; + __u32 nr_pages_per_call; + __u32 flags; +}; + +#endif /* __GUP_TEST_H */ diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 43723df2c6c4..101c0315bc92 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -134,3 +134,5 @@ endif $(OUTPUT)/userfaultfd: LDLIBS += -lpthread $(OUTPUT)/mlock-random-test: LDLIBS += -lcap + +$(OUTPUT)/gup_test: ../../../../mm/gup_test.h diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c index 00b4731f535e..03f7c4f1beaf 100644 --- a/tools/testing/selftests/vm/gup_test.c +++ b/tools/testing/selftests/vm/gup_test.c @@ -2,39 +2,19 @@ #include #include #include - #include #include #include #include #include - -#include +#include "../../../../mm/gup_test.h" #define MB (1UL << 20) #define PAGE_SIZE sysconf(_SC_PAGESIZE) -#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_test) -#define GUP_BENCHMARK _IOWR('g', 2, struct gup_test) - -/* Similar to above, but use FOLL_PIN instead of FOLL_GET. */ -#define PIN_FAST_BENCHMARK _IOWR('g', 3, struct gup_test) -#define PIN_BENCHMARK _IOWR('g', 4, struct gup_test) -#define PIN_LONGTERM_BENCHMARK _IOWR('g', 5, struct gup_test) - /* Just the flags we need, copied from mm.h: */ #define FOLL_WRITE 0x01 /* check pte is writable */ -struct gup_test { - __u64 get_delta_usec; - __u64 put_delta_usec; - __u64 addr; - __u64 size; - __u32 nr_pages_per_call; - __u32 flags; - __u64 expansion[10]; /* For future use */ -}; - int main(int argc, char **argv) { struct gup_test gup; -- cgit v1.3.1 From c2aa8afc36fa8669ac165ace1f4d7173f21f367f Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Mon, 14 Dec 2020 19:05:11 -0800 Subject: selftests/vm: rename run_vmtests --> run_vmtests.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename to *.sh, in order to match the conventions of all of the other items in selftest/vm. The only reason not to use a .sh suffix a shell script like this, might be to make it look more like a normal program, but that's not an issue here. Link: https://lkml.kernel.org/r/20201026064021.3545418-4-jhubbard@nvidia.com Signed-off-by: John Hubbard Cc: Jérôme Glisse Cc: Jonathan Corbet Cc: Ralph Campbell Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 101c0315bc92..74ac457c96bb 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -73,7 +73,7 @@ TEST_GEN_FILES += virtual_address_range TEST_GEN_FILES += write_to_hugetlbfs endif -TEST_PROGS := run_vmtests +TEST_PROGS := run_vmtests.sh TEST_FILES := test_vmalloc.sh -- cgit v1.3.1 From f545605cc08e1f1820b4c8748689e7c6d4365d99 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Mon, 14 Dec 2020 19:05:14 -0800 Subject: selftests/vm: minor cleanup: Makefile and gup_test.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A few cleanups that don't deserve separate patches, but that also should not clutter up other functional changes: 1. Remove an unnecessary #include 2. Restore the sorted order of TEST_GEN_FILES. 3. Add -lpthread to the common LDLIBS, as it is harmless and several tests use it. This gets rid of one special rule already. Link: https://lkml.kernel.org/r/20201026064021.3545418-5-jhubbard@nvidia.com Signed-off-by: John Hubbard Cc: Jérôme Glisse Cc: Jonathan Corbet Cc: Ralph Campbell Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/Makefile | 10 ++++------ tools/testing/selftests/vm/gup_test.c | 1 - 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 74ac457c96bb..e640d8145cbd 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -21,14 +21,15 @@ MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/') MAKEFLAGS += --no-builtin-rules CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) -LDLIBS = -lrt +LDLIBS = -lrt -lpthread TEST_GEN_FILES = compaction_test TEST_GEN_FILES += gup_test TEST_GEN_FILES += hmm-tests TEST_GEN_FILES += hugepage-mmap TEST_GEN_FILES += hugepage-shm -TEST_GEN_FILES += map_hugetlb +TEST_GEN_FILES += khugepaged TEST_GEN_FILES += map_fixed_noreplace +TEST_GEN_FILES += map_hugetlb TEST_GEN_FILES += map_populate TEST_GEN_FILES += mlock-random-test TEST_GEN_FILES += mlock2-tests @@ -37,7 +38,6 @@ TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd -TEST_GEN_FILES += khugepaged ifeq ($(ARCH),x86_64) CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32) @@ -80,7 +80,7 @@ TEST_FILES := test_vmalloc.sh KSFT_KHDR_INSTALL := 1 include ../lib.mk -$(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs -lpthread +$(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs ifeq ($(ARCH),x86_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) @@ -131,8 +131,6 @@ warn_32bit_failure: endif endif -$(OUTPUT)/userfaultfd: LDLIBS += -lpthread - $(OUTPUT)/mlock-random-test: LDLIBS += -lcap $(OUTPUT)/gup_test: ../../../../mm/gup_test.h diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c index 03f7c4f1beaf..1a54771ad97e 100644 --- a/tools/testing/selftests/vm/gup_test.c +++ b/tools/testing/selftests/vm/gup_test.c @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include "../../../../mm/gup_test.h" -- cgit v1.3.1 From a9bed1e1c2a9bb36cdd29af0ef48044d1b9e8c2a Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Mon, 14 Dec 2020 19:05:17 -0800 Subject: selftests/vm: only some gup_test items are really benchmarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Therefore, some minor cleanup and improvements are in order: 1. Rename the other items appropriately. 2. Stop reporting timing information on the non-benchmark items. It's still being recorded and is available, but there's no point in cluttering up the report with data that no one reasonably needs to check. 3. Don't do iterations, for non-benchmark items. 4. Print out a shorter, more appropriate report for the non-benchmark tests. 5. Add the command that was run, to the report. This really helps, as there are quite a lot of options now. 6. Use a larger integer type for cmd, now that it's being compared Otherwise it doesn't work, because in this case cmd is about 3 billion, which is the perfect size for problems with signed vs unsigned int. Link: https://lkml.kernel.org/r/20201026064021.3545418-6-jhubbard@nvidia.com Signed-off-by: John Hubbard Cc: Jérôme Glisse Cc: Jonathan Corbet Cc: Ralph Campbell Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/core-api/pin_user_pages.rst | 2 +- mm/gup_test.c | 14 ++++----- mm/gup_test.h | 8 +++--- tools/testing/selftests/vm/gup_test.c | 47 +++++++++++++++++++++++++------ 4 files changed, 51 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/Documentation/core-api/pin_user_pages.rst b/Documentation/core-api/pin_user_pages.rst index eae972b23224..fcf605be43d0 100644 --- a/Documentation/core-api/pin_user_pages.rst +++ b/Documentation/core-api/pin_user_pages.rst @@ -226,7 +226,7 @@ This file:: has the following new calls to exercise the new pin*() wrapper functions: * PIN_FAST_BENCHMARK (./gup_test -a) -* PIN_BENCHMARK (./gup_test -b) +* PIN_BASIC_TEST (./gup_test -b) You can monitor how many total dma-pinned pages have been acquired and released since the system was booted, via two new /proc/vmstat entries: :: diff --git a/mm/gup_test.c b/mm/gup_test.c index 4c2d70d88f24..173bb38f3688 100644 --- a/mm/gup_test.c +++ b/mm/gup_test.c @@ -13,13 +13,13 @@ static void put_back_pages(unsigned int cmd, struct page **pages, switch (cmd) { case GUP_FAST_BENCHMARK: - case GUP_BENCHMARK: + case GUP_BASIC_TEST: for (i = 0; i < nr_pages; i++) put_page(pages[i]); break; case PIN_FAST_BENCHMARK: - case PIN_BENCHMARK: + case PIN_BASIC_TEST: case PIN_LONGTERM_BENCHMARK: unpin_user_pages(pages, nr_pages); break; @@ -34,7 +34,7 @@ static void verify_dma_pinned(unsigned int cmd, struct page **pages, switch (cmd) { case PIN_FAST_BENCHMARK: - case PIN_BENCHMARK: + case PIN_BASIC_TEST: case PIN_LONGTERM_BENCHMARK: for (i = 0; i < nr_pages; i++) { page = pages[i]; @@ -94,7 +94,7 @@ static int __gup_test_ioctl(unsigned int cmd, nr = get_user_pages_fast(addr, nr, gup->flags, pages + i); break; - case GUP_BENCHMARK: + case GUP_BASIC_TEST: nr = get_user_pages(addr, nr, gup->flags, pages + i, NULL); break; @@ -102,7 +102,7 @@ static int __gup_test_ioctl(unsigned int cmd, nr = pin_user_pages_fast(addr, nr, gup->flags, pages + i); break; - case PIN_BENCHMARK: + case PIN_BASIC_TEST: nr = pin_user_pages(addr, nr, gup->flags, pages + i, NULL); break; @@ -157,10 +157,10 @@ static long gup_test_ioctl(struct file *filep, unsigned int cmd, switch (cmd) { case GUP_FAST_BENCHMARK: - case GUP_BENCHMARK: case PIN_FAST_BENCHMARK: - case PIN_BENCHMARK: case PIN_LONGTERM_BENCHMARK: + case GUP_BASIC_TEST: + case PIN_BASIC_TEST: break; default: return -EINVAL; diff --git a/mm/gup_test.h b/mm/gup_test.h index 931c2f3f477a..921b4caad8ef 100644 --- a/mm/gup_test.h +++ b/mm/gup_test.h @@ -5,10 +5,10 @@ #include #define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_test) -#define GUP_BENCHMARK _IOWR('g', 2, struct gup_test) -#define PIN_FAST_BENCHMARK _IOWR('g', 3, struct gup_test) -#define PIN_BENCHMARK _IOWR('g', 4, struct gup_test) -#define PIN_LONGTERM_BENCHMARK _IOWR('g', 5, struct gup_test) +#define PIN_FAST_BENCHMARK _IOWR('g', 2, struct gup_test) +#define PIN_LONGTERM_BENCHMARK _IOWR('g', 3, struct gup_test) +#define GUP_BASIC_TEST _IOWR('g', 4, struct gup_test) +#define PIN_BASIC_TEST _IOWR('g', 5, struct gup_test) struct gup_test { __u64 get_delta_usec; diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c index 1a54771ad97e..f9163e1bb57a 100644 --- a/tools/testing/selftests/vm/gup_test.c +++ b/tools/testing/selftests/vm/gup_test.c @@ -14,12 +14,30 @@ /* Just the flags we need, copied from mm.h: */ #define FOLL_WRITE 0x01 /* check pte is writable */ +static char *cmd_to_str(unsigned long cmd) +{ + switch (cmd) { + case GUP_FAST_BENCHMARK: + return "GUP_FAST_BENCHMARK"; + case PIN_FAST_BENCHMARK: + return "PIN_FAST_BENCHMARK"; + case PIN_LONGTERM_BENCHMARK: + return "PIN_LONGTERM_BENCHMARK"; + case GUP_BASIC_TEST: + return "GUP_BASIC_TEST"; + case PIN_BASIC_TEST: + return "PIN_BASIC_TEST"; + } + return "Unknown command"; +} + int main(int argc, char **argv) { struct gup_test gup; unsigned long size = 128 * MB; int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0; - int cmd = GUP_FAST_BENCHMARK, flags = MAP_PRIVATE; + unsigned long cmd = GUP_FAST_BENCHMARK; + int flags = MAP_PRIVATE; char *file = "/dev/zero"; char *p; @@ -29,7 +47,7 @@ int main(int argc, char **argv) cmd = PIN_FAST_BENCHMARK; break; case 'b': - cmd = PIN_BENCHMARK; + cmd = PIN_BASIC_TEST; break; case 'L': cmd = PIN_LONGTERM_BENCHMARK; @@ -50,7 +68,7 @@ int main(int argc, char **argv) thp = 0; break; case 'U': - cmd = GUP_BENCHMARK; + cmd = GUP_BASIC_TEST; break; case 'u': cmd = GUP_FAST_BENCHMARK; @@ -104,18 +122,31 @@ int main(int argc, char **argv) for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE) p[0] = 0; - for (i = 0; i < repeats; i++) { + /* Only report timing information on the *_BENCHMARK commands: */ + if ((cmd == PIN_FAST_BENCHMARK) || (cmd == GUP_FAST_BENCHMARK) || + (cmd == PIN_LONGTERM_BENCHMARK)) { + for (i = 0; i < repeats; i++) { + gup.size = size; + if (ioctl(fd, cmd, &gup)) + perror("ioctl"), exit(1); + + printf("%s: Time: get:%lld put:%lld us", + cmd_to_str(cmd), gup.get_delta_usec, + gup.put_delta_usec); + if (gup.size != size) + printf(", truncated (size: %lld)", gup.size); + printf("\n"); + } + } else { gup.size = size; if (ioctl(fd, cmd, &gup)) { perror("ioctl"); exit(1); } - printf("Time: get:%lld put:%lld us", gup.get_delta_usec, - gup.put_delta_usec); + printf("%s: done\n", cmd_to_str(cmd)); if (gup.size != size) - printf(", truncated (size: %lld)", gup.size); - printf("\n"); + printf("Truncated (size: %lld)\n", gup.size); } return 0; -- cgit v1.3.1 From f4f9bda418ab8b4dbc5372e9e2a28162f7777154 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Mon, 14 Dec 2020 19:05:21 -0800 Subject: selftests/vm: gup_test: introduce the dump_pages() sub-test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For quite a while, I was doing a quick hack to gup_test.c (previously, gup_benchmark.c) whenever I wanted to try out my changes to dump_page(). This makes that hack unnecessary, and instead allows anyone to easily get the same coverage from a user space program. That saves a lot of time because you don't have to change the kernel, in order to test different pages and options. The new sub-test takes advantage of the existing gup_test infrastructure, which already provides a simple user space program, some allocated user space pages, an ioctl call, pinning of those pages (via either get_user_pages or pin_user_pages) and a corresponding kernel-side test invocation. There's not much more required, mainly just a couple of inputs from the user. In fact, the new test re-uses the existing command line options in order to get various helpful combinations (THP or normal, _fast or slow gup, gup vs. pup, and more). New command line options are: which pages to dump, and what type of "get/pin" to use. In order to figure out which pages to dump, the logic is: * If the user doesn't specify anything, the page 0 (the first page in the address range that the program sets up for testing) is dumped. * Or, the user can type up to 8 page indices anywhere on the command line. If you type more than 8, then it uses the first 8 and ignores the remaining items. For example: ./gup_test -ct -F 1 0 19 0x1000 Meaning: -c: dump pages sub-test -t: use THP pages -F 1: use pin_user_pages() instead of get_user_pages() 0 19 0x1000: dump pages 0, 19, and 4096 Link: https://lkml.kernel.org/r/20201026064021.3545418-7-jhubbard@nvidia.com Signed-off-by: John Hubbard Cc: Jérôme Glisse Cc: Jonathan Corbet Cc: Ralph Campbell Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/Kconfig | 6 ++++ mm/gup_test.c | 56 +++++++++++++++++++++++++++++++++-- mm/gup_test.h | 10 +++++++ tools/testing/selftests/vm/gup_test.c | 45 ++++++++++++++++++++++++++-- 4 files changed, 113 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/mm/Kconfig b/mm/Kconfig index e25e5cb2989f..350f2e23a94a 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -832,6 +832,12 @@ config GUP_TEST get_user_pages*() and pin_user_pages*(), as well as smoke tests of the non-_fast variants. + There is also a sub-test that allows running dump_page() on any + of up to eight pages (selected by command line args) within the + range of user-space addresses. These pages are either pinned via + pin_user_pages*(), or pinned via get_user_pages*(), as specified + by other command line arguments. + See tools/testing/selftests/vm/gup_test.c config GUP_GET_PTE_LOW_HIGH diff --git a/mm/gup_test.c b/mm/gup_test.c index 173bb38f3688..087ddaf30eb4 100644 --- a/mm/gup_test.c +++ b/mm/gup_test.c @@ -7,7 +7,7 @@ #include "gup_test.h" static void put_back_pages(unsigned int cmd, struct page **pages, - unsigned long nr_pages) + unsigned long nr_pages, unsigned int gup_test_flags) { unsigned long i; @@ -23,6 +23,15 @@ static void put_back_pages(unsigned int cmd, struct page **pages, case PIN_LONGTERM_BENCHMARK: unpin_user_pages(pages, nr_pages); break; + case DUMP_USER_PAGES_TEST: + if (gup_test_flags & GUP_TEST_FLAG_DUMP_PAGES_USE_PIN) { + unpin_user_pages(pages, nr_pages); + } else { + for (i = 0; i < nr_pages; i++) + put_page(pages[i]); + + } + break; } } @@ -49,6 +58,37 @@ static void verify_dma_pinned(unsigned int cmd, struct page **pages, } } +static void dump_pages_test(struct gup_test *gup, struct page **pages, + unsigned long nr_pages) +{ + unsigned int index_to_dump; + unsigned int i; + + /* + * Zero out any user-supplied page index that is out of range. Remember: + * .which_pages[] contains a 1-based set of page indices. + */ + for (i = 0; i < GUP_TEST_MAX_PAGES_TO_DUMP; i++) { + if (gup->which_pages[i] > nr_pages) { + pr_warn("ZEROING due to out of range: .which_pages[%u]: %u\n", + i, gup->which_pages[i]); + gup->which_pages[i] = 0; + } + } + + for (i = 0; i < GUP_TEST_MAX_PAGES_TO_DUMP; i++) { + index_to_dump = gup->which_pages[i]; + + if (index_to_dump) { + index_to_dump--; // Decode from 1-based, to 0-based + pr_info("---- page #%u, starting from user virt addr: 0x%llx\n", + index_to_dump, gup->addr); + dump_page(pages[index_to_dump], + "gup_test: dump_pages() test"); + } + } +} + static int __gup_test_ioctl(unsigned int cmd, struct gup_test *gup) { @@ -111,6 +151,14 @@ static int __gup_test_ioctl(unsigned int cmd, gup->flags | FOLL_LONGTERM, pages + i, NULL); break; + case DUMP_USER_PAGES_TEST: + if (gup->flags & GUP_TEST_FLAG_DUMP_PAGES_USE_PIN) + nr = pin_user_pages(addr, nr, gup->flags, + pages + i, NULL); + else + nr = get_user_pages(addr, nr, gup->flags, + pages + i, NULL); + break; default: ret = -EINVAL; goto unlock; @@ -134,9 +182,12 @@ static int __gup_test_ioctl(unsigned int cmd, */ verify_dma_pinned(cmd, pages, nr_pages); + if (cmd == DUMP_USER_PAGES_TEST) + dump_pages_test(gup, pages, nr_pages); + start_time = ktime_get(); - put_back_pages(cmd, pages, nr_pages); + put_back_pages(cmd, pages, nr_pages, gup->flags); end_time = ktime_get(); gup->put_delta_usec = ktime_us_delta(end_time, start_time); @@ -161,6 +212,7 @@ static long gup_test_ioctl(struct file *filep, unsigned int cmd, case PIN_LONGTERM_BENCHMARK: case GUP_BASIC_TEST: case PIN_BASIC_TEST: + case DUMP_USER_PAGES_TEST: break; default: return -EINVAL; diff --git a/mm/gup_test.h b/mm/gup_test.h index 921b4caad8ef..90a6713d50eb 100644 --- a/mm/gup_test.h +++ b/mm/gup_test.h @@ -9,6 +9,11 @@ #define PIN_LONGTERM_BENCHMARK _IOWR('g', 3, struct gup_test) #define GUP_BASIC_TEST _IOWR('g', 4, struct gup_test) #define PIN_BASIC_TEST _IOWR('g', 5, struct gup_test) +#define DUMP_USER_PAGES_TEST _IOWR('g', 6, struct gup_test) + +#define GUP_TEST_MAX_PAGES_TO_DUMP 8 + +#define GUP_TEST_FLAG_DUMP_PAGES_USE_PIN 0x1 struct gup_test { __u64 get_delta_usec; @@ -17,6 +22,11 @@ struct gup_test { __u64 size; __u32 nr_pages_per_call; __u32 flags; + /* + * Each non-zero entry is the number of the page (1-based: first page is + * page 1, so that zero entries mean "do nothing") from the .addr base. + */ + __u32 which_pages[GUP_TEST_MAX_PAGES_TO_DUMP]; }; #endif /* __GUP_TEST_H */ diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c index f9163e1bb57a..6c6336dd3b7f 100644 --- a/tools/testing/selftests/vm/gup_test.c +++ b/tools/testing/selftests/vm/gup_test.c @@ -27,13 +27,15 @@ static char *cmd_to_str(unsigned long cmd) return "GUP_BASIC_TEST"; case PIN_BASIC_TEST: return "PIN_BASIC_TEST"; + case DUMP_USER_PAGES_TEST: + return "DUMP_USER_PAGES_TEST"; } return "Unknown command"; } int main(int argc, char **argv) { - struct gup_test gup; + struct gup_test gup = { 0 }; unsigned long size = 128 * MB; int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0; unsigned long cmd = GUP_FAST_BENCHMARK; @@ -41,7 +43,7 @@ int main(int argc, char **argv) char *file = "/dev/zero"; char *p; - while ((opt = getopt(argc, argv, "m:r:n:f:abtTLUuwSH")) != -1) { + while ((opt = getopt(argc, argv, "m:r:n:F:f:abctTLUuwSH")) != -1) { switch (opt) { case 'a': cmd = PIN_FAST_BENCHMARK; @@ -52,6 +54,21 @@ int main(int argc, char **argv) case 'L': cmd = PIN_LONGTERM_BENCHMARK; break; + case 'c': + cmd = DUMP_USER_PAGES_TEST; + /* + * Dump page 0 (index 1). May be overridden later, by + * user's non-option arguments. + * + * .which_pages is zero-based, so that zero can mean "do + * nothing". + */ + gup.which_pages[0] = 1; + break; + case 'F': + /* strtol, so you can pass flags in hex form */ + gup.flags = strtol(optarg, 0, 0); + break; case 'm': size = atoi(optarg) * MB; break; @@ -91,6 +108,30 @@ int main(int argc, char **argv) } } + if (optind < argc) { + int extra_arg_count = 0; + /* + * For example: + * + * ./gup_test -c 0 1 0x1001 + * + * ...to dump pages 0, 1, and 4097 + */ + + while ((optind < argc) && + (extra_arg_count < GUP_TEST_MAX_PAGES_TO_DUMP)) { + /* + * Do the 1-based indexing here, so that the user can + * use normal 0-based indexing on the command line. + */ + long page_index = strtol(argv[optind], 0, 0) + 1; + + gup.which_pages[extra_arg_count] = page_index; + extra_arg_count++; + optind++; + } + } + filed = open(file, O_RDWR|O_CREAT); if (filed < 0) { perror("open"); -- cgit v1.3.1 From d943fe81e0bf49dda1369e87d49c5276a02698df Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Mon, 14 Dec 2020 19:05:24 -0800 Subject: selftests/vm: run_vmtests.sh: update and clean up gup_test invocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Run benchmarks on the _fast variants of gup and pup, as originally intended. Run the new gup_test sub-test: dump pages. In addition to exercising the dump_page() call, it also demonstrates the various options you can use to specify which pages to dump, and how. Link: https://lkml.kernel.org/r/20201026064021.3545418-8-jhubbard@nvidia.com Signed-off-by: John Hubbard Cc: Jérôme Glisse Cc: Jonathan Corbet Cc: Ralph Campbell Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/run_vmtests | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index d1843d5f3c30..4ac84b350d9f 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -123,10 +123,10 @@ else echo "[PASS]" fi -echo "--------------------------------------------" -echo "running 'gup_test -U' (normal/slow gup)" -echo "--------------------------------------------" -./gup_test -U +echo "------------------------------------------------------" +echo "running: gup_test -u # get_user_pages_fast() benchmark" +echo "------------------------------------------------------" +./gup_test -u if [ $? -ne 0 ]; then echo "[FAIL]" exitcode=1 @@ -134,10 +134,22 @@ else echo "[PASS]" fi -echo "------------------------------------------" -echo "running gup_test -b (pin_user_pages)" -echo "------------------------------------------" -./gup_test -b +echo "------------------------------------------------------" +echo "running: gup_test -a # pin_user_pages_fast() benchmark" +echo "------------------------------------------------------" +./gup_test -a +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "------------------------------------------------------------" +echo "# Dump pages 0, 19, and 4096, using pin_user_pages:" +echo "running: gup_test -ct -F 0x1 0 19 0x1000 # dump_page() test" +echo "------------------------------------------------------------" +./gup_test -ct -F 0x1 0 19 0x1000 if [ $? -ne 0 ]; then echo "[FAIL]" exitcode=1 -- cgit v1.3.1 From f3a45709d2bb1b6cbab2899a6c8e75dfb8e4aad7 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Mon, 14 Dec 2020 19:05:28 -0800 Subject: selftests/vm: hmm-tests: remove the libhugetlbfs dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HMM selftests are incredibly useful, but they are only effective if people actually build and run them. All the other tests in selftests/vm can be built with very standard, always-available libraries: libpthread, librt. The hmm-tests.c program, on the other hand, requires something that is (much) less readily available: libhugetlbfs. And so the build will typically fail for many developers. A simple attempt to install libhugetlbfs will also run into complications on some common distros these days: Fedora and Arch Linux (yes, Arch AUR has it, but that's fragile, as always with AUR). The library is not maintained actively enough at the moment, for distros to deal with it. I had to build it from source, for Fedora, and that didn't go too smoothly either. It turns out that, out of 21 tests in hmm-tests.c, only 2 actually require functionality from libhugetlbfs. Therefore, if libhugetlbfs is missing, simply ifdef those two tests out and allow the developer to at least have the other 19 tests, if they don't want to pause to work through the above issues. Also issue a warning, so that it's clear that there is an imperfection in the build. In order to do that, a tiny shell script (check_config.sh) runs a quick compile (not link, that's too prone to false failures with library paths), and basically, if the compiler doesn't find hugetlbfs.h in its standard locations, then the script concludes that libhugetlbfs is not available. The output is in two files, one for inclusion in hmm-test.c (local_config.h), and one for inclusion in the Makefile (local_config.mk). Link: https://lkml.kernel.org/r/20201026064021.3545418-9-jhubbard@nvidia.com Signed-off-by: John Hubbard Cc: Ralph Campbell Cc: Jérôme Glisse Cc: Jonathan Corbet Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/.gitignore | 1 + tools/testing/selftests/vm/Makefile | 24 +++++++++++++++++++++-- tools/testing/selftests/vm/check_config.sh | 31 ++++++++++++++++++++++++++++++ tools/testing/selftests/vm/hmm-tests.c | 10 +++++++++- 4 files changed, 63 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/vm/check_config.sh (limited to 'tools') diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 2c8ddcf41c0e..e90d28bcd518 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -20,3 +20,4 @@ va_128TBswitch map_fixed_noreplace write_to_hugetlbfs hmm-tests +local_config.* diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index e640d8145cbd..bdd3fe7fd086 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,5 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for vm selftests + +include local_config.mk + uname_M := $(shell uname -m 2>/dev/null || echo not) MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/') @@ -80,8 +83,6 @@ TEST_FILES := test_vmalloc.sh KSFT_KHDR_INSTALL := 1 include ../lib.mk -$(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs - ifeq ($(ARCH),x86_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) @@ -134,3 +135,22 @@ endif $(OUTPUT)/mlock-random-test: LDLIBS += -lcap $(OUTPUT)/gup_test: ../../../../mm/gup_test.h + +$(OUTPUT)/hmm-tests: local_config.h + +# HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty. +$(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS) + +local_config.mk local_config.h: check_config.sh + /bin/sh ./check_config.sh $(CC) + +EXTRA_CLEAN += local_config.mk local_config.h + +ifeq ($(HMM_EXTRA_LIBS),) +all: warn_missing_hugelibs + +warn_missing_hugelibs: + @echo ; \ + echo "Warning: missing libhugetlbfs support. Some HMM tests will be skipped." ; \ + echo +endif diff --git a/tools/testing/selftests/vm/check_config.sh b/tools/testing/selftests/vm/check_config.sh new file mode 100644 index 000000000000..079c8a40b85d --- /dev/null +++ b/tools/testing/selftests/vm/check_config.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Probe for libraries and create header files to record the results. Both C +# header files and Makefile include fragments are created. + +OUTPUT_H_FILE=local_config.h +OUTPUT_MKFILE=local_config.mk + +# libhugetlbfs +tmpname=$(mktemp) +tmpfile_c=${tmpname}.c +tmpfile_o=${tmpname}.o + +echo "#include " > $tmpfile_c +echo "#include " >> $tmpfile_c +echo "int func(void) { return 0; }" >> $tmpfile_c + +CC=${1:?"Usage: $0 # example compiler: gcc"} +$CC -c $tmpfile_c -o $tmpfile_o >/dev/null 2>&1 + +if [ -f $tmpfile_o ]; then + echo "#define LOCAL_CONFIG_HAVE_LIBHUGETLBFS 1" > $OUTPUT_H_FILE + echo "HMM_EXTRA_LIBS = -lhugetlbfs" > $OUTPUT_MKFILE +else + echo "// No libhugetlbfs support found" > $OUTPUT_H_FILE + echo "# No libhugetlbfs support found, so:" > $OUTPUT_MKFILE + echo "HMM_EXTRA_LIBS = " >> $OUTPUT_MKFILE +fi + +rm ${tmpname}.* diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c index c9404ef9698e..5d1ac691b9f4 100644 --- a/tools/testing/selftests/vm/hmm-tests.c +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -21,12 +21,16 @@ #include #include #include -#include #include #include #include #include +#include "./local_config.h" +#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS +#include +#endif + /* * This is a private UAPI to the kernel test module so it isn't exported * in the usual include/uapi/... directory. @@ -662,6 +666,7 @@ TEST_F(hmm, anon_write_huge) hmm_buffer_free(buffer); } +#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS /* * Write huge TLBFS page. */ @@ -720,6 +725,7 @@ TEST_F(hmm, anon_write_hugetlbfs) buffer->ptr = NULL; hmm_buffer_free(buffer); } +#endif /* LOCAL_CONFIG_HAVE_LIBHUGETLBFS */ /* * Read mmap'ed file memory. @@ -1336,6 +1342,7 @@ TEST_F(hmm2, snapshot) hmm_buffer_free(buffer); } +#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS /* * Test the hmm_range_fault() HMM_PFN_PMD flag for large pages that * should be mapped by a large page table entry. @@ -1411,6 +1418,7 @@ TEST_F(hmm, compound) buffer->ptr = NULL; hmm_buffer_free(buffer); } +#endif /* LOCAL_CONFIG_HAVE_LIBHUGETLBFS */ /* * Test two devices reading the same memory (double mapped). -- cgit v1.3.1 From a26c4c62990a3ad5061f72e68f2394a01480265d Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Mon, 14 Dec 2020 19:05:31 -0800 Subject: selftests/vm: 2x speedup for run_vmtests.sh Each invocation of userfaultfd for "anon" and "shmem" was taking about 6.5 sec to run, contributing to an overall run time of about 22 sec for run_vmtests.sh. Reduce the size and bounce input values to the userfaultfd invocation within run_vmtests.sh, enough to get each invocation down to about 1.0 sec. This should still provide a reasonable smoke test, while staying within a nominal time budget of around 1 second or so per test. And this brings the overall running time of run_vmtests.sh down to 11 second. Link: https://lkml.kernel.org/r/20201026064021.3545418-10-jhubbard@nvidia.com Signed-off-by: John Hubbard Cc: Mike Rapoport Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/run_vmtests | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index 4ac84b350d9f..9e8837768ee2 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -160,7 +160,7 @@ fi echo "-------------------" echo "running userfaultfd" echo "-------------------" -./userfaultfd anon 128 32 +./userfaultfd anon 20 16 if [ $? -ne 0 ]; then echo "[FAIL]" exitcode=1 @@ -185,7 +185,7 @@ rm -f $mnt/ufd_test_file echo "-------------------------" echo "running userfaultfd_shmem" echo "-------------------------" -./userfaultfd shmem 128 32 +./userfaultfd shmem 20 16 if [ $? -ne 0 ]; then echo "[FAIL]" exitcode=1 -- cgit v1.3.1 From 7df666253f2610284f653bce0e2e50b4923c84aa Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Mon, 14 Dec 2020 19:07:25 -0800 Subject: kselftests: vm: add mremap tests Patch series "Speed up mremap on large regions", v4. mremap time can be optimized by moving entries at the PMD/PUD level if the source and destination addresses are PMD/PUD-aligned and PMD/PUD-sized. Enable moving at the PMD and PUD levels on arm64 and x86. Other architectures where this type of move is supported and known to be safe can also opt-in to these optimizations by enabling HAVE_MOVE_PMD and HAVE_MOVE_PUD. Observed Performance Improvements for remapping a PUD-aligned 1GB-sized region on x86 and arm64: - HAVE_MOVE_PMD is already enabled on x86 : N/A - Enabling HAVE_MOVE_PUD on x86 : ~13x speed up - Enabling HAVE_MOVE_PMD on arm64 : ~ 8x speed up - Enabling HAVE_MOVE_PUD on arm64 : ~19x speed up Altogether, HAVE_MOVE_PMD and HAVE_MOVE_PUD give a total of ~150x speed up on arm64. This patch (of 4): Test mremap on regions of various sizes and alignments and validate data after remapping. Also provide total time for remapping the region which is useful for performance comparison of the mremap optimizations that move pages at the PMD/PUD levels if HAVE_MOVE_PMD and/or HAVE_MOVE_PUD are enabled. Link: https://lkml.kernel.org/r/20201014005320.2233162-1-kaleshsingh@google.com Link: https://lkml.kernel.org/r/20201014005320.2233162-2-kaleshsingh@google.com Signed-off-by: Kalesh Singh Reviewed-by: John Hubbard Cc: Shuah Khan Cc: Kirill A. Shutemov Cc: Suren Baghdasaryan Cc: Minchan Kim Cc: Lokesh Gidra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Catalin Marinas Cc: Will Deacon Cc: Peter Zijlstra (Intel) Cc: Kees Cook Cc: Aneesh Kumar K.V Cc: Arnd Bergmann Cc: Sami Tolvanen Cc: Masahiro Yamada Cc: Krzysztof Kozlowski Cc: Frederic Weisbecker Cc: Hassan Naveed Cc: Christian Brauner Cc: Anshuman Khandual Cc: Mark Rutland Cc: Gavin Shan Cc: Mike Rapoport Cc: Steven Price Cc: Jia He Cc: Ram Pai Cc: Sandipan Das Cc: Zi Yan Cc: Mina Almasry Cc: Ralph Campbell Cc: Dave Hansen Cc: Brian Geffon Cc: Masami Hiramatsu Cc: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/.gitignore | 1 + tools/testing/selftests/vm/Makefile | 1 + tools/testing/selftests/vm/mremap_test.c | 344 +++++++++++++++++++++++++++++++ tools/testing/selftests/vm/run_vmtests | 11 + 4 files changed, 357 insertions(+) create mode 100644 tools/testing/selftests/vm/mremap_test.c (limited to 'tools') diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index e90d28bcd518..9a35c3f6a557 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -8,6 +8,7 @@ thuge-gen compaction_test mlock2-tests mremap_dontunmap +mremap_test on-fault-limit transhuge-stress protection_keys diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index bdd3fe7fd086..9a25307f6115 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -37,6 +37,7 @@ TEST_GEN_FILES += map_populate TEST_GEN_FILES += mlock-random-test TEST_GEN_FILES += mlock2-tests TEST_GEN_FILES += mremap_dontunmap +TEST_GEN_FILES += mremap_test TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c new file mode 100644 index 000000000000..9c391d016922 --- /dev/null +++ b/tools/testing/selftests/vm/mremap_test.c @@ -0,0 +1,344 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2020 Google LLC + */ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include + +#include "../kselftest.h" + +#define EXPECT_SUCCESS 0 +#define EXPECT_FAILURE 1 +#define NON_OVERLAPPING 0 +#define OVERLAPPING 1 +#define NS_PER_SEC 1000000000ULL +#define VALIDATION_DEFAULT_THRESHOLD 4 /* 4MB */ +#define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */ + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) + +struct config { + unsigned long long src_alignment; + unsigned long long dest_alignment; + unsigned long long region_size; + int overlapping; +}; + +struct test { + const char *name; + struct config config; + int expect_failure; +}; + +enum { + _1KB = 1ULL << 10, /* 1KB -> not page aligned */ + _4KB = 4ULL << 10, + _8KB = 8ULL << 10, + _1MB = 1ULL << 20, + _2MB = 2ULL << 20, + _4MB = 4ULL << 20, + _1GB = 1ULL << 30, + _2GB = 2ULL << 30, + PTE = _4KB, + PMD = _2MB, + PUD = _1GB, +}; + +#define MAKE_TEST(source_align, destination_align, size, \ + overlaps, should_fail, test_name) \ +{ \ + .name = test_name, \ + .config = { \ + .src_alignment = source_align, \ + .dest_alignment = destination_align, \ + .region_size = size, \ + .overlapping = overlaps, \ + }, \ + .expect_failure = should_fail \ +} + +/* + * Returns the start address of the mapping on success, else returns + * NULL on failure. + */ +static void *get_source_mapping(struct config c) +{ + unsigned long long addr = 0ULL; + void *src_addr = NULL; +retry: + addr += c.src_alignment; + src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANONYMOUS | MAP_SHARED, -1, 0); + if (src_addr == MAP_FAILED) { + if (errno == EPERM) + goto retry; + goto error; + } + /* + * Check that the address is aligned to the specified alignment. + * Addresses which have alignments that are multiples of that + * specified are not considered valid. For instance, 1GB address is + * 2MB-aligned, however it will not be considered valid for a + * requested alignment of 2MB. This is done to reduce coincidental + * alignment in the tests. + */ + if (((unsigned long long) src_addr & (c.src_alignment - 1)) || + !((unsigned long long) src_addr & c.src_alignment)) + goto retry; + + if (!src_addr) + goto error; + + return src_addr; +error: + ksft_print_msg("Failed to map source region: %s\n", + strerror(errno)); + return NULL; +} + +/* Returns the time taken for the remap on success else returns -1. */ +static long long remap_region(struct config c, unsigned int threshold_mb, + char pattern_seed) +{ + void *addr, *src_addr, *dest_addr; + unsigned long long i; + struct timespec t_start = {0, 0}, t_end = {0, 0}; + long long start_ns, end_ns, align_mask, ret, offset; + unsigned long long threshold; + + if (threshold_mb == VALIDATION_NO_THRESHOLD) + threshold = c.region_size; + else + threshold = MIN(threshold_mb * _1MB, c.region_size); + + src_addr = get_source_mapping(c); + if (!src_addr) { + ret = -1; + goto out; + } + + /* Set byte pattern */ + srand(pattern_seed); + for (i = 0; i < threshold; i++) + memset((char *) src_addr + i, (char) rand(), 1); + + /* Mask to zero out lower bits of address for alignment */ + align_mask = ~(c.dest_alignment - 1); + /* Offset of destination address from the end of the source region */ + offset = (c.overlapping) ? -c.dest_alignment : c.dest_alignment; + addr = (void *) (((unsigned long long) src_addr + c.region_size + + offset) & align_mask); + + /* See comment in get_source_mapping() */ + if (!((unsigned long long) addr & c.dest_alignment)) + addr = (void *) ((unsigned long long) addr | c.dest_alignment); + + clock_gettime(CLOCK_MONOTONIC, &t_start); + dest_addr = mremap(src_addr, c.region_size, c.region_size, + MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr); + clock_gettime(CLOCK_MONOTONIC, &t_end); + + if (dest_addr == MAP_FAILED) { + ksft_print_msg("mremap failed: %s\n", strerror(errno)); + ret = -1; + goto clean_up_src; + } + + /* Verify byte pattern after remapping */ + srand(pattern_seed); + for (i = 0; i < threshold; i++) { + char c = (char) rand(); + + if (((char *) dest_addr)[i] != c) { + ksft_print_msg("Data after remap doesn't match at offset %d\n", + i); + ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, + ((char *) dest_addr)[i] & 0xff); + ret = -1; + goto clean_up_dest; + } + } + + start_ns = t_start.tv_sec * NS_PER_SEC + t_start.tv_nsec; + end_ns = t_end.tv_sec * NS_PER_SEC + t_end.tv_nsec; + ret = end_ns - start_ns; + +/* + * Since the destination address is specified using MREMAP_FIXED, subsequent + * mremap will unmap any previous mapping at the address range specified by + * dest_addr and region_size. This significantly affects the remap time of + * subsequent tests. So we clean up mappings after each test. + */ +clean_up_dest: + munmap(dest_addr, c.region_size); +clean_up_src: + munmap(src_addr, c.region_size); +out: + return ret; +} + +static void run_mremap_test_case(struct test test_case, int *failures, + unsigned int threshold_mb, + unsigned int pattern_seed) +{ + long long remap_time = remap_region(test_case.config, threshold_mb, + pattern_seed); + + if (remap_time < 0) { + if (test_case.expect_failure) + ksft_test_result_pass("%s\n\tExpected mremap failure\n", + test_case.name); + else { + ksft_test_result_fail("%s\n", test_case.name); + *failures += 1; + } + } else { + /* + * Comparing mremap time is only applicable if entire region + * was faulted in. + */ + if (threshold_mb == VALIDATION_NO_THRESHOLD || + test_case.config.region_size <= threshold_mb * _1MB) + ksft_test_result_pass("%s\n\tmremap time: %12lldns\n", + test_case.name, remap_time); + else + ksft_test_result_pass("%s\n", test_case.name); + } +} + +static void usage(const char *cmd) +{ + fprintf(stderr, + "Usage: %s [[-t ] [-p ]]\n" + "-t\t only validate threshold_mb of the remapped region\n" + " \t if 0 is supplied no threshold is used; all tests\n" + " \t are run and remapped regions validated fully.\n" + " \t The default threshold used is 4MB.\n" + "-p\t provide a seed to generate the random pattern for\n" + " \t validating the remapped region.\n", cmd); +} + +static int parse_args(int argc, char **argv, unsigned int *threshold_mb, + unsigned int *pattern_seed) +{ + const char *optstr = "t:p:"; + int opt; + + while ((opt = getopt(argc, argv, optstr)) != -1) { + switch (opt) { + case 't': + *threshold_mb = atoi(optarg); + break; + case 'p': + *pattern_seed = atoi(optarg); + break; + default: + usage(argv[0]); + return -1; + } + } + + if (optind < argc) { + usage(argv[0]); + return -1; + } + + return 0; +} + +int main(int argc, char **argv) +{ + int failures = 0; + int i, run_perf_tests; + unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD; + unsigned int pattern_seed; + time_t t; + + pattern_seed = (unsigned int) time(&t); + + if (parse_args(argc, argv, &threshold_mb, &pattern_seed) < 0) + exit(EXIT_FAILURE); + + ksft_print_msg("Test configs:\n\tthreshold_mb=%u\n\tpattern_seed=%u\n\n", + threshold_mb, pattern_seed); + + struct test test_cases[] = { + /* Expected mremap failures */ + MAKE_TEST(_4KB, _4KB, _4KB, OVERLAPPING, EXPECT_FAILURE, + "mremap - Source and Destination Regions Overlapping"), + MAKE_TEST(_4KB, _1KB, _4KB, NON_OVERLAPPING, EXPECT_FAILURE, + "mremap - Destination Address Misaligned (1KB-aligned)"), + MAKE_TEST(_1KB, _4KB, _4KB, NON_OVERLAPPING, EXPECT_FAILURE, + "mremap - Source Address Misaligned (1KB-aligned)"), + + /* Src addr PTE aligned */ + MAKE_TEST(PTE, PTE, _8KB, NON_OVERLAPPING, EXPECT_SUCCESS, + "8KB mremap - Source PTE-aligned, Destination PTE-aligned"), + + /* Src addr 1MB aligned */ + MAKE_TEST(_1MB, PTE, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS, + "2MB mremap - Source 1MB-aligned, Destination PTE-aligned"), + MAKE_TEST(_1MB, _1MB, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS, + "2MB mremap - Source 1MB-aligned, Destination 1MB-aligned"), + + /* Src addr PMD aligned */ + MAKE_TEST(PMD, PTE, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS, + "4MB mremap - Source PMD-aligned, Destination PTE-aligned"), + MAKE_TEST(PMD, _1MB, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS, + "4MB mremap - Source PMD-aligned, Destination 1MB-aligned"), + MAKE_TEST(PMD, PMD, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS, + "4MB mremap - Source PMD-aligned, Destination PMD-aligned"), + + /* Src addr PUD aligned */ + MAKE_TEST(PUD, PTE, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS, + "2GB mremap - Source PUD-aligned, Destination PTE-aligned"), + MAKE_TEST(PUD, _1MB, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS, + "2GB mremap - Source PUD-aligned, Destination 1MB-aligned"), + MAKE_TEST(PUD, PMD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS, + "2GB mremap - Source PUD-aligned, Destination PMD-aligned"), + MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS, + "2GB mremap - Source PUD-aligned, Destination PUD-aligned"), + }; + + struct test perf_test_cases[] = { + /* + * mremap 1GB region - Page table level aligned time + * comparison. + */ + MAKE_TEST(PTE, PTE, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS, + "1GB mremap - Source PTE-aligned, Destination PTE-aligned"), + MAKE_TEST(PMD, PMD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS, + "1GB mremap - Source PMD-aligned, Destination PMD-aligned"), + MAKE_TEST(PUD, PUD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS, + "1GB mremap - Source PUD-aligned, Destination PUD-aligned"), + }; + + run_perf_tests = (threshold_mb == VALIDATION_NO_THRESHOLD) || + (threshold_mb * _1MB >= _1GB); + + ksft_set_plan(ARRAY_SIZE(test_cases) + (run_perf_tests ? + ARRAY_SIZE(perf_test_cases) : 0)); + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) + run_mremap_test_case(test_cases[i], &failures, threshold_mb, + pattern_seed); + + if (run_perf_tests) { + ksft_print_msg("\n%s\n", + "mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:"); + for (i = 0; i < ARRAY_SIZE(perf_test_cases); i++) + run_mremap_test_case(perf_test_cases[i], &failures, + threshold_mb, pattern_seed); + } + + if (failures > 0) + ksft_exit_fail(); + else + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index 9e8837768ee2..e953f3cd9664 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -253,6 +253,17 @@ else echo "[PASS]" fi +echo "-------------------" +echo "running mremap_test" +echo "-------------------" +./mremap_test +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + echo "-----------------" echo "running thuge-gen" echo "-----------------" -- cgit v1.3.1 From f289041ed4cf9a3f6e8a32068fef9ffb2acc5662 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Dec 2020 19:13:45 -0800 Subject: mm, page_poison: remove CONFIG_PAGE_POISONING_ZERO CONFIG_PAGE_POISONING_ZERO uses the zero pattern instead of 0xAA. It was introduced by commit 1414c7f4f7d7 ("mm/page_poisoning.c: allow for zero poisoning"), noting that using zeroes retains the benefit of sanitizing content of freed pages, with the benefit of not having to zero them again on alloc, and the downside of making some forms of corruption (stray writes of NULLs) harder to detect than with the 0xAA pattern. Together with CONFIG_PAGE_POISONING_NO_SANITY it made possible to sanitize the contents on free without checking it back on alloc. These days we have the init_on_free() option to achieve sanitization with zeroes and to save clearing on alloc (and without checking on alloc). Arguably if someone does choose to check the poison for corruption on alloc, the savings of not clearing the page are secondary, and it makes sense to always use the 0xAA poison pattern. Thus, remove the CONFIG_PAGE_POISONING_ZERO option for being redundant. Link: https://lkml.kernel.org/r/20201113104033.22907-6-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: David Hildenbrand Cc: Mike Rapoport Cc: Rafael J. Wysocki Cc: Alexander Potapenko Cc: Kees Cook Cc: Laura Abbott Cc: Mateusz Nosek Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/poison.h | 4 ---- mm/Kconfig.debug | 12 ------------ mm/page_alloc.c | 8 +------- tools/include/linux/poison.h | 6 +----- 4 files changed, 2 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/include/linux/poison.h b/include/linux/poison.h index dc8ae5d8db03..aff1c9250c82 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -27,11 +27,7 @@ #define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA) /********** mm/page_poison.c **********/ -#ifdef CONFIG_PAGE_POISONING_ZERO -#define PAGE_POISON 0x00 -#else #define PAGE_POISON 0xaa -#endif /********** mm/page_alloc.c ************/ diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 14e29fe5bfa6..1e73717802f8 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -80,18 +80,6 @@ config PAGE_POISONING If unsure, say N -config PAGE_POISONING_ZERO - bool "Use zero for poisoning instead of debugging value" - depends on PAGE_POISONING - help - Instead of using the existing poison value, fill the pages with - zeros. This makes it harder to detect when errors are occurring - due to sanitization but the zeroing at free means that it is - no longer necessary to write zeros when GFP_ZERO is used on - allocation. - - If unsure, say N - config DEBUG_PAGE_REF bool "Enable tracepoint to track down page reference manipulation" depends on DEBUG_KERNEL diff --git a/mm/page_alloc.c b/mm/page_alloc.c index efcd1baa35e4..918647ff6eef 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2228,12 +2228,6 @@ static inline int check_new_page(struct page *page) return 1; } -static inline bool free_pages_prezeroed(void) -{ - return (IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) && - page_poisoning_enabled_static()) || want_init_on_free(); -} - #ifdef CONFIG_DEBUG_VM /* * With DEBUG_VM enabled, order-0 pages are checked for expected state when @@ -2296,7 +2290,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order, kernel_unpoison_pages(page, 1 << order); set_page_owner(page, order, gfp_flags); - if (!free_pages_prezeroed() && want_init_on_alloc(gfp_flags)) + if (!want_init_on_free() && want_init_on_alloc(gfp_flags)) kernel_init_free_pages(page, 1 << order); } diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h index d29725769107..2e6338ac5eed 100644 --- a/tools/include/linux/poison.h +++ b/tools/include/linux/poison.h @@ -35,12 +35,8 @@ */ #define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA) -/********** mm/debug-pagealloc.c **********/ -#ifdef CONFIG_PAGE_POISONING_ZERO -#define PAGE_POISON 0x00 -#else +/********** mm/page_poison.c **********/ #define PAGE_POISON 0xaa -#endif /********** mm/page_alloc.c ************/ -- cgit v1.3.1 From 77f962e7ae24e5fa7b257b8242c62e716119a312 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Mon, 14 Dec 2020 19:13:58 -0800 Subject: userfaultfd: selftests: make __{s,u}64 format specifiers portable On certain platforms (powerpcle is the one on which I ran into this), "%Ld" and "%Lu" are unsuitable for printing __s64 and __u64, respectively, resulting in build warnings. Cast to {u,}int64_t, and use the PRI{d,u}64 macros defined in inttypes.h to print them. This ought to be portable to all platforms. Splitting this off into a separate macro lets us remove some lines, and get rid of some (I would argue) stylistically odd cases where we joined printf() and exit() into a single statement with a ,. Finally, this also fixes a "missing braces around initializer" warning when we initialize prms in wp_range(). [axelrasmussen@google.com: v2] Link: https://lkml.kernel.org/r/20201203180244.1811601-1-axelrasmussen@google.com Link: https://lkml.kernel.org/r/20201202211542.1121189-1-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Acked-by: Peter Xu Cc: Shuah Khan Cc: Joe Perches Cc: Mike Rapoport Cc: Andrea Arcangeli Cc: David Alan Gilbert Cc: Greg Thelen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/userfaultfd.c | 81 ++++++++++++++------------------ 1 file changed, 35 insertions(+), 46 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index c4425597769a..a7059a6bc215 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -55,6 +55,8 @@ #include #include #include +#include +#include #include "../kselftest.h" @@ -135,6 +137,13 @@ static void usage(void) exit(1); } +#define uffd_error(code, fmt, ...) \ + do { \ + fprintf(stderr, fmt, ##__VA_ARGS__); \ + fprintf(stderr, ": %" PRId64 "\n", (int64_t)(code)); \ + exit(1); \ + } while (0) + static void uffd_stats_reset(struct uffd_stats *uffd_stats, unsigned long n_cpus) { @@ -338,7 +347,7 @@ static int my_bcmp(char *str1, char *str2, size_t n) static void wp_range(int ufd, __u64 start, __u64 len, bool wp) { - struct uffdio_writeprotect prms = { 0 }; + struct uffdio_writeprotect prms; /* Write protection page faults */ prms.range.start = start; @@ -347,7 +356,8 @@ static void wp_range(int ufd, __u64 start, __u64 len, bool wp) prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0; if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) { - fprintf(stderr, "clear WP failed for address 0x%Lx\n", start); + fprintf(stderr, "clear WP failed for address 0x%" PRIx64 "\n", + (uint64_t)start); exit(1); } } @@ -481,14 +491,11 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy, if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) { /* real retval in ufdio_copy.copy */ if (uffdio_copy->copy != -EEXIST) { - fprintf(stderr, "UFFDIO_COPY retry error %Ld\n", - uffdio_copy->copy); - exit(1); + uffd_error(uffdio_copy->copy, + "UFFDIO_COPY retry error"); } - } else { - fprintf(stderr, "UFFDIO_COPY retry unexpected %Ld\n", - uffdio_copy->copy); exit(1); - } + } else + uffd_error(uffdio_copy->copy, "UFFDIO_COPY retry unexpected"); } static int __copy_page(int ufd, unsigned long offset, bool retry) @@ -509,14 +516,10 @@ static int __copy_page(int ufd, unsigned long offset, bool retry) uffdio_copy.copy = 0; if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) { /* real retval in ufdio_copy.copy */ - if (uffdio_copy.copy != -EEXIST) { - fprintf(stderr, "UFFDIO_COPY error %Ld\n", - uffdio_copy.copy); - exit(1); - } + if (uffdio_copy.copy != -EEXIST) + uffd_error(uffdio_copy.copy, "UFFDIO_COPY error"); } else if (uffdio_copy.copy != page_size) { - fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n", - uffdio_copy.copy); exit(1); + uffd_error(uffdio_copy.copy, "UFFDIO_COPY unexpected copy"); } else { if (test_uffdio_copy_eexist && retry) { test_uffdio_copy_eexist = false; @@ -795,7 +798,8 @@ static int userfaultfd_open(int features) return 1; } if (uffdio_api.api != UFFD_API) { - fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api); + fprintf(stderr, "UFFDIO_API error: %" PRIu64 "\n", + (uint64_t)uffdio_api.api); return 1; } @@ -957,13 +961,12 @@ static void retry_uffdio_zeropage(int ufd, offset); if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) { if (uffdio_zeropage->zeropage != -EEXIST) { - fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n", - uffdio_zeropage->zeropage); - exit(1); + uffd_error(uffdio_zeropage->zeropage, + "UFFDIO_ZEROPAGE retry error"); } } else { - fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n", - uffdio_zeropage->zeropage); exit(1); + uffd_error(uffdio_zeropage->zeropage, + "UFFDIO_ZEROPAGE retry unexpected"); } } @@ -972,6 +975,7 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry) struct uffdio_zeropage uffdio_zeropage; int ret; unsigned long has_zeropage; + __s64 res; has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE); @@ -983,29 +987,17 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry) uffdio_zeropage.range.len = page_size; uffdio_zeropage.mode = 0; ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage); + res = uffdio_zeropage.zeropage; if (ret) { /* real retval in ufdio_zeropage.zeropage */ if (has_zeropage) { - if (uffdio_zeropage.zeropage == -EEXIST) { - fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"); - exit(1); - } else { - fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n", - uffdio_zeropage.zeropage); - exit(1); - } - } else { - if (uffdio_zeropage.zeropage != -EINVAL) { - fprintf(stderr, - "UFFDIO_ZEROPAGE not -EINVAL %Ld\n", - uffdio_zeropage.zeropage); - exit(1); - } - } + uffd_error(res, "UFFDIO_ZEROPAGE %s", + res == -EEXIST ? "-EEXIST" : "error"); + } else if (res != -EINVAL) + uffd_error(res, "UFFDIO_ZEROPAGE not -EINVAL"); } else if (has_zeropage) { - if (uffdio_zeropage.zeropage != page_size) { - fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n", - uffdio_zeropage.zeropage); exit(1); + if (res != page_size) { + uffd_error(res, "UFFDIO_ZEROPAGE unexpected"); } else { if (test_uffdio_zeropage_eexist && retry) { test_uffdio_zeropage_eexist = false; @@ -1014,11 +1006,8 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry) } return 1; } - } else { - fprintf(stderr, - "UFFDIO_ZEROPAGE succeeded %Ld\n", - uffdio_zeropage.zeropage); exit(1); - } + } else + uffd_error(res, "UFFDIO_ZEROPAGE succeeded"); return 0; } -- cgit v1.3.1 From 164c50be2878f4caf6d7973e8e0e438f182f4ded Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 14 Dec 2020 19:14:02 -0800 Subject: userfaultfd/selftests: always dump something in modes Patch series "userfaultfd: selftests: Small fixes". Some very trivial fixes that I kept locally to userfaultfd selftest program. This patch (of 3): BOUNCE_POLL is a special bit that if cleared it means "READ" instead. Dump that too otherwise we'll see tests with empty modes. Link: https://lkml.kernel.org/r/20201208024709.7701-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20201208024709.7701-2-peterx@redhat.com Signed-off-by: Peter Xu Cc: Andrea Arcangeli Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/userfaultfd.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index a7059a6bc215..7fc1eaf4ceaa 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -1291,6 +1291,8 @@ static int userfaultfd_stress(void) printf(" ver"); if (bounces & BOUNCE_POLL) printf(" poll"); + else + printf(" read"); printf(", "); fflush(stdout); -- cgit v1.3.1 From 1e17a24edf9bef891bbdd02617eaab4fa6efcd7f Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 14 Dec 2020 19:14:05 -0800 Subject: userfaultfd/selftests: fix retval check for userfaultfd_open() userfaultfd_open() returns 1 for errors rather than negatives. Fix it on all the callers so when UFFDIO_API failed the test will bail out. Link: https://lkml.kernel.org/r/20201208024709.7701-3-peterx@redhat.com Signed-off-by: Peter Xu Cc: Andrea Arcangeli Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/userfaultfd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 7fc1eaf4ceaa..6e482e2f198c 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -1029,7 +1029,7 @@ static int userfaultfd_zeropage_test(void) if (uffd_test_ops->release_pages(area_dst)) return 1; - if (userfaultfd_open(0) < 0) + if (userfaultfd_open(0)) return 1; uffdio_register.range.start = (unsigned long) area_dst; uffdio_register.range.len = nr_pages * page_size; @@ -1079,7 +1079,7 @@ static int userfaultfd_events_test(void) features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE; - if (userfaultfd_open(features) < 0) + if (userfaultfd_open(features)) return 1; fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); @@ -1151,7 +1151,7 @@ static int userfaultfd_sig_test(void) return 1; features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS; - if (userfaultfd_open(features) < 0) + if (userfaultfd_open(features)) return 1; fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); @@ -1231,7 +1231,7 @@ static int userfaultfd_stress(void) if (!area_dst) return 1; - if (userfaultfd_open(0) < 0) + if (userfaultfd_open(0)) return 1; count_verify = malloc(nr_pages * sizeof(unsigned long long)); -- cgit v1.3.1 From d9f411bacfa0c3d0d97580a66f88e70f92bcf58e Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 14 Dec 2020 19:14:08 -0800 Subject: userfaultfd/selftests: hint the test runner on required privilege Now userfaultfd test program requires either root or ptrace privilege due to the signal/event tests. When UFFDIO_API failed, hint the test runner about this fact verbosely. Link: https://lkml.kernel.org/r/20201208024709.7701-4-peterx@redhat.com Signed-off-by: Peter Xu Cc: Andrea Arcangeli Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/userfaultfd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 6e482e2f198c..9d8650d4ba5a 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -794,7 +794,8 @@ static int userfaultfd_open(int features) uffdio_api.api = UFFD_API; uffdio_api.features = features; if (ioctl(uffd, UFFDIO_API, &uffdio_api)) { - fprintf(stderr, "UFFDIO_API\n"); + fprintf(stderr, "UFFDIO_API failed.\nPlease make sure to " + "run with either root or ptrace capability.\n"); return 1; } if (uffdio_api.api != UFFD_API) { -- cgit v1.3.1 From febebaf366868a4204deb3955ef5dda17f676fc1 Mon Sep 17 00:00:00 2001 From: Francis Laniel Date: Tue, 15 Dec 2020 20:43:54 -0800 Subject: drivers/misc/lkdtm: add new file in LKDTM to test fortified strscpy This new test ensures that fortified strscpy has the same behavior than vanilla strscpy (e.g. returning -E2BIG when src content is truncated). Finally, it generates a crash at runtime because there is a write overflow in destination string. Link: https://lkml.kernel.org/r/20201122162451.27551-5-laniel_francis@privacyrequired.com Signed-off-by: Francis Laniel Reviewed-by: Kees Cook Cc: Daniel Axtens Cc: Daniel Micay Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/misc/lkdtm/Makefile | 1 + drivers/misc/lkdtm/core.c | 1 + drivers/misc/lkdtm/fortify.c | 82 +++++++++++++++++++++++++++++++++ drivers/misc/lkdtm/lkdtm.h | 3 ++ tools/testing/selftests/lkdtm/tests.txt | 1 + 5 files changed, 88 insertions(+) create mode 100644 drivers/misc/lkdtm/fortify.c (limited to 'tools') diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile index 1ef7888a12b5..7727bfd32be9 100644 --- a/drivers/misc/lkdtm/Makefile +++ b/drivers/misc/lkdtm/Makefile @@ -10,6 +10,7 @@ lkdtm-$(CONFIG_LKDTM) += rodata_objcopy.o lkdtm-$(CONFIG_LKDTM) += usercopy.o lkdtm-$(CONFIG_LKDTM) += stackleak.o lkdtm-$(CONFIG_LKDTM) += cfi.o +lkdtm-$(CONFIG_LKDTM) += fortify.o KASAN_SANITIZE_rodata.o := n KASAN_SANITIZE_stackleak.o := n diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c index b8c51a633fcc..3c0a67f072c0 100644 --- a/drivers/misc/lkdtm/core.c +++ b/drivers/misc/lkdtm/core.c @@ -175,6 +175,7 @@ static const struct crashtype crashtypes[] = { CRASHTYPE(USERCOPY_KERNEL), CRASHTYPE(STACKLEAK_ERASING), CRASHTYPE(CFI_FORWARD_PROTO), + CRASHTYPE(FORTIFIED_STRSCPY), #ifdef CONFIG_X86_32 CRASHTYPE(DOUBLE_FAULT), #endif diff --git a/drivers/misc/lkdtm/fortify.c b/drivers/misc/lkdtm/fortify.c new file mode 100644 index 000000000000..faf29cf04baa --- /dev/null +++ b/drivers/misc/lkdtm/fortify.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2020 Francis Laniel + * + * Add tests related to fortified functions in this file. + */ +#include "lkdtm.h" +#include +#include + + +/* + * Calls fortified strscpy to test that it returns the same result as vanilla + * strscpy and generate a panic because there is a write overflow (i.e. src + * length is greater than dst length). + */ +void lkdtm_FORTIFIED_STRSCPY(void) +{ + char *src; + char dst[5]; + + struct { + union { + char big[10]; + char src[5]; + }; + } weird = { .big = "hello!" }; + char weird_dst[sizeof(weird.src) + 1]; + + src = kstrdup("foobar", GFP_KERNEL); + + if (src == NULL) + return; + + /* Vanilla strscpy returns -E2BIG if size is 0. */ + if (strscpy(dst, src, 0) != -E2BIG) + pr_warn("FAIL: strscpy() of 0 length did not return -E2BIG\n"); + + /* Vanilla strscpy returns -E2BIG if src is truncated. */ + if (strscpy(dst, src, sizeof(dst)) != -E2BIG) + pr_warn("FAIL: strscpy() did not return -E2BIG while src is truncated\n"); + + /* After above call, dst must contain "foob" because src was truncated. */ + if (strncmp(dst, "foob", sizeof(dst)) != 0) + pr_warn("FAIL: after strscpy() dst does not contain \"foob\" but \"%s\"\n", + dst); + + /* Shrink src so the strscpy() below succeeds. */ + src[3] = '\0'; + + /* + * Vanilla strscpy returns number of character copied if everything goes + * well. + */ + if (strscpy(dst, src, sizeof(dst)) != 3) + pr_warn("FAIL: strscpy() did not return 3 while src was copied entirely truncated\n"); + + /* After above call, dst must contain "foo" because src was copied. */ + if (strncmp(dst, "foo", sizeof(dst)) != 0) + pr_warn("FAIL: after strscpy() dst does not contain \"foo\" but \"%s\"\n", + dst); + + /* Test when src is embedded inside a union. */ + strscpy(weird_dst, weird.src, sizeof(weird_dst)); + + if (strcmp(weird_dst, "hello") != 0) + pr_warn("FAIL: after strscpy() weird_dst does not contain \"hello\" but \"%s\"\n", + weird_dst); + + /* Restore src to its initial value. */ + src[3] = 'b'; + + /* + * Use strlen here so size cannot be known at compile time and there is + * a runtime write overflow. + */ + strscpy(dst, src, strlen(src)); + + pr_warn("FAIL: No overflow in above strscpy()\n"); + + kfree(src); +} diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h index 49e6b945feb7..138f06254b61 100644 --- a/drivers/misc/lkdtm/lkdtm.h +++ b/drivers/misc/lkdtm/lkdtm.h @@ -104,4 +104,7 @@ void lkdtm_STACKLEAK_ERASING(void); /* cfi.c */ void lkdtm_CFI_FORWARD_PROTO(void); +/* fortify.c */ +void lkdtm_FORTIFIED_STRSCPY(void); + #endif diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt index 74a8d329a72c..92ba4cc41314 100644 --- a/tools/testing/selftests/lkdtm/tests.txt +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -68,3 +68,4 @@ USERCOPY_STACK_BEYOND USERCOPY_KERNEL STACKLEAK_ERASING OK: the rest of the thread stack is properly erased CFI_FORWARD_PROTO +FORTIFIED_STRSCPY -- cgit v1.3.1 From 44f6a7c0755d8dd453c70557e11687bb080a6f21 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 14 Dec 2020 16:04:20 -0600 Subject: objtool: Fix seg fault with Clang non-section symbols The Clang assembler likes to strip section symbols, which means objtool can't reference some text code by its section. This confuses objtool greatly, causing it to seg fault. The fix is similar to what was done before, for ORC reloc generation: e81e07244325 ("objtool: Support Clang non-section symbols in ORC generation") Factor out that code into a common helper and use it for static call reloc generation as well. Reported-by: Arnd Bergmann Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Nick Desaulniers Reviewed-by: Miroslav Benes Link: https://github.com/ClangBuiltLinux/linux/issues/1207 Link: https://lkml.kernel.org/r/ba6b6c0f0dd5acbba66e403955a967d9fdd1726a.1607983452.git.jpoimboe@redhat.com --- tools/objtool/check.c | 11 +++++++++-- tools/objtool/elf.c | 26 ++++++++++++++++++++++++++ tools/objtool/elf.h | 2 ++ tools/objtool/orc_gen.c | 29 +++++------------------------ 4 files changed, 42 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index c6ab44543c92..5f8d3eed78a1 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -467,13 +467,20 @@ static int create_static_call_sections(struct objtool_file *file) /* populate reloc for 'addr' */ reloc = malloc(sizeof(*reloc)); + if (!reloc) { perror("malloc"); return -1; } memset(reloc, 0, sizeof(*reloc)); - reloc->sym = insn->sec->sym; - reloc->addend = insn->offset; + + insn_to_reloc_sym_addend(insn->sec, insn->offset, reloc); + if (!reloc->sym) { + WARN_FUNC("static call tramp: missing containing symbol", + insn->sec, insn->offset); + return -1; + } + reloc->type = R_X86_64_PC32; reloc->offset = idx * sizeof(struct static_call_site); reloc->sec = reloc_sec; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 4e1d7460574b..be89c741ba9a 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -262,6 +262,32 @@ struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, uns return find_reloc_by_dest_range(elf, sec, offset, 1); } +void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset, + struct reloc *reloc) +{ + if (sec->sym) { + reloc->sym = sec->sym; + reloc->addend = offset; + return; + } + + /* + * The Clang assembler strips section symbols, so we have to reference + * the function symbol instead: + */ + reloc->sym = find_symbol_containing(sec, offset); + if (!reloc->sym) { + /* + * Hack alert. This happens when we need to reference the NOP + * pad insn immediately after the function. + */ + reloc->sym = find_symbol_containing(sec, offset - 1); + } + + if (reloc->sym) + reloc->addend = offset - reloc->sym->offset; +} + static int read_sections(struct elf *elf) { Elf_Scn *s = NULL; diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index 807f8c670097..e6890cc70a25 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -140,6 +140,8 @@ struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, uns struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec, unsigned long offset, unsigned int len); struct symbol *find_func_containing(struct section *sec, unsigned long offset); +void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset, + struct reloc *reloc); int elf_rebuild_reloc_section(struct elf *elf, struct section *sec); #define for_each_sec(file, sec) \ diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 235663b96adc..9ce68b385a1b 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -105,30 +105,11 @@ static int create_orc_entry(struct elf *elf, struct section *u_sec, struct secti } memset(reloc, 0, sizeof(*reloc)); - if (insn_sec->sym) { - reloc->sym = insn_sec->sym; - reloc->addend = insn_off; - } else { - /* - * The Clang assembler doesn't produce section symbols, so we - * have to reference the function symbol instead: - */ - reloc->sym = find_symbol_containing(insn_sec, insn_off); - if (!reloc->sym) { - /* - * Hack alert. This happens when we need to reference - * the NOP pad insn immediately after the function. - */ - reloc->sym = find_symbol_containing(insn_sec, - insn_off - 1); - } - if (!reloc->sym) { - WARN("missing symbol for insn at offset 0x%lx\n", - insn_off); - return -1; - } - - reloc->addend = insn_off - reloc->sym->offset; + insn_to_reloc_sym_addend(insn_sec, insn_off, reloc); + if (!reloc->sym) { + WARN("missing symbol for insn at offset 0x%lx", + insn_off); + return -1; } reloc->type = R_X86_64_PC32; -- cgit v1.3.1 From 1a3449c19407a28f7019a887cdf0d6ba2444751a Mon Sep 17 00:00:00 2001 From: Kamal Mostafa Date: Tue, 15 Dec 2020 10:20:10 -0800 Subject: selftests/bpf: Clarify build error if no vmlinux If Makefile cannot find any of the vmlinux's in its VMLINUX_BTF_PATHS list, it tries to run btftool incorrectly, with VMLINUX_BTF unset: bpftool btf dump file $(VMLINUX_BTF) format c Such that the keyword 'format' is misinterpreted as the path to vmlinux. The resulting build error message is fairly cryptic: GEN vmlinux.h Error: failed to load BTF from format: No such file or directory This patch makes the failure reason clearer by yielding this instead: Makefile:...: *** Cannot find a vmlinux for VMLINUX_BTF at any of "{paths}". Stop. Fixes: acbd06206bbb ("selftests/bpf: Add vmlinux.h selftest exercising tracing of syscalls") Signed-off-by: Kamal Mostafa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201215182011.15755-1-kamal@canonical.com --- tools/testing/selftests/bpf/Makefile | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 8c33e999319a..c51df6b91bef 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -121,6 +121,9 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ /sys/kernel/btf/vmlinux \ /boot/vmlinux-$(shell uname -r) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) +ifeq ($(VMLINUX_BTF),) +$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)") +endif # Define simple and short `make test_progs`, `make test_sysctl`, etc targets # to build individual tests. -- cgit v1.3.1 From 09d59c2f3465fb01e65a0c96698697b026ea8e79 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 4 Dec 2020 00:08:36 +0100 Subject: tools build: Add missing libcap to test-all.bin target We're missing -lcap in test-all.bin target, so in case it's the only library missing (if more are missing test-all.bin fails anyway), we will falsely claim that we detected it and fail build, like: $ make ... Auto-detecting system features: ... dwarf: [ on ] ... dwarf_getlocations: [ on ] ... glibc: [ on ] ... libbfd: [ on ] ... libbfd-buildid: [ on ] ... libcap: [ on ] ... libelf: [ on ] ... libnuma: [ on ] ... numa_num_possible_cpus: [ on ] ... libperl: [ on ] ... libpython: [ on ] ... libcrypto: [ on ] ... libunwind: [ on ] ... libdw-dwarf-unwind: [ on ] ... zlib: [ on ] ... lzma: [ on ] ... get_cpuid: [ on ] ... bpf: [ on ] ... libaio: [ on ] ... libzstd: [ on ] ... disassembler-four-args: [ on ] ... CC builtin-ftrace.o In file included from builtin-ftrace.c:29: util/cap.h:11:10: fatal error: sys/capability.h: No such file or directory 11 | #include | ^~~~~~~~~~~~~~~~~~ compilation terminated. Fixes: 74d5f3d06f707eb5 ("tools build: Add capability-related feature detection") Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Igor Lubashev Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201203230836.3751981-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index cdde783f3018..89ba522e377d 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -90,7 +90,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$( ############################### $(OUTPUT)test-all.bin: - $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd + $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd -lcap $(OUTPUT)test-hello.bin: $(BUILD) -- cgit v1.3.1 From 2eb5dd418034ecea2f7031e3d33f2991a878b148 Mon Sep 17 00:00:00 2001 From: Zheng Zengkai Date: Fri, 3 Jul 2020 17:33:44 +0800 Subject: perf record: Fix memory leak when using '--user-regs=?' to list registers When using 'perf record's option '-I' or '--user-regs=' along with argument '?' to list available register names, memory of variable 'os' allocated by strdup() needs to be released before __parse_regs() returns, otherwise memory leak will occur. Fixes: bcc84ec65ad1 ("perf record: Add ability to name registers to record") Signed-off-by: Zheng Zengkai Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Li Bin Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/r/20200703093344.189450-1-zhengzengkai@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-regs-options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c index e687497b3aac..a4a100425b3a 100644 --- a/tools/perf/util/parse-regs-options.c +++ b/tools/perf/util/parse-regs-options.c @@ -54,7 +54,7 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr) #endif fputc('\n', stderr); /* just printing available regs */ - return -1; + goto error; } #ifdef HAVE_PERF_REGS_SUPPORT for (r = sample_reg_masks; r->name; r++) { -- cgit v1.3.1 From bf53fc6b5f415cddc7118091cb8fd6a211b2320d Mon Sep 17 00:00:00 2001 From: Jan Kratochvil Date: Fri, 4 Dec 2020 09:17:02 -0300 Subject: perf unwind: Fix separate debug info files when using elfutils' libdw's unwinder elfutils needs to be provided main binary and separate debug info file respectively. Providing separate debug info file instead of the main binary is not sufficient. One needs to try both supplied filename and its possible cache by its build-id depending on the use case. Signed-off-by: Jan Kratochvil Tested-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Ian Rogers Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/unwind-libdw.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 7a3dbc259cec..0ada907c60d4 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -20,10 +20,24 @@ static char *debuginfo_path; +static int __find_debuginfo(Dwfl_Module *mod __maybe_unused, void **userdata, + const char *modname __maybe_unused, Dwarf_Addr base __maybe_unused, + const char *file_name, const char *debuglink_file __maybe_unused, + GElf_Word debuglink_crc __maybe_unused, char **debuginfo_file_name) +{ + const struct dso *dso = *userdata; + + assert(dso); + if (dso->symsrc_filename && strcmp (file_name, dso->symsrc_filename)) + *debuginfo_file_name = strdup(dso->symsrc_filename); + return -1; +} + static const Dwfl_Callbacks offline_callbacks = { - .find_debuginfo = dwfl_standard_find_debuginfo, + .find_debuginfo = __find_debuginfo, .debuginfo_path = &debuginfo_path, .section_address = dwfl_offline_section_address, + // .find_elf is not set as we use dwfl_report_elf() instead. }; static int __report_module(struct addr_location *al, u64 ip, @@ -46,16 +60,24 @@ static int __report_module(struct addr_location *al, u64 ip, mod = dwfl_addrmodule(ui->dwfl, ip); if (mod) { Dwarf_Addr s; + void **userdatap; - dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); + dwfl_module_info(mod, &userdatap, &s, NULL, NULL, NULL, NULL, NULL); + *userdatap = dso; if (s != al->map->start - al->map->pgoff) mod = 0; } if (!mod) - mod = dwfl_report_elf(ui->dwfl, dso->short_name, - (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start - al->map->pgoff, - false); + mod = dwfl_report_elf(ui->dwfl, dso->short_name, dso->long_name, -1, + al->map->start - al->map->pgoff, false); + if (!mod) { + char filename[PATH_MAX]; + + if (dso__build_id_filename(dso, filename, sizeof(filename), false)) + mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1, + al->map->start - al->map->pgoff, false); + } return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1; } -- cgit v1.3.1 From 47d982202f8cfaac6f208c9109fa15cb6a0181f7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 30 Nov 2020 09:27:52 -0800 Subject: tools headers UAPI: Update tools's copy of linux/perf_event.h To get the changes in: commit 8d97e71811aa ("perf/core: Add PERF_SAMPLE_DATA_PAGE_SIZE") commit 995f088efebe ("perf/core: Add support for PERF_SAMPLE_CODE_PAGE_SIZE") This silences this perf tools build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h Signed-off-by: Kan Liang Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Jiri Olsa Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: http://lore.kernel.org/lkml/20201130172803.2676-2-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index b95d3c485d27..b15e3447cd9f 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -143,8 +143,10 @@ enum perf_event_sample_format { PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_AUX = 1U << 20, PERF_SAMPLE_CGROUP = 1U << 21, + PERF_SAMPLE_DATA_PAGE_SIZE = 1U << 22, + PERF_SAMPLE_CODE_PAGE_SIZE = 1U << 23, - PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 24, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; @@ -896,6 +898,8 @@ enum perf_event_type { * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR * { u64 size; * char data[size]; } && PERF_SAMPLE_AUX + * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE + * { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE * }; */ PERF_RECORD_SAMPLE = 9, -- cgit v1.3.1 From 542b88fd12769bf5be307b11ca0f94a6140bba82 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 30 Nov 2020 09:27:53 -0800 Subject: perf record: Support new sample type for data page size Support new sample type PERF_SAMPLE_DATA_PAGE_SIZE for page size. Add new option --data-page-size to record sample data page size. Signed-off-by: Kan Liang Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Jiri Olsa Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: http://lore.kernel.org/lkml/20201130172803.2676-3-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 3 +++ tools/perf/builtin-record.c | 2 ++ tools/perf/util/event.h | 1 + tools/perf/util/evsel.c | 9 +++++++++ tools/perf/util/perf_event_attr_fprintf.c | 2 +- tools/perf/util/record.h | 1 + tools/perf/util/synthetic-events.c | 8 ++++++++ 7 files changed, 25 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 768888b9326a..2d30e525a600 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -293,6 +293,9 @@ OPTIONS --phys-data:: Record the sample physical addresses. +--data-page-size:: + Record the sampled data address data page size. + -T:: --timestamp:: Record the sample timestamps. Use it with 'perf report -D' to see the diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d832c108a1ca..fd3911650612 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2472,6 +2472,8 @@ static struct option __record_options[] = { OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, "Record the sample physical addresses"), + OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, + "Record the sampled data address data page size"), OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, &record.opts.sample_time_set, diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index b828b99176f4..448ac30c2fc4 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -135,6 +135,7 @@ struct perf_sample { u32 raw_size; u64 data_src; u64 phys_addr; + u64 data_page_size; u64 cgroup; u32 flags; u16 insn_len; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 3dd0eae9810d..5e6085c3fc76 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1188,6 +1188,9 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, evsel__set_sample_bit(evsel, CGROUP); } + if (opts->sample_data_page_size) + evsel__set_sample_bit(evsel, DATA_PAGE_SIZE); + if (opts->record_switch_events) attr->context_switch = track; @@ -2355,6 +2358,12 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, array++; } + data->data_page_size = 0; + if (type & PERF_SAMPLE_DATA_PAGE_SIZE) { + data->data_page_size = *array; + array++; + } + if (type & PERF_SAMPLE_AUX) { OVERFLOW_CHECK_u64(array); sz = *array++; diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index e67a227c0ce7..fb0bb6684438 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -35,7 +35,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value) bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX), - bit_name(CGROUP), + bit_name(CGROUP), bit_name(DATA_PAGE_SIZE), { .name = NULL, } }; #undef bit_name diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 266760ac9143..694b351dcd27 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -22,6 +22,7 @@ struct record_opts { bool raw_samples; bool sample_address; bool sample_phys_addr; + bool sample_data_page_size; bool sample_weight; bool sample_time; bool sample_time_set; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 297987c6960b..2947e3f3c6d9 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1409,6 +1409,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_CGROUP) result += sizeof(u64); + if (type & PERF_SAMPLE_DATA_PAGE_SIZE) + result += sizeof(u64); + if (type & PERF_SAMPLE_AUX) { result += sizeof(u64); result += sample->aux_sample.size; @@ -1588,6 +1591,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo array++; } + if (type & PERF_SAMPLE_DATA_PAGE_SIZE) { + *array = sample->data_page_size; + array++; + } + if (type & PERF_SAMPLE_AUX) { sz = sample->aux_sample.size; *array++ = sz; -- cgit v1.3.1 From 456ef4c11c06f0b8c53acaf796d77d2033f079f2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 7 Dec 2020 14:04:05 -0300 Subject: perf evsel: Emit warning about kernel not supporting the data page size sample_type bit Before we had this unhelpful message: $ perf record --data-page-size sleep 1 Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (cycles:u). /bin/dmesg | grep -i perf may provide additional information. $ Add support to the perf_missing_features variable to remember what caused evsel__open() to fail and then use that information in evsel__open_strerror(). $ perf record --data-page-size sleep 1 Error: Asking for the data page size isn't supported by this kernel. $ Cc: Kan Liang Cc: Namhyung Kim Cc: Andi Kleen Cc: Jiri Olsa Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: http://lore.kernel.org/lkml/20201207170759.GB129853@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 9 ++++++++- tools/perf/util/evsel.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 5e6085c3fc76..c26ea82220bd 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1873,7 +1873,12 @@ try_fallback: * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) { + if (!perf_missing_features.data_page_size && + (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) { + perf_missing_features.data_page_size = true; + pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n"); + goto out_close; + } else if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) { perf_missing_features.cgroup = true; pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n"); goto out_close; @@ -2673,6 +2678,8 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, "We found oprofile daemon running, please stop it and try again."); break; case EINVAL: + if (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE && perf_missing_features.data_page_size) + return scnprintf(msg, size, "Asking for the data page size isn't supported by this kernel."); if (evsel->core.attr.write_backward && perf_missing_features.write_backward) return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel."); if (perf_missing_features.clockid) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 79a860d8e3ee..cd1d8dd43199 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -144,6 +144,7 @@ struct perf_missing_features { bool aux_output; bool branch_hw_idx; bool cgroup; + bool data_page_size; }; extern struct perf_missing_features perf_missing_features; -- cgit v1.3.1 From 4853f1caa43ea41a544c50a7cefc42e147aafeda Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 19:10:07 +0800 Subject: perf jevents: Add support for an extra directory level Currently only upto a level 2 directory is supported, in form vendor/platform. Add support for a further level, to support vendor/platform sub-directories in future, which will be vendor/platform/cpu and vendor/platform/sys. Signed-off-by: John Garry Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1607080216-36968-2-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 72cfa3b5046d..9022216b1253 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -978,15 +978,20 @@ static int process_one_file(const char *fpath, const struct stat *sb, int level = ftwbuf->level; int err = 0; - if (level == 2 && is_dir) { + if (level >= 2 && is_dir) { + int count = 0; /* * For level 2 directory, bname will include parent name, * like vendor/platform. So search back from platform dir * to find this. + * Something similar for level 3 directory, but we're a PMU + * category folder, like vendor/platform/cpu. */ bname = (char *) fpath + ftwbuf->base - 2; for (;;) { if (*bname == '/') + count++; + if (count == level - 1) break; bname--; } @@ -999,13 +1004,13 @@ static int process_one_file(const char *fpath, const struct stat *sb, level, sb->st_size, bname, fpath); /* base dir or too deep */ - if (level == 0 || level > 3) + if (level == 0 || level > 4) return 0; /* model directory, reset topic */ if ((level == 1 && is_dir && is_leaf_dir(fpath)) || - (level == 2 && is_dir)) { + (level >= 2 && is_dir && is_leaf_dir(fpath))) { if (close_table) print_events_table_suffix(eventsfp); -- cgit v1.3.1 From 4689f56796f87abee190d8a959dd318e006c5b5a Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 19:10:08 +0800 Subject: perf jevents: Add support for system events tables Process the JSONs to find support for "system" events, which are not tied to a specific CPUID. A "COMPAT" property is now used to match against the namespace ID from the kernel PMU driver. The generated pmu-events.c will now have 2 tables: a. CPU events, as before. b. New pmu_sys_event_tables[] table, which will have events matched to specific SoCs. It will look like this: struct pmu_event pme_hisilicon_hip09_sys[] = { { .name = "cycles", .compat = "0x00030736", .event = "event=0", .desc = "Clock cycles", .topic = "smmu v3 pmcg", .long_desc = "Clock cycles", }, { .name = "smmuv3_pmcg.l1_tlb", .compat = "0x00030736", .event = "event=0x8a", .desc = "SMMUv3 PMCG l1_tlb. Unit: smmuv3_pmcg ", .topic = "smmu v3 pmcg", .long_desc = "SMMUv3 PMCG l1_tlb", .pmu = "smmuv3_pmcg", }, ... }; struct pmu_event pme_arm_cortex_a53[] = { { .name = "ext_mem_req", .event = "event=0xc0", .desc = "External memory request", .topic = "memory", }, { .name = "ext_mem_req_nc", .event = "event=0xc1", .desc = "Non-cacheable external memory request", .topic = "memory", }, ... }; struct pmu_event pme_hisilicon_hip09_cpu[] = { { .name = "l2d_cache_refill_wr", .event = "event=0x53", .desc = "L2D cache refill, write", .topic = "core imp def", .long_desc = "Attributable Level 2 data cache refill, write", }, ... }; struct pmu_events_map pmu_events_map[] = { { .cpuid = "0x00000000410fd030", .version = "v1", .type = "core", .table = pme_arm_cortex_a53 }, { .cpuid = "0x00000000480fd010", .version = "v1", .type = "core", .table = pme_hisilicon_hip09_cpu }, { .table = 0 }, }; struct pmu_event pme_hisilicon_hip09_cpu[] = { { .name = "uncore_hisi_l3c.rd_cpipe", .event = "event=0", .desc = "Total read accesses. Unit: hisi_sccl,l3c ", .topic = "uncore l3c", .long_desc = "Total read accesses", .pmu = "hisi_sccl,l3c", }, { .name = "uncore_hisi_l3c.wr_cpipe", .event = "event=0x1", .desc = "Total write accesses. Unit: hisi_sccl,l3c ", .topic = "uncore l3c", .long_desc = "Total write accesses", .pmu = "hisi_sccl,l3c", }, ... }; struct pmu_sys_events pmu_sys_event_tables[] = { { .table = pme_hisilicon_hip09_sys, }, ... }; Committer notes: Added the fix for architectures without PMU events, provided by John after I reported the build failing in such systems. Link: https://lore.kernel.org/lkml/650baaf2-36b6-a9e2-ff49-963ef864c1f3@huawei.com/ Signed-off-by: John Garry Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1607080216-36968-3-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 86 +++++++++++++++++++++++++++++++++++++- tools/perf/pmu-events/pmu-events.h | 6 +++ 2 files changed, 91 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 9022216b1253..214975c819ff 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -55,6 +55,7 @@ char *prog; struct json_event { char *name; + char *compat; char *event; char *desc; char *long_desc; @@ -82,6 +83,23 @@ enum aggr_mode_class convert(const char *aggr_mode) typedef int (*func)(void *data, struct json_event *je); +static LIST_HEAD(sys_event_tables); + +struct sys_event_table { + struct list_head list; + char *soc_id; +}; + +static void free_sys_event_tables(void) +{ + struct sys_event_table *et, *next; + + list_for_each_entry_safe(et, next, &sys_event_tables, list) { + free(et->soc_id); + free(et); + } +} + int eprintf(int level, int var, const char *fmt, ...) { @@ -360,6 +378,8 @@ static int print_events_table_entry(void *data, struct json_event *je) if (je->event) fprintf(outfp, "\t.event = \"%s\",\n", je->event); fprintf(outfp, "\t.desc = \"%s\",\n", je->desc); + if (je->compat) + fprintf(outfp, "\t.compat = \"%s\",\n", je->compat); fprintf(outfp, "\t.topic = \"%s\",\n", topic); if (je->long_desc && je->long_desc[0]) fprintf(outfp, "\t.long_desc = \"%s\",\n", je->long_desc); @@ -390,6 +410,7 @@ struct event_struct { struct list_head list; char *name; char *event; + char *compat; char *desc; char *long_desc; char *pmu; @@ -583,6 +604,8 @@ static int json_events(const char *fn, free(code); } else if (json_streq(map, field, "EventName")) { addfield(map, &je.name, "", "", val); + } else if (json_streq(map, field, "Compat")) { + addfield(map, &je.compat, "", "", val); } else if (json_streq(map, field, "BriefDescription")) { addfield(map, &je.desc, "", "", val); fixdesc(je.desc); @@ -683,6 +706,7 @@ free_strings: free(event); free(je.desc); free(je.name); + free(je.compat); free(je.long_desc); free(extra_desc); free(je.pmu); @@ -747,6 +771,15 @@ static char *file_name_to_table_name(char *fname) return tblname; } +static bool is_sys_dir(char *fname) +{ + size_t len = strlen(fname), len2 = strlen("/sys"); + + if (len2 > len) + return false; + return !strcmp(fname+len-len2, "/sys"); +} + static void print_mapping_table_prefix(FILE *outfp) { fprintf(outfp, "struct pmu_events_map pmu_events_map[] = {\n"); @@ -781,6 +814,33 @@ static void print_mapping_test_table(FILE *outfp) fprintf(outfp, "},\n"); } +static void print_system_event_mapping_table_prefix(FILE *outfp) +{ + fprintf(outfp, "\nstruct pmu_sys_events pmu_sys_event_tables[] = {"); +} + +static void print_system_event_mapping_table_suffix(FILE *outfp) +{ + fprintf(outfp, "\n\t{\n\t\t.table = 0\n\t},"); + fprintf(outfp, "\n};\n"); +} + +static int process_system_event_tables(FILE *outfp) +{ + struct sys_event_table *sys_event_table; + + print_system_event_mapping_table_prefix(outfp); + + list_for_each_entry(sys_event_table, &sys_event_tables, list) { + fprintf(outfp, "\n\t{\n\t\t.table = %s,\n\t},", + sys_event_table->soc_id); + } + + print_system_event_mapping_table_suffix(outfp); + + return 0; +} + static int process_mapfile(FILE *outfp, char *fpath) { int n = 16384; @@ -886,6 +946,8 @@ static void create_empty_mapping(const char *output_file) fprintf(outfp, "#include \"pmu-events/pmu-events.h\"\n"); print_mapping_table_prefix(outfp); print_mapping_table_suffix(outfp); + print_system_event_mapping_table_prefix(outfp); + print_system_event_mapping_table_suffix(outfp); fclose(outfp); } @@ -1026,6 +1088,22 @@ static int process_one_file(const char *fpath, const struct stat *sb, return -1; } + if (is_sys_dir(bname)) { + struct sys_event_table *sys_event_table; + + sys_event_table = malloc(sizeof(*sys_event_table)); + if (!sys_event_table) + return -1; + + sys_event_table->soc_id = strdup(tblname); + if (!sys_event_table->soc_id) { + free(sys_event_table); + return -1; + } + list_add_tail(&sys_event_table->list, + &sys_event_tables); + } + print_events_table_prefix(eventsfp, tblname); return 0; } @@ -1185,10 +1263,16 @@ int main(int argc, char *argv[]) } rc = process_mapfile(eventsfp, mapfile); - fclose(eventsfp); if (rc) { pr_info("%s: Error processing mapfile %s\n", prog, mapfile); /* Make build fail */ + ret = 1; + goto err_close_eventsfp; + } + + rc = process_system_event_tables(eventsfp); + fclose(eventsfp); + if (rc) { ret = 1; goto err_out; } diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 7da1a3743b77..d1172f6aebf1 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -12,6 +12,7 @@ enum aggr_mode_class { */ struct pmu_event { const char *name; + const char *compat; const char *event; const char *desc; const char *topic; @@ -43,10 +44,15 @@ struct pmu_events_map { struct pmu_event *table; }; +struct pmu_sys_events { + struct pmu_event *table; +}; + /* * Global table mapping each known CPU for the architecture to its * table of PMU events. */ extern struct pmu_events_map pmu_events_map[]; +extern struct pmu_sys_events pmu_sys_event_tables[]; #endif -- cgit v1.3.1 From 51d548471510843e56d9f427aa6473ca0981c4a4 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 19:10:09 +0800 Subject: perf pmu: Add pmu_id() Add a function to read the PMU id sysfs entry. This is only done for uncore PMUs where this would possibly be relevant. Signed-off-by: John Garry Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1607080216-36968-4-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 18 ++++++++++++++++++ tools/perf/util/pmu.h | 1 + 2 files changed, 19 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index d41caeb35cf6..cbeda45ce578 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -597,6 +597,7 @@ static struct perf_cpu_map *__pmu_cpumask(const char *path) * Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64) * may have a "cpus" file. */ +#define SYS_TEMPLATE_ID "./bus/event_source/devices/%s/identifier" #define CPUS_TEMPLATE_UNCORE "%s/bus/event_source/devices/%s/cpumask" #define CPUS_TEMPLATE_CPU "%s/bus/event_source/devices/%s/cpus" @@ -635,6 +636,21 @@ static bool pmu_is_uncore(const char *name) return file_available(path); } +static char *pmu_id(const char *name) +{ + char path[PATH_MAX], *str; + size_t len; + + snprintf(path, PATH_MAX, SYS_TEMPLATE_ID, name); + + if (sysfs__read_str(path, &str, &len) < 0) + return NULL; + + str[len - 1] = 0; /* remove line feed */ + + return str; +} + /* * PMU CORE devices have different name other than cpu in sysfs on some * platforms. @@ -847,6 +863,8 @@ static struct perf_pmu *pmu_lookup(const char *name) pmu->name = strdup(name); pmu->type = type; pmu->is_uncore = pmu_is_uncore(name); + if (pmu->is_uncore) + pmu->id = pmu_id(name); pmu->max_precise = pmu_max_precise(name); pmu_add_cpu_aliases(&aliases, pmu); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index a64e9c9ce731..d4366e8e79df 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -30,6 +30,7 @@ struct perf_pmu_caps { struct perf_pmu { char *name; + char *id; __u32 type; bool selectable; bool is_uncore; -- cgit v1.3.1 From 4513c719c6f1ccf0c362c8dcef1f9b476f8f5c9c Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 19:10:10 +0800 Subject: perf pmu: Add pmu_add_sys_aliases() Add pmu_add_sys_aliases() to add system PMU events aliases. For adding system PMU events, iterate through all the events for all SoC event tables in pmu_sys_event_tables[]. Matches must satisfy both: - PMU identifier matches event "compat" value - event "Unit" member must match, same as uncore event aliases matched by CPUID Signed-off-by: John Garry Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1607080216-36968-5-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/pmu.h | 2 ++ 2 files changed, 80 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index cbeda45ce578..44ef28302fc7 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -812,6 +812,83 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) pmu_add_cpu_aliases_map(head, pmu, map); } +void pmu_for_each_sys_event(pmu_sys_event_iter_fn fn, void *data) +{ + int i = 0; + + while (1) { + struct pmu_sys_events *event_table; + int j = 0; + + event_table = &pmu_sys_event_tables[i++]; + + if (!event_table->table) + break; + + while (1) { + struct pmu_event *pe = &event_table->table[j++]; + int ret; + + if (!pe->name && !pe->metric_group && !pe->metric_name) + break; + + ret = fn(pe, data); + if (ret) + break; + } + } +} + +struct pmu_sys_event_iter_data { + struct list_head *head; + struct perf_pmu *pmu; +}; + +static int pmu_add_sys_aliases_iter_fn(struct pmu_event *pe, void *data) +{ + struct pmu_sys_event_iter_data *idata = data; + struct perf_pmu *pmu = idata->pmu; + + if (!pe->name) { + if (pe->metric_group || pe->metric_name) + return 0; + return -EINVAL; + } + + if (!pe->compat || !pe->pmu) + return 0; + + if (!strcmp(pmu->id, pe->compat) && + pmu_uncore_alias_match(pe->pmu, pmu->name)) { + __perf_pmu__new_alias(idata->head, NULL, + (char *)pe->name, + (char *)pe->desc, + (char *)pe->event, + (char *)pe->long_desc, + (char *)pe->topic, + (char *)pe->unit, + (char *)pe->perpkg, + (char *)pe->metric_expr, + (char *)pe->metric_name, + (char *)pe->deprecated); + } + + return 0; +} + +static void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu) +{ + struct pmu_sys_event_iter_data idata = { + .head = head, + .pmu = pmu, + }; + + if (!pmu->id) + return; + + pmu_for_each_sys_event(pmu_add_sys_aliases_iter_fn, &idata); +} + struct perf_event_attr * __weak perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) { @@ -867,6 +944,7 @@ static struct perf_pmu *pmu_lookup(const char *name) pmu->id = pmu_id(name); pmu->max_precise = pmu_max_precise(name); pmu_add_cpu_aliases(&aliases, pmu); + pmu_add_sys_aliases(&aliases, pmu); INIT_LIST_HEAD(&pmu->format); INIT_LIST_HEAD(&pmu->aliases); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index d4366e8e79df..8164388478c6 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -117,6 +117,8 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); bool pmu_uncore_alias_match(const char *pmu_name, const char *name); void perf_pmu_free_alias(struct perf_pmu_alias *alias); +typedef int (*pmu_sys_event_iter_fn)(struct pmu_event *pe, void *data); +void pmu_for_each_sys_event(pmu_sys_event_iter_fn fn, void *data); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); int perf_pmu__caps_parse(struct perf_pmu *pmu); -- cgit v1.3.1 From 6d2783fe365fa5f571cf1416b5f5b1e352447a0e Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 19:10:11 +0800 Subject: perf evlist: Change evlist__splice_list_tail() ordering Function find_evsel_group() expects events to be ordered such that they are grouped after their leader. Modify evlist__splice_list_tail() to guarantee this (ordering). [Should prob also change the function name] Signed-off-by: John Garry Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1607080216-36968-6-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 493819173a8e..89286f148012 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -179,11 +179,22 @@ void evlist__remove(struct evlist *evlist, struct evsel *evsel) void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list) { - struct evsel *evsel, *temp; + while (!list_empty(list)) { + struct evsel *evsel, *temp, *leader = NULL; - __evlist__for_each_entry_safe(list, temp, evsel) { - list_del_init(&evsel->core.node); - evlist__add(evlist, evsel); + __evlist__for_each_entry_safe(list, temp, evsel) { + list_del_init(&evsel->core.node); + evlist__add(evlist, evsel); + leader = evsel; + break; + } + + __evlist__for_each_entry_safe(list, temp, evsel) { + if (evsel->leader == leader) { + list_del_init(&evsel->core.node); + evlist__add(evlist, evsel); + } + } } } -- cgit v1.3.1 From c2337d67199a1ea1c75083da5d376aced1ab2c40 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 19:10:12 +0800 Subject: perf metricgroup: Fix metrics using aliases covering multiple PMUs Support for metric expressions using aliases which cover multiple PMUs is broken. Consider the following test metric expression: "MetricExpr": "UNC_CBO_XSNP_RESPONSE.MISS_XCORE * UNC_CBO_XSNP_RESPONSE.MISS_EVICTION" When used on my broadwell, "perf stat" gives: unc_cbo_xsnp_response.miss_eviction -> uncore_cbox_1/umask=0x81,event=0x22/ unc_cbo_xsnp_response.miss_eviction -> uncore_cbox_0/umask=0x81,event=0x22/ unc_cbo_xsnp_response.miss_xcore -> uncore_cbox_1/umask=0x41,event=0x22/ unc_cbo_xsnp_response.miss_xcore -> uncore_cbox_0/umask=0x41,event=0x22/ Control descriptor is not initialized unc_cbo_xsnp_response.miss_eviction: 3645925 1000850523 1000850523 unc_cbo_xsnp_response.miss_xcore: 106850 1000850523 1000850523 Performance counter stats for 'system wide': 3,645,925 unc_cbo_xsnp_response.miss_eviction # 389567086250.00 test_metric_inc 106,850 unc_cbo_xsnp_response.miss_xcore 1.000883096 seconds time elapsed Notice that only the results from one PMU are included. Fix the logic of find_evsel_group() to enable events which apply to multiple PMUs, by checking if the event pmu_name matches that of the metric event. With that, "perf stat" now gives: unc_cbo_xsnp_response.miss_eviction -> uncore_cbox_1/umask=0x81,event=0x22/ unc_cbo_xsnp_response.miss_eviction -> uncore_cbox_0/umask=0x81,event=0x22/ unc_cbo_xsnp_response.miss_xcore -> uncore_cbox_1/umask=0x41,event=0x22/ unc_cbo_xsnp_response.miss_xcore -> uncore_cbox_0/umask=0x41,event=0x22/ Control descriptor is not initialized unc_cbo_xsnp_response.miss_eviction: 4237983 1000904100 1000904100 unc_cbo_xsnp_response.miss_xcore: 218643 1000904100 1000904100 unc_cbo_xsnp_response.miss_eviction: 4254148 1000902629 1000902629 unc_cbo_xsnp_response.miss_xcore: 213352 1000902629 1000902629 Performance counter stats for 'system wide': 4,237,983 unc_cbo_xsnp_response.miss_eviction # 3668558131345.00 test_metric_inc 218,643 unc_cbo_xsnp_response.miss_xcore 4,254,148 unc_cbo_xsnp_response.miss_eviction 213,352 unc_cbo_xsnp_response.miss_xcore 1.000938151 seconds time elapsed Signed-off-by: John Garry Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1607080216-36968-7-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 81d201c8b833..b89160718c04 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -279,7 +279,9 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, * when then group is left. */ if (!has_constraint && - ev->leader != metric_events[i]->leader) + ev->leader != metric_events[i]->leader && + !strcmp(ev->leader->pmu_name, + metric_events[i]->leader->pmu_name)) break; if (!strcmp(metric_events[i]->name, ev->name)) { set_bit(ev->idx, evlist_used); -- cgit v1.3.1 From f6fe1e48ae185d028dfcabecb7d79036e2d89d27 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 19:10:13 +0800 Subject: perf metricgroup: Split up metricgroup__print() To aid supporting system event metric groups, break up the function metricgroup__print() into a part which iterates metrics and a part which actually "prints" the metric. No functional change intended. Signed-off-by: John Garry Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1607080216-36968-8-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 124 ++++++++++++++++++++++++------------------ 1 file changed, 70 insertions(+), 54 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index b89160718c04..4c6a686b08eb 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -493,6 +493,72 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw) putchar('\n'); } +static int metricgroup__print_pmu_event(struct pmu_event *pe, + bool metricgroups, char *filter, + bool raw, bool details, + struct rblist *groups, + struct strlist *metriclist) +{ + const char *g; + char *omg, *mg; + + g = pe->metric_group; + if (!g && pe->metric_name) { + if (pe->name) + return 0; + g = "No_group"; + } + + if (!g) + return 0; + + mg = strdup(g); + + if (!mg) + return -ENOMEM; + omg = mg; + while ((g = strsep(&mg, ";")) != NULL) { + struct mep *me; + char *s; + + g = skip_spaces(g); + if (*g == 0) + g = "No_group"; + if (filter && !strstr(g, filter)) + continue; + if (raw) + s = (char *)pe->metric_name; + else { + if (asprintf(&s, "%s\n%*s%s]", + pe->metric_name, 8, "[", pe->desc) < 0) + return -1; + if (details) { + if (asprintf(&s, "%s\n%*s%s]", + s, 8, "[", pe->metric_expr) < 0) + return -1; + } + } + + if (!s) + continue; + + if (!metricgroups) { + strlist__add(metriclist, s); + } else { + me = mep_lookup(groups, g); + if (!me) + continue; + strlist__add(me->metrics, s); + } + + if (!raw) + free(s); + } + free(omg); + + return 0; +} + void metricgroup__print(bool metrics, bool metricgroups, char *filter, bool raw, bool details) { @@ -517,66 +583,16 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, groups.node_cmp = mep_cmp; groups.node_delete = mep_delete; for (i = 0; ; i++) { - const char *g; pe = &map->table[i]; if (!pe->name && !pe->metric_group && !pe->metric_name) break; if (!pe->metric_expr) continue; - g = pe->metric_group; - if (!g && pe->metric_name) { - if (pe->name) - continue; - g = "No_group"; - } - if (g) { - char *omg; - char *mg = strdup(g); - - if (!mg) - return; - omg = mg; - while ((g = strsep(&mg, ";")) != NULL) { - struct mep *me; - char *s; - - g = skip_spaces(g); - if (*g == 0) - g = "No_group"; - if (filter && !strstr(g, filter)) - continue; - if (raw) - s = (char *)pe->metric_name; - else { - if (asprintf(&s, "%s\n%*s%s]", - pe->metric_name, 8, "[", pe->desc) < 0) - return; - - if (details) { - if (asprintf(&s, "%s\n%*s%s]", - s, 8, "[", pe->metric_expr) < 0) - return; - } - } - - if (!s) - continue; - - if (!metricgroups) { - strlist__add(metriclist, s); - } else { - me = mep_lookup(&groups, g); - if (!me) - continue; - strlist__add(me->metrics, s); - } - - if (!raw) - free(s); - } - free(omg); - } + if (metricgroup__print_pmu_event(pe, metricgroups, filter, + raw, details, &groups, + metriclist) < 0) + return; } if (!filter || !rblist__empty(&groups)) { -- cgit v1.3.1 From a36fadb17c27b4b5360db69acc80f5f4ad8dde7e Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 19:10:14 +0800 Subject: perf metricgroup: Support printing metric groups for system PMUs Currently printing metricgroups for core- or uncore-based events matched by CPUID is supported. Extend this for system events. Signed-off-by: John Garry Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1607080216-36968-9-git-send-email-john.garry@huawei.com [ Reorder 'struct metricgroup_print_sys_idata' field to avoid alignment holes ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 64 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 4c6a686b08eb..6344e9627ff3 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -559,6 +559,49 @@ static int metricgroup__print_pmu_event(struct pmu_event *pe, return 0; } +struct metricgroup_print_sys_idata { + struct strlist *metriclist; + char *filter; + struct rblist *groups; + bool metricgroups; + bool raw; + bool details; +}; + +typedef int (*metricgroup_sys_event_iter_fn)(struct pmu_event *pe, void *); + +struct metricgroup_iter_data { + metricgroup_sys_event_iter_fn fn; + void *data; +}; + +static int metricgroup__sys_event_iter(struct pmu_event *pe, void *data) +{ + struct metricgroup_iter_data *d = data; + struct perf_pmu *pmu = NULL; + + if (!pe->metric_expr || !pe->compat) + return 0; + + while ((pmu = perf_pmu__scan(pmu))) { + + if (!pmu->id || strcmp(pmu->id, pe->compat)) + continue; + + return d->fn(pe, d->data); + } + + return 0; +} + +static int metricgroup__print_sys_event_iter(struct pmu_event *pe, void *data) +{ + struct metricgroup_print_sys_idata *d = data; + + return metricgroup__print_pmu_event(pe, d->metricgroups, d->filter, d->raw, + d->details, d->groups, d->metriclist); +} + void metricgroup__print(bool metrics, bool metricgroups, char *filter, bool raw, bool details) { @@ -569,9 +612,6 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, struct rb_node *node, *next; struct strlist *metriclist = NULL; - if (!map) - return; - if (!metricgroups) { metriclist = strlist__new(NULL, NULL); if (!metriclist) @@ -582,7 +622,7 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, groups.node_new = mep_new; groups.node_cmp = mep_cmp; groups.node_delete = mep_delete; - for (i = 0; ; i++) { + for (i = 0; map; i++) { pe = &map->table[i]; if (!pe->name && !pe->metric_group && !pe->metric_name) @@ -595,6 +635,22 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, return; } + { + struct metricgroup_iter_data data = { + .fn = metricgroup__print_sys_event_iter, + .data = (void *) &(struct metricgroup_print_sys_idata){ + .metriclist = metriclist, + .metricgroups = metricgroups, + .filter = filter, + .raw = raw, + .details = details, + .groups = &groups, + }, + }; + + pmu_for_each_sys_event(metricgroup__sys_event_iter, &data); + } + if (!filter || !rblist__empty(&groups)) { if (metricgroups && !raw) printf("\nMetric Groups:\n\n"); -- cgit v1.3.1 From be335ec28efa89d6bff8f4c6ce8daba88acf2b1a Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 19:10:15 +0800 Subject: perf metricgroup: Support adding metrics for system PMUs Currently adding metrics for core- or uncore-based events matched by CPUID is supported. Extend this for system events. Signed-off-by: John Garry Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1607080216-36968-10-git-send-email-john.garry@huawei.com [ Reorder 'struct metricgroup_add_iter_data' field to avoid alignment holes ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 66 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 6344e9627ff3..ee94d3e8dd65 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -415,6 +415,12 @@ static bool match_metric(const char *n, const char *list) return false; } +static bool match_pe_metric(struct pmu_event *pe, const char *metric) +{ + return match_metric(pe->metric_group, metric) || + match_metric(pe->metric_name, metric); +} + struct mep { struct rb_node nd; const char *name; @@ -757,6 +763,16 @@ int __weak arch_get_runtimeparam(struct pmu_event *pe __maybe_unused) return 1; } +struct metricgroup_add_iter_data { + struct list_head *metric_list; + const char *metric; + struct metric **m; + struct expr_ids *ids; + int *ret; + bool *has_match; + bool metric_no_group; +}; + static int __add_metric(struct list_head *metric_list, struct pmu_event *pe, bool metric_no_group, @@ -866,10 +882,11 @@ static int __add_metric(struct list_head *metric_list, return 0; } -#define map_for_each_event(__pe, __idx, __map) \ - for (__idx = 0, __pe = &__map->table[__idx]; \ - __pe->name || __pe->metric_group || __pe->metric_name; \ - __pe = &__map->table[++__idx]) +#define map_for_each_event(__pe, __idx, __map) \ + if (__map) \ + for (__idx = 0, __pe = &__map->table[__idx]; \ + __pe->name || __pe->metric_group || __pe->metric_name; \ + __pe = &__map->table[++__idx]) #define map_for_each_metric(__pe, __idx, __map, __metric) \ map_for_each_event(__pe, __idx, __map) \ @@ -1037,6 +1054,29 @@ static int add_metric(struct list_head *metric_list, return ret; } +static int metricgroup__add_metric_sys_event_iter(struct pmu_event *pe, + void *data) +{ + struct metricgroup_add_iter_data *d = data; + int ret; + + if (!match_pe_metric(pe, d->metric)) + return 0; + + ret = add_metric(d->metric_list, pe, d->metric_no_group, d->m, NULL, d->ids); + if (ret) + return ret; + + ret = resolve_metric(d->metric_no_group, + d->metric_list, NULL, d->ids); + if (ret) + return ret; + + *(d->has_match) = true; + + return *d->ret; +} + static int metricgroup__add_metric(const char *metric, bool metric_no_group, struct strbuf *events, struct list_head *metric_list, @@ -1067,6 +1107,22 @@ static int metricgroup__add_metric(const char *metric, bool metric_no_group, goto out; } + { + struct metricgroup_iter_data data = { + .fn = metricgroup__add_metric_sys_event_iter, + .data = (void *) &(struct metricgroup_add_iter_data) { + .metric_list = &list, + .metric = metric, + .metric_no_group = metric_no_group, + .m = &m, + .ids = &ids, + .has_match = &has_match, + .ret = &ret, + }, + }; + + pmu_for_each_sys_event(metricgroup__sys_event_iter, &data); + } /* End of pmu events. */ if (!has_match) { ret = -EINVAL; @@ -1193,8 +1249,6 @@ int metricgroup__parse_groups(const struct option *opt, struct evlist *perf_evlist = *(struct evlist **)opt->value; struct pmu_events_map *map = perf_pmu__find_map(NULL); - if (!map) - return 0; return parse_groups(perf_evlist, str, metric_no_group, metric_no_merge, NULL, metric_events, map); -- cgit v1.3.1 From e15a536521ed7f48fac268152a78e6e2f99102d2 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Fri, 4 Dec 2020 19:10:16 +0800 Subject: perf vendor events: Add JSON metrics for imx8mm DDR Perf Add JSON metrics for imx8mm DDR Perf. Signed-off-by: Joakim Zhang Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: John Garry Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Signed-off-by: John Garry Link: http://lore.kernel.org/lkml/1607080216-36968-11-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/freescale/imx8mm/sys/ddrc.json | 39 ++++++++++++++++++++++ .../arch/arm64/freescale/imx8mm/sys/metrics.json | 18 ++++++++++ tools/perf/pmu-events/jevents.c | 2 ++ 3 files changed, 59 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/ddrc.json create mode 100644 tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/ddrc.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/ddrc.json new file mode 100644 index 000000000000..3b1cd708f568 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/ddrc.json @@ -0,0 +1,39 @@ +[ + { + "BriefDescription": "ddr cycles event", + "EventCode": "0x00", + "EventName": "imx8mm_ddr.cycles", + "Unit": "imx8_ddr", + "Compat": "i.MX8MM" + }, + { + "BriefDescription": "ddr read-cycles event", + "EventCode": "0x2a", + "EventName": "imx8mm_ddr.read_cycles", + "Unit": "imx8_ddr", + "Compat": "i.MX8MM" + }, + { + "BriefDescription": "ddr write-cycles event", + "EventCode": "0x2b", + "EventName": "imx8mm_ddr.write_cycles", + "Unit": "imx8_ddr", + "Compat": "i.MX8MM" + }, + { + "BriefDescription": "ddr read event", + "EventCode": "0x35", + "EventName": "imx8mm_ddr.read", + "Unit": "imx8_ddr", + "Compat": "i.MX8MM" + }, + { + "BriefDescription": "ddr write event", + "EventCode": "0x38", + "EventName": "imx8mm_ddr.write", + "Unit": "imx8_ddr", + "Compat": "i.MX8MM" + } +] + + diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json new file mode 100644 index 000000000000..8e553b67cae6 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json @@ -0,0 +1,18 @@ +[ + { + "BriefDescription": "bytes all masters read from ddr based on read-cycles event", + "MetricName": "imx8mm_ddr_read.all", + "MetricExpr": "imx8mm_ddr.read_cycles * 4 * 4", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MM" + }, + { + "BriefDescription": "bytes all masters write to ddr based on write-cycles event", + "MetricName": "imx8mm_ddr_write.all", + "MetricExpr": "imx8mm_ddr.write_cycles * 4 * 4", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx8_ddr", + "Compat": "i.MX8MM" + } +] diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 214975c819ff..e1f3f5c8c550 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -281,6 +281,8 @@ static struct map { { "hisi_sccl,ddrc", "hisi_sccl,ddrc" }, { "hisi_sccl,hha", "hisi_sccl,hha" }, { "hisi_sccl,l3c", "hisi_sccl,l3c" }, + /* it's not realistic to keep adding these, we need something more scalable ... */ + { "imx8_ddr", "imx8_ddr" }, { "L3PMC", "amd_l3" }, { "DFPMC", "amd_df" }, {} -- cgit v1.3.1 From 03de8656c7778c5434cc2ca8e6b4699c1176c090 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 10 Dec 2020 15:13:01 +0900 Subject: perf report: Support --header-only for pipe mode The --header-only checks file header and prints the feature data. But as pipe mode doesn't have it in the header it prints almost nothing. Change it to process first few records until it founds HEADER_FEATURE. Before: $ perf record -o- true | perf report -i- --header-only # ======== # captured on : Thu Dec 10 14:34:59 2020 # header version : 1 # data offset : 0 # data size : 0 # feat offset : 0 # ======== # After: $ perf record -o- true | perf report -i- --header-only # ======== # captured on : Thu Dec 10 14:49:11 2020 # header version : 1 # data offset : 0 # data size : 0 # feat offset : 0 # ======== # # hostname : balhae # os release : 5.7.17-1xxx # perf version : 5.10.rc6.gdb0ea13cc741 # arch : x86_64 # nrcpus online : 8 # nrcpus avail : 8 # cpudesc : Intel(R) Core(TM) i7-8665U CPU @ 1.90GHz # cpuid : GenuineIntel,6,142,12 # total memory : 16158916 kB # cmdline : perf record -o- true # event : name = cycles, , id = { 81, 82, 83, 84, 85, 86, 87, 88 }, size = 120, ... # CPU_TOPOLOGY info available, use -I to display # NUMA_TOPOLOGY info available, use -I to display # pmu mappings: intel_pt = 9, intel_bts = 8, software = 1, power = 20, uprobe = 7, ... # time of first sample : 0.000000 # time of last sample : 0.000000 # sample duration : 0.000 ms # MEM_TOPOLOGY info available, use -I to display # cpu pmu capabilities: branches=32, max_precise=3, pmu_name=skylake Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20201210061302.88213-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5efbd0602f17..2a845d6cac09 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -226,6 +226,8 @@ static int process_feature_event(struct perf_session *session, pr_err("failed: wrong feature ID: %" PRI_lu64 "\n", event->feat.feat_id); return -1; + } else if (rep->header_only) { + session_done = 1; } /* @@ -1512,6 +1514,13 @@ repeat: perf_session__fprintf_info(session, stdout, report.show_full_info); if (report.header_only) { + if (data.is_pipe) { + /* + * we need to process first few records + * which contains PERF_RECORD_HEADER_FEATURE. + */ + perf_session__process_events(session); + } ret = 0; goto error; } -- cgit v1.3.1 From 96aea4daa6cb893d339d80ce14727e6421991d8b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 10 Dec 2020 15:13:02 +0900 Subject: perf evlist: Support pipe mode display Likewise, perf evlist command should print event attributes by reading PERF_RECORD_HEADER_ATTR records. Before: $ perf record -o- true | ./perf evlist -i- (prints nothing) After: $ perf record -o- true | ./perf evlist -i- cycles:pppH Committer testing: Verbose mode also works as expected: $ perf record -o- true | perf evlist -i- cycles:uhH $ perf record -o- true | perf evlist -vi- cycles:uhH: size: 120, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ID|PERIOD, read_format: ID, disabled: 1, inherit: 1, exclude_kernel: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1 $ Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20201210061302.88213-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-evlist.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index 98e992801251..4617b32c9c97 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -17,6 +17,14 @@ #include "util/data.h" #include "util/debug.h" #include +#include "util/tool.h" + +static int process_header_feature(struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + session_done = 1; + return 0; +} static int __cmd_evlist(const char *file_name, struct perf_attr_details *details) { @@ -27,12 +35,20 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details .mode = PERF_DATA_MODE_READ, .force = details->force, }; + struct perf_tool tool = { + /* only needed for pipe mode */ + .attr = perf_event__process_attr, + .feature = process_header_feature, + }; bool has_tracepoint = false; - session = perf_session__new(&data, 0, NULL); + session = perf_session__new(&data, 0, &tool); if (IS_ERR(session)) return PTR_ERR(session); + if (data.is_pipe) + perf_session__process_events(session); + evlist__for_each_entry(session->evlist, pos) { evsel__fprintf(pos, details, stdout); -- cgit v1.3.1 From 7cfcd1e016cce5a72b4b86a3882eb80565430f82 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 10 Dec 2020 21:43:28 +0100 Subject: perf tools: Add evlist__disable_evsel/evlist__enable_evsel Adding interface to enable/disable single event in the evlist based on its name. It will be used later in new control enable/disable interface. Keeping the evlist::enabled true when one or more events are enabled so the toggle can work properly and toggle evlist to disabled state. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Acked-by: Alexei Budankov Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20201210204330.233864-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 69 +++++++++++++++++++++++++++++++++++++++++++++--- tools/perf/util/evlist.h | 2 ++ 2 files changed, 68 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 89286f148012..05363a7247c4 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -381,7 +381,30 @@ bool evsel__cpu_iter_skip(struct evsel *ev, int cpu) return true; } -void evlist__disable(struct evlist *evlist) +static int evsel__strcmp(struct evsel *pos, char *evsel_name) +{ + if (!evsel_name) + return 0; + if (evsel__is_dummy_event(pos)) + return 1; + return strcmp(pos->name, evsel_name); +} + +static int evlist__is_enabled(struct evlist *evlist) +{ + struct evsel *pos; + + evlist__for_each_entry(evlist, pos) { + if (!evsel__is_group_leader(pos) || !pos->core.fd) + continue; + /* If at least one event is enabled, evlist is enabled. */ + if (!pos->disabled) + return true; + } + return false; +} + +static void __evlist__disable(struct evlist *evlist, char *evsel_name) { struct evsel *pos; struct affinity affinity; @@ -397,6 +420,8 @@ void evlist__disable(struct evlist *evlist) affinity__set(&affinity, cpu); evlist__for_each_entry(evlist, pos) { + if (evsel__strcmp(pos, evsel_name)) + continue; if (evsel__cpu_iter_skip(pos, cpu)) continue; if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd) @@ -414,15 +439,34 @@ void evlist__disable(struct evlist *evlist) affinity__cleanup(&affinity); evlist__for_each_entry(evlist, pos) { + if (evsel__strcmp(pos, evsel_name)) + continue; if (!evsel__is_group_leader(pos) || !pos->core.fd) continue; pos->disabled = true; } - evlist->enabled = false; + /* + * If we disabled only single event, we need to check + * the enabled state of the evlist manually. + */ + if (evsel_name) + evlist->enabled = evlist__is_enabled(evlist); + else + evlist->enabled = false; +} + +void evlist__disable(struct evlist *evlist) +{ + __evlist__disable(evlist, NULL); +} + +void evlist__disable_evsel(struct evlist *evlist, char *evsel_name) +{ + __evlist__disable(evlist, evsel_name); } -void evlist__enable(struct evlist *evlist) +static void __evlist__enable(struct evlist *evlist, char *evsel_name) { struct evsel *pos; struct affinity affinity; @@ -435,6 +479,8 @@ void evlist__enable(struct evlist *evlist) affinity__set(&affinity, cpu); evlist__for_each_entry(evlist, pos) { + if (evsel__strcmp(pos, evsel_name)) + continue; if (evsel__cpu_iter_skip(pos, cpu)) continue; if (!evsel__is_group_leader(pos) || !pos->core.fd) @@ -444,14 +490,31 @@ void evlist__enable(struct evlist *evlist) } affinity__cleanup(&affinity); evlist__for_each_entry(evlist, pos) { + if (evsel__strcmp(pos, evsel_name)) + continue; if (!evsel__is_group_leader(pos) || !pos->core.fd) continue; pos->disabled = false; } + /* + * Even single event sets the 'enabled' for evlist, + * so the toggle can work properly and toggle to + * 'disabled' state. + */ evlist->enabled = true; } +void evlist__enable(struct evlist *evlist) +{ + __evlist__enable(evlist, NULL); +} + +void evlist__enable_evsel(struct evlist *evlist, char *evsel_name) +{ + __evlist__enable(evlist, evsel_name); +} + void evlist__toggle_enable(struct evlist *evlist) { (evlist->enabled ? evlist__disable : evlist__enable)(evlist); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 9b0c795736bb..1aae75895dea 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -186,6 +186,8 @@ size_t evlist__mmap_size(unsigned long pages); void evlist__disable(struct evlist *evlist); void evlist__enable(struct evlist *evlist); void evlist__toggle_enable(struct evlist *evlist); +void evlist__disable_evsel(struct evlist *evlist, char *evsel_name); +void evlist__enable_evsel(struct evlist *evlist, char *evsel_name); int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx); -- cgit v1.3.1 From 8abceacff87d2fbb8e50e841d410e4808725151b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 12 Dec 2020 11:43:51 +0100 Subject: perf debug: Add debug_set_file function Allow to set debug output file via new debug_set_file function. It's called during perf startup in perf_debug_setup to set stderr file as default and any perf command can set it later to different file. It will be used in perf daemon command to get verbose output into log file. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201212104358.412065-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/debug.c | 9 ++++++++- tools/perf/util/debug.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 5cda5565777a..50fd6a4be4e0 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -30,6 +30,12 @@ bool dump_trace = false, quiet = false; int debug_ordered_events; static int redirect_to_stderr; int debug_data_convert; +static FILE *debug_file; + +void debug_set_file(FILE *file) +{ + debug_file = file; +} int veprintf(int level, int var, const char *fmt, va_list args) { @@ -39,7 +45,7 @@ int veprintf(int level, int var, const char *fmt, va_list args) if (use_browser >= 1 && !redirect_to_stderr) ui_helpline__vshow(fmt, args); else - ret = vfprintf(stderr, fmt, args); + ret = vfprintf(debug_file, fmt, args); } return ret; @@ -227,6 +233,7 @@ DEBUG_WRAPPER(debug, 1); void perf_debug_setup(void) { + debug_set_file(stderr); libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper); } diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index f1734abd98dd..43f712295645 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -5,6 +5,7 @@ #include #include +#include #include extern int verbose; @@ -62,6 +63,7 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __printf(4, 5) int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); +void debug_set_file(FILE *file); void perf_debug_setup(void); int perf_quiet_option(void); -- cgit v1.3.1 From 47dce51acc330eefef5ea876f7707585b402282a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 14 Dec 2020 11:54:48 +0100 Subject: perf tools: Add support to read build id from compressed elf Adding support to decompress file before reading build id. Adding filename__read_build_id and change its current versions to read_build_id. Shutting down stderr output of perf list in the shell test: 82: Check open filename arg using perf trace + vfs_getname : Ok because with decompression code in the place we the filename__read_build_id function is more verbose in case of error and the test did not account for that. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201214105457.543111-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/trace+probe_vfs_getname.sh | 2 +- tools/perf/util/symbol-elf.c | 37 +++++++++++++++++++++-- 2 files changed, 36 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 11cc2af13f2b..3d31c1d560d6 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -20,7 +20,7 @@ skip_if_no_perf_trace || exit 2 file=$(mktemp /tmp/temporary_file.XXXXX) trace_open_vfs_getname() { - evts=$(echo $(perf list syscalls:sys_enter_open* 2>&1 | egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') + evts=$(echo $(perf list syscalls:sys_enter_open* 2>/dev/null | egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') perf trace -e $evts touch $file 2>&1 | \ egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 44dd86a4f25f..f3577f7d72fe 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -534,7 +534,7 @@ out: #ifdef HAVE_LIBBFD_BUILDID_SUPPORT -int filename__read_build_id(const char *filename, struct build_id *bid) +static int read_build_id(const char *filename, struct build_id *bid) { size_t size = sizeof(bid->data); int err = -1; @@ -563,7 +563,7 @@ out_close: #else // HAVE_LIBBFD_BUILDID_SUPPORT -int filename__read_build_id(const char *filename, struct build_id *bid) +static int read_build_id(const char *filename, struct build_id *bid) { size_t size = sizeof(bid->data); int fd, err = -1; @@ -595,6 +595,39 @@ out: #endif // HAVE_LIBBFD_BUILDID_SUPPORT +int filename__read_build_id(const char *filename, struct build_id *bid) +{ + struct kmod_path m = { .name = NULL, }; + char path[PATH_MAX]; + int err; + + if (!filename) + return -EFAULT; + + err = kmod_path__parse(&m, filename); + if (err) + return -1; + + if (m.comp) { + int error = 0, fd; + + fd = filename__decompress(filename, path, sizeof(path), m.comp, &error); + if (fd < 0) { + pr_debug("Failed to decompress (error %d) %s\n", + error, filename); + return -1; + } + close(fd); + filename = path; + } + + err = read_build_id(filename, bid); + + if (m.comp) + unlink(filename); + return err; +} + int sysfs__read_build_id(const char *filename, struct build_id *bid) { size_t size = sizeof(bid->data); -- cgit v1.3.1 From dc67d1920417140052976f3377fd216b87a50aad Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 16 Dec 2020 12:45:10 -0300 Subject: perf test: Make sample-parsing test aware of PERF_SAMPLE_{CODE,DATA}_PAGE_SIZE To fix this: $ perf test -v 27 27: Sample parsing : --- start --- test child forked, pid 586013 sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating test child finished with -1 ---- end ---- Sample parsing: FAILED! $ This patchset is still not completely merged, so when adding the PERF_SAMPLE_CODE_PAGE_SIZE to 'struct perf_sample' we need to add the bits added in this patch for 'perf_sample.data_page_size'. Fixes: 251cc77b8176de37 ("tools headers UAPI: Update tools's copy of linux/perf_event.h") Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sample-parsing.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index a0bdaf390ac8..2393916f6128 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -154,6 +154,9 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_CGROUP) COMP(cgroup); + if (type & PERF_SAMPLE_DATA_PAGE_SIZE) + COMP(data_page_size); + if (type & PERF_SAMPLE_AUX) { COMP(aux_sample.size); if (memcmp(s1->aux_sample.data, s2->aux_sample.data, @@ -234,6 +237,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) }, .phys_addr = 113, .cgroup = 114, + .data_page_size = 115, .aux_sample = { .size = sizeof(aux_data), .data = (void *)aux_data, @@ -340,7 +344,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u * were added. Please actually update the test rather than just change * the condition below. */ - if (PERF_SAMPLE_MAX > PERF_SAMPLE_CGROUP << 1) { + if (PERF_SAMPLE_MAX > PERF_SAMPLE_CODE_PAGE_SIZE << 1) { pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n"); return -1; } -- cgit v1.3.1 From 526671bfc47df175eb87f96067d51b389a8af50d Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Wed, 16 Dec 2020 12:13:17 -0500 Subject: perf config: Fix example command in manpage to conform to syntax specified in the SYNOPSIS section. Committer testing: With the previously documented example: $ perf config --user report sort-order=srcline The config variable does not contain a section name: report $ With the fixed example line: $ perf config --user report.sort-order=srcline $ perf config --user report.sort-order report.sort-order=srcline $ Signed-off-by: Nick Thompson Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/linux-perf-users/20201217142619.GA14524@redhat.com/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 31069d8a5304..5c379adf8304 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -138,7 +138,7 @@ If you want to add or modify several config items, you can do like To modify the sort order of report functionality in user config file(i.e. `~/.perfconfig`), do - % perf config --user report sort-order=srcline + % perf config --user report.sort-order=srcline To change colors of selected line to other foreground and background colors in system config file (i.e. `$(sysconf)/perfconfig`), do -- cgit v1.3.1 From feca8a8342d3f53e394c9fc7d985b98ec0250ce1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 16 Dec 2020 09:39:11 +0100 Subject: perf tools: Reformat record's control fd man text Adding available control commands in separate paragraph, so it's more readable and easier to add new commands. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201216083914.47215-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 2d30e525a600..34cf651ee237 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -637,11 +637,17 @@ endif::HAVE_LIBPFM[] --control=fifo:ctl-fifo[,ack-fifo]:: --control=fd:ctl-fd[,ack-fd]:: ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows. -Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, -'disable': disable events, 'snapshot': AUX area tracing snapshot). Measurements can be -started with events disabled using --delay=-1 option. Optionally send control command -completion ('ack\n') to ack-fd descriptor to synchronize with the controlling process. -Example of bash shell script to enable and disable events during measurements: +Listen on ctl-fd descriptor for command to control measurement. + +Available commands: + 'enable' : enable events + 'disable' : disable events + 'snapshot': AUX area tracing snapshot). + +Measurements can be started with events disabled using --delay=-1 option. Optionally +send control command completion ('ack\n') to ack-fd descriptor to synchronize with the +controlling process. Example of bash shell script to enable and disable events during +measurements: #!/bin/bash -- cgit v1.3.1 From 4262f8c3efa1e79bd5950437a3eea58eeb4c1c70 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 14 Dec 2020 09:59:00 -0300 Subject: tools headers: Syncronize linux/build_bug.h with the kernel sources To pick up the changes in: 14dc3983b5dff513 ("kbuild: avoid static_assert for genksyms") And silence this perf build warning: Warning: Kernel ABI header at 'tools/include/linux/build_bug.h' differs from latest version at 'include/linux/build_bug.h' diff -u tools/include/linux/build_bug.h include/linux/build_bug.h Cc: Adrian Hunter Cc: Arnd Bergmann Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/build_bug.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h index cc7070c7439b..ce365d212768 100644 --- a/tools/include/linux/build_bug.h +++ b/tools/include/linux/build_bug.h @@ -79,4 +79,9 @@ #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) #endif // static_assert +#ifdef __GENKSYMS__ +/* genksyms gets confused by _Static_assert */ +#define _Static_assert(expr, ...) +#endif + #endif /* _LINUX_BUILD_BUG_H */ -- cgit v1.3.1 From 1c28a05d1a972594164efc7fcffda416c5d6ab02 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Dec 2020 09:13:16 -0300 Subject: tools headers UAPI: Sync linux/stat.h with the kernel sources To pick the changes in: 72d1249e2ffdbc34 ("uapi: fix statx attribute value overlap for DAX & MOUNT_ROOT") That don't cause any change in tooling, just addresses this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/stat.h' differs from latest version at 'include/uapi/linux/stat.h' diff -u tools/include/uapi/linux/stat.h include/uapi/linux/stat.h Cc: Adrian Hunter Cc: Eric Sandeen Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/stat.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 82cc58fe9368..1500a0f58041 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -171,9 +171,12 @@ struct statx { * be of use to ordinary userspace programs such as GUIs or ls rather than * specialised tools. * - * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS + * Note that the flags marked [I] correspond to the FS_IOC_SETFLAGS flags * semantically. Where possible, the numerical value is picked to correspond - * also. + * also. Note that the DAX attribute indicates that the file is in the CPU + * direct access state. It does not correspond to the per-inode flag that + * some filesystems support. + * */ #define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */ #define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */ @@ -183,7 +186,7 @@ struct statx { #define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ -#define STATX_ATTR_DAX 0x00002000 /* [I] File is DAX */ +#define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ #endif /* _UAPI_LINUX_STAT_H */ -- cgit v1.3.1 From 219d04992b689e0498ece02d2a451f2b6e2563a9 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 16 Dec 2020 12:48:34 +0100 Subject: mptcp: push pending frames when subflow has free space When multiple subflows are active, we can receive a window update on subflow with no write space available. MPTCP will try to push frames on such subflow and will fail. Pending frames will be pushed only after receiving a window update on a subflow with some wspace available. Overall the above could lead to suboptimal aggregate bandwidth usage. Instead, we should try to push pending frames as soon as the subflow reaches both conditions mentioned above. We can finally enable self-tests with asymmetric links, as the above makes them finally pass. Fixes: 6f8a612a33e4 ("mptcp: keep track of advertised windows right edge") Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Jakub Kicinski --- net/mptcp/options.c | 13 ++++++++----- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 2 +- tools/testing/selftests/net/mptcp/simult_flows.sh | 6 +++--- 4 files changed, 13 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/net/mptcp/options.c b/net/mptcp/options.c index e8a1adf299d8..e0d21c0607e5 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -875,10 +875,13 @@ static void ack_update_msk(struct mptcp_sock *msk, new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd; - if (after64(new_wnd_end, msk->wnd_end)) { + if (after64(new_wnd_end, msk->wnd_end)) msk->wnd_end = new_wnd_end; - __mptcp_wnd_updated(sk, ssk); - } + + /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */ + if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)) && + sk_stream_memory_free(ssk)) + __mptcp_check_push(sk, ssk); if (after64(new_snd_una, old_snd_una)) { msk->snd_una = new_snd_una; @@ -944,8 +947,8 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) * helpers are cheap. */ mptcp_data_lock(subflow->conn); - if (mptcp_send_head(subflow->conn)) - __mptcp_wnd_updated(subflow->conn, sk); + if (sk_stream_memory_free(sk)) + __mptcp_check_push(subflow->conn, sk); __mptcp_data_acked(subflow->conn); mptcp_data_unlock(subflow->conn); return; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 64c0c54c80e8..b53a91801a6c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2915,7 +2915,7 @@ void __mptcp_data_acked(struct sock *sk) mptcp_schedule_work(sk); } -void __mptcp_wnd_updated(struct sock *sk, struct sock *ssk) +void __mptcp_check_push(struct sock *sk, struct sock *ssk) { if (!mptcp_send_head(sk)) return; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 7cf9d110b85f..d67de793d363 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -503,7 +503,7 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk); void mptcp_data_ready(struct sock *sk, struct sock *ssk); bool mptcp_finish_join(struct sock *sk); bool mptcp_schedule_work(struct sock *sk); -void __mptcp_wnd_updated(struct sock *sk, struct sock *ssk); +void __mptcp_check_push(struct sock *sk, struct sock *ssk); void __mptcp_data_acked(struct sock *sk); void mptcp_subflow_eof(struct sock *sk); bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit); diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 2f649b431456..f039ee57eb3c 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -287,7 +287,7 @@ run_test 10 10 0 0 "balanced bwidth" run_test 10 10 1 50 "balanced bwidth with unbalanced delay" # we still need some additional infrastructure to pass the following test-cases -# run_test 30 10 0 0 "unbalanced bwidth" -# run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" -# run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" +run_test 30 10 0 0 "unbalanced bwidth" +run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" +run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" exit $ret -- cgit v1.3.1 From e79bb299ccad6983876686a4d8c87c92ebbe5657 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 14 Dec 2020 22:35:39 +0000 Subject: selftests/bpf: Fix spelling mistake "tranmission" -> "transmission" There are two spelling mistakes in output messages. Fix these. Signed-off-by: Colin Ian King Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201214223539.83168-1-colin.king@canonical.com --- tools/testing/selftests/bpf/xdpxceiver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 014dedaa4dd2..1e722ee76b1f 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -715,7 +715,7 @@ static void worker_pkt_dump(void) int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE)); if (payload == EOT) { - ksft_print_msg("End-of-tranmission frame received\n"); + ksft_print_msg("End-of-transmission frame received\n"); fprintf(stdout, "---------------------------------------\n"); break; } @@ -747,7 +747,7 @@ static void worker_pkt_validate(void) } if (payloadseqnum == EOT) { - ksft_print_msg("End-of-tranmission frame received: PASS\n"); + ksft_print_msg("End-of-transmission frame received: PASS\n"); sigvar = 1; break; } -- cgit v1.3.1 From 4bba4c4bb09ad4a2b70836725e08439c86d8f9e4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 18 Dec 2020 09:59:05 -0300 Subject: tools headers: Get tools's linux/compiler.h closer to the kernel's We're cherry picking stuff from the kernel to allow for the other headers that we keep in sync via tools/perf/check-headers.sh to work, so introduce linux/compiler_types.h and from there get the compiler specific stuff. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler.h | 4 +--- tools/include/linux/compiler_types.h | 10 ++++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 tools/include/linux/compiler_types.h (limited to 'tools') diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index d22a974372c0..ff872dc2637c 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -2,9 +2,7 @@ #ifndef _TOOLS_LINUX_COMPILER_H_ #define _TOOLS_LINUX_COMPILER_H_ -#ifdef __GNUC__ -#include -#endif +#include #ifndef __compiletime_error # define __compiletime_error(message) diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h new file mode 100644 index 000000000000..31fc2caa758a --- /dev/null +++ b/tools/include/linux/compiler_types.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_COMPILER_TYPES_H +#define __LINUX_COMPILER_TYPES_H + +/* Compiler specific macros. */ +#ifdef __GNUC__ +#include +#endif + +#endif /* __LINUX_COMPILER_TYPES_H */ -- cgit v1.3.1 From ffb9beb13e8daf3fcb6bab470d07962b05d619b7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 18 Dec 2020 10:16:08 -0300 Subject: tools headers: Add conditional __has_builtin() As it'll be used by the ctype.h sync with its kernel source original. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler_types.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools') diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h index 31fc2caa758a..feea09029f61 100644 --- a/tools/include/linux/compiler_types.h +++ b/tools/include/linux/compiler_types.h @@ -2,6 +2,17 @@ #ifndef __LINUX_COMPILER_TYPES_H #define __LINUX_COMPILER_TYPES_H +/* Builtins */ + +/* + * __has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21. + * In the meantime, to support gcc < 10, we implement __has_builtin + * by hand. + */ +#ifndef __has_builtin +#define __has_builtin(x) (0) +#endif + /* Compiler specific macros. */ #ifdef __GNUC__ #include -- cgit v1.3.1 From 23cd9543a52b96ac75d666eee3576b47f1901248 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Dec 2020 14:41:21 -0300 Subject: tools headers: Update linux/ctype.h with the kernel sources To pick up the changes in: caabdd0f59a9771e ("ctype.h: remove duplicate isdigit() helper") Addressing this perf build warning: Warning: Kernel ABI header at 'tools/include/linux/ctype.h' differs from latest version at 'include/linux/ctype.h' diff -u tools/include/linux/ctype.h include/linux/ctype.h And we need to continue using the combination of: inline __isdigit() #define isdigit() __isdigit When the __has_builtin() thing isn't available, as it is a builtin in older systems with it as a builtin but with compilers not hacinv __has_builtin(), rendering the __has_builtin() check useless otherwise. Cc: Adrian Hunter Cc: Arnd Bergmann Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/ctype.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/include/linux/ctype.h b/tools/include/linux/ctype.h index 310090b4c474..29ed3fe94404 100644 --- a/tools/include/linux/ctype.h +++ b/tools/include/linux/ctype.h @@ -2,6 +2,8 @@ #ifndef _LINUX_CTYPE_H #define _LINUX_CTYPE_H +#include + /* * NOTE! This ctype does not handle EOF like the standard C * library is required to. @@ -23,11 +25,6 @@ extern const unsigned char _ctype[]; #define isalnum(c) ((__ismask(c)&(_U|_L|_D)) != 0) #define isalpha(c) ((__ismask(c)&(_U|_L)) != 0) #define iscntrl(c) ((__ismask(c)&(_C)) != 0) -static inline int __isdigit(int c) -{ - return '0' <= c && c <= '9'; -} -#define isdigit(c) __isdigit(c) #define isgraph(c) ((__ismask(c)&(_P|_U|_L|_D)) != 0) #define islower(c) ((__ismask(c)&(_L)) != 0) #define isprint(c) ((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0) @@ -40,6 +37,16 @@ static inline int __isdigit(int c) #define isascii(c) (((unsigned char)(c))<=0x7f) #define toascii(c) (((unsigned char)(c))&0x7f) +#if __has_builtin(__builtin_isdigit) +#define isdigit(c) __builtin_isdigit(c) +#else +static inline int __isdigit(int c) +{ + return '0' <= c && c <= '9'; +} +#define isdigit(c) __isdigit(c) +#endif + static inline unsigned char __tolower(unsigned char c) { if (isupper(c)) -- cgit v1.3.1 From eb2842da77e1f7a3c46033f930524ab76dffe67a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Dec 2020 14:42:48 -0300 Subject: perf trace beauty: Update copy of linux/socket.h with the kernel sources This just triggers the rebuilding of the syscall beautifiers that extract patterns from this file due to this cset: b713c195d5933227 ("net: provide __sys_shutdown_sock() that takes a socket") After updating it: CC /tmp/build/perf/trace/beauty/sockaddr.o Addressing this perf build warning: Warning: Kernel ABI header at 'tools/perf/trace/beauty/include/linux/socket.h' differs from latest version at 'include/linux/socket.h' diff -u tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jens Axboe Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/include/linux/socket.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index e9cb30d8cbfb..385894b4a8bb 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -436,6 +436,7 @@ extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); extern int __sys_socketpair(int family, int type, int protocol, int __user *usockvec); +extern int __sys_shutdown_sock(struct socket *sock, int how); extern int __sys_shutdown(int fd, int how); extern struct ns_common *get_net_ns(struct ns_common *ns); -- cgit v1.3.1 From e9bde94f1eb53c5721ba8e477dee837632fedebe Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Dec 2020 14:46:24 -0300 Subject: tools arch x86: Sync the msr-index.h copy with the kernel sources To pick up the changes in: d205e0f1426e0f99 ("x86/{cpufeatures,msr}: Add Intel SGX Launch Control hardware bits") e7b6385b01d8e9fb ("x86/cpufeatures: Add Intel SGX hardware bits") 43756a298928c9a4 ("powercap: Add AMD Fam17h RAPL support") 298ed2b31f552806 ("x86/msr-index: sort AMD RAPL MSRs by address") 68299a42f8428853 ("x86/mce: Enable additional error logging on certain Intel CPUs") That cause these changes in tooling: $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after $ diff -u before after --- before 2020-12-17 14:45:49.036994450 -0300 +++ after 2020-12-17 14:46:01.654256639 -0300 @@ -22,6 +22,10 @@ [0x00000060] = "LBR_CORE_TO", [0x00000079] = "IA32_UCODE_WRITE", [0x0000008b] = "IA32_UCODE_REV", + [0x0000008C] = "IA32_SGXLEPUBKEYHASH0", + [0x0000008D] = "IA32_SGXLEPUBKEYHASH1", + [0x0000008E] = "IA32_SGXLEPUBKEYHASH2", + [0x0000008F] = "IA32_SGXLEPUBKEYHASH3", [0x0000009b] = "IA32_SMM_MONITOR_CTL", [0x0000009e] = "IA32_SMBASE", [0x000000c1] = "IA32_PERFCTR0", @@ -59,6 +63,7 @@ [0x00000179] = "IA32_MCG_CAP", [0x0000017a] = "IA32_MCG_STATUS", [0x0000017b] = "IA32_MCG_CTL", + [0x0000017f] = "ERROR_CONTROL", [0x00000180] = "IA32_MCG_EAX", [0x00000181] = "IA32_MCG_EBX", [0x00000182] = "IA32_MCG_ECX", @@ -294,6 +299,7 @@ [0xc0010241 - x86_AMD_V_KVM_MSRs_offset] = "F15H_NB_PERF_CTR", [0xc0010280 - x86_AMD_V_KVM_MSRs_offset] = "F15H_PTSC", [0xc0010299 - x86_AMD_V_KVM_MSRs_offset] = "AMD_RAPL_POWER_UNIT", + [0xc001029a - x86_AMD_V_KVM_MSRs_offset] = "AMD_CORE_ENERGY_STATUS", [0xc001029b - x86_AMD_V_KVM_MSRs_offset] = "AMD_PKG_ENERGY_STATUS", [0xc00102f0 - x86_AMD_V_KVM_MSRs_offset] = "AMD_PPIN_CTL", [0xc00102f1 - x86_AMD_V_KVM_MSRs_offset] = "AMD_PPIN", $ Which causes these parts of tools/perf/ to be rebuilt: CC /tmp/build/perf/trace/beauty/tracepoints/x86_msr.o LD /tmp/build/perf/trace/beauty/tracepoints/perf-in.o LD /tmp/build/perf/trace/beauty/perf-in.o LD /tmp/build/perf/perf-in.o LINK /tmp/build/perf/perf At some point these should just be tables read by perf on demand. This allows 'perf trace' users to use those strings to translate from the msr ids provided by the msr: tracepoints. This addresses this perf tools build warning: diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h' Cc: Adrian Hunter Cc: Borislav Petkov Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Rafael J. Wysocki Cc: Sean Christopherson Cc: Tony Luck Cc: Victor Ding Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/msr-index.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 972a34d93505..2b5fc9accec4 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -139,6 +139,7 @@ #define MSR_IA32_MCG_CAP 0x00000179 #define MSR_IA32_MCG_STATUS 0x0000017a #define MSR_IA32_MCG_CTL 0x0000017b +#define MSR_ERROR_CONTROL 0x0000017f #define MSR_IA32_MCG_EXT_CTL 0x000004d0 #define MSR_OFFCORE_RSP_0 0x000001a6 @@ -326,8 +327,9 @@ #define MSR_PP1_ENERGY_STATUS 0x00000641 #define MSR_PP1_POLICY 0x00000642 -#define MSR_AMD_PKG_ENERGY_STATUS 0xc001029b #define MSR_AMD_RAPL_POWER_UNIT 0xc0010299 +#define MSR_AMD_CORE_ENERGY_STATUS 0xc001029a +#define MSR_AMD_PKG_ENERGY_STATUS 0xc001029b /* Config TDP MSRs */ #define MSR_CONFIG_TDP_NOMINAL 0x00000648 @@ -609,6 +611,8 @@ #define FEAT_CTL_LOCKED BIT(0) #define FEAT_CTL_VMX_ENABLED_INSIDE_SMX BIT(1) #define FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX BIT(2) +#define FEAT_CTL_SGX_LC_ENABLED BIT(17) +#define FEAT_CTL_SGX_ENABLED BIT(18) #define FEAT_CTL_LMCE_ENABLED BIT(20) #define MSR_IA32_TSC_ADJUST 0x0000003b @@ -628,6 +632,12 @@ #define MSR_IA32_UCODE_WRITE 0x00000079 #define MSR_IA32_UCODE_REV 0x0000008b +/* Intel SGX Launch Enclave Public Key Hash MSRs */ +#define MSR_IA32_SGXLEPUBKEYHASH0 0x0000008C +#define MSR_IA32_SGXLEPUBKEYHASH1 0x0000008D +#define MSR_IA32_SGXLEPUBKEYHASH2 0x0000008E +#define MSR_IA32_SGXLEPUBKEYHASH3 0x0000008F + #define MSR_IA32_SMM_MONITOR_CTL 0x0000009b #define MSR_IA32_SMBASE 0x0000009e -- cgit v1.3.1 From 7ddcdea5b54492f54700f427f58690cf1e187e5e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Dec 2020 14:55:01 -0300 Subject: tools headers UAPI: Sync linux/const.h with the kernel headers To pick up the changes in: a85cbe6159ffc973 ("uapi: move constants from to ") That causes no changes in tooling, just addresses this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/const.h' differs from latest version at 'include/uapi/linux/const.h' diff -u tools/include/uapi/linux/const.h include/uapi/linux/const.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Petr Vorel Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/const.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h index 5ed721ad5b19..af2a44c08683 100644 --- a/tools/include/uapi/linux/const.h +++ b/tools/include/uapi/linux/const.h @@ -28,4 +28,9 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) +#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) +#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) + +#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) + #endif /* _UAPI_LINUX_CONST_H */ -- cgit v1.3.1 From 4a443a51776ca9847942523cf987a330894d3a31 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Dec 2020 14:58:51 -0300 Subject: tools headers UAPI: Sync linux/fscrypt.h with the kernel sources To pick the changes from: 3ceb6543e9cf6ed8 ("fscrypt: remove kernel-internal constants from UAPI header") That don't result in any changes in tooling, just addressing this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/fscrypt.h' differs from latest version at 'include/uapi/linux/fscrypt.h' diff -u tools/include/uapi/linux/fscrypt.h include/uapi/linux/fscrypt.h Cc: Adrian Hunter Cc: Eric Biggers Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/fscrypt.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index e5de60336938..9f4428be3e36 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -20,7 +20,6 @@ #define FSCRYPT_POLICY_FLAG_DIRECT_KEY 0x04 #define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 0x08 #define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32 0x10 -#define FSCRYPT_POLICY_FLAGS_VALID 0x1F /* Encryption algorithms */ #define FSCRYPT_MODE_AES_256_XTS 1 @@ -28,7 +27,7 @@ #define FSCRYPT_MODE_AES_128_CBC 5 #define FSCRYPT_MODE_AES_128_CTS 6 #define FSCRYPT_MODE_ADIANTUM 9 -#define __FSCRYPT_MODE_MAX 9 +/* If adding a mode number > 9, update FSCRYPT_MODE_MAX in fscrypt_private.h */ /* * Legacy policy version; ad-hoc KDF and no key verification. @@ -177,7 +176,7 @@ struct fscrypt_get_key_status_arg { #define FS_POLICY_FLAGS_PAD_32 FSCRYPT_POLICY_FLAGS_PAD_32 #define FS_POLICY_FLAGS_PAD_MASK FSCRYPT_POLICY_FLAGS_PAD_MASK #define FS_POLICY_FLAG_DIRECT_KEY FSCRYPT_POLICY_FLAG_DIRECT_KEY -#define FS_POLICY_FLAGS_VALID FSCRYPT_POLICY_FLAGS_VALID +#define FS_POLICY_FLAGS_VALID 0x07 /* contains old flags only */ #define FS_ENCRYPTION_MODE_INVALID 0 /* never used */ #define FS_ENCRYPTION_MODE_AES_256_XTS FSCRYPT_MODE_AES_256_XTS #define FS_ENCRYPTION_MODE_AES_256_GCM 2 /* never used */ -- cgit v1.3.1 From d6dbfceec5dd41becbe8c47c402240925d31036a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Dec 2020 15:01:08 -0300 Subject: tools headers UAPI: Sync linux/prctl.h with the kernel sources To pick a new prctl introduced in: 1446e1df9eb183fd ("kernel: Implement selective syscall userspace redirection") That results in: $ tools/perf/trace/beauty/prctl_option.sh > before $ cp include/uapi/linux/prctl.h tools/include/uapi/linux/prctl.h $ tools/perf/trace/beauty/prctl_option.sh > after $ diff -u before after --- before 2020-12-17 15:00:42.012537367 -0300 +++ after 2020-12-17 15:00:49.832699463 -0300 @@ -53,6 +53,7 @@ [56] = "GET_TAGGED_ADDR_CTRL", [57] = "SET_IO_FLUSHER", [58] = "GET_IO_FLUSHER", + [59] = "SET_SYSCALL_USER_DISPATCH", }; static const char *prctl_set_mm_options[] = { [1] = "START_CODE", $ Now users can do: # perf trace -e syscalls:sys_enter_prctl --filter "option==SET_SYSCALL_USER_DISPATCH" ^C# # trace -v -e syscalls:sys_enter_prctl --filter "option==SET_SYSCALL_USER_DISPATCH" New filter for syscalls:sys_enter_prctl: (option==0x3b) && (common_pid != 5519 && common_pid != 3404) ^C# And also when prctl appears in a session, its options will be translated to the string. Cc: Adrian Hunter Cc: Gabriel Krisman Bertazi Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Gleixner Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/prctl.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 7f0827705c9a..90deb41c8a34 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -247,4 +247,9 @@ struct prctl_mm_map { #define PR_SET_IO_FLUSHER 57 #define PR_GET_IO_FLUSHER 58 +/* Dispatch syscalls to a userspace handler */ +#define PR_SET_SYSCALL_USER_DISPATCH 59 +# define PR_SYS_DISPATCH_OFF 0 +# define PR_SYS_DISPATCH_ON 1 + #endif /* _LINUX_PRCTL_H */ -- cgit v1.3.1 From f93c789a3e245707e3eddcaab5c2b7c62615692d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Dec 2020 15:44:29 -0300 Subject: tools headers cpufeatures: Sync with the kernel sources To pick the changes in: e7b6385b01d8e9fb ("x86/cpufeatures: Add Intel SGX hardware bits") That causes only these 'perf bench' objects to rebuild: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And addresses these perf build warnings: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/disabled-features.h' differs from latest version at 'arch/x86/include/asm/disabled-features.h' diff -u tools/arch/x86/include/asm/disabled-features.h arch/x86/include/asm/disabled-features.h Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Cc: Adrian Hunter Cc: Borislav Petkov Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Sean Christopherson Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 2 ++ tools/arch/x86/include/asm/disabled-features.h | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index dad350d42ecf..f5ef2d5b9231 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -241,6 +241,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ #define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */ +#define X86_FEATURE_SGX ( 9*32+ 2) /* Software Guard Extensions */ #define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ #define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ #define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ @@ -356,6 +357,7 @@ #define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ #define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ #define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */ +#define X86_FEATURE_SGX_LC (16*32+30) /* Software Guard Extensions Launch Control */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 5861d34f9771..7947cb1782da 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -62,6 +62,12 @@ # define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) #endif +#ifdef CONFIG_X86_SGX +# define DISABLE_SGX 0 +#else +# define DISABLE_SGX (1 << (X86_FEATURE_SGX & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -74,7 +80,7 @@ #define DISABLED_MASK6 0 #define DISABLED_MASK7 (DISABLE_PTI) #define DISABLED_MASK8 0 -#define DISABLED_MASK9 (DISABLE_SMAP) +#define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX) #define DISABLED_MASK10 0 #define DISABLED_MASK11 0 #define DISABLED_MASK12 0 -- cgit v1.3.1 From b53d4872d2cfbce117abedee2a29a93e624e4e32 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Dec 2020 15:48:06 -0300 Subject: tools headers UAPI: Update asm-generic/unistd.h Just a comment change, trivial. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/unistd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 2056318988f7..fc48c64700eb 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -517,7 +517,7 @@ __SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday) __SC_3264(__NR_adjtimex, sys_adjtimex_time32, sys_adjtimex) #endif -/* kernel/timer.c */ +/* kernel/sys.c */ #define __NR_getpid 172 __SYSCALL(__NR_getpid, sys_getpid) #define __NR_getppid 173 -- cgit v1.3.1 From 697d1549140cdcdc4cfcd0bf94e62643008972b7 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 9 Dec 2020 16:42:03 +0800 Subject: tools/virtio: include asm/bug.h WARN_ON is used in drivers/vhost/vringh.c, to avoid build failure, need include asm/bug.h Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20201209084205.24062-2-peng.fan@oss.nxp.com Signed-off-by: Michael S. Tsirkin --- tools/virtio/linux/bug.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/virtio/linux/bug.h b/tools/virtio/linux/bug.h index b14c2c3b6b85..813baf13f62a 100644 --- a/tools/virtio/linux/bug.h +++ b/tools/virtio/linux/bug.h @@ -2,6 +2,8 @@ #ifndef BUG_H #define BUG_H +#include + #define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond)) #define BUILD_BUG_ON(x) -- cgit v1.3.1 From b9ca93bcd186ec4144df91c619f6084cdad500ec Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 9 Dec 2020 16:42:04 +0800 Subject: tools/virtio: add krealloc_array krealloc_array is used in drivers/vhost/vringh.c, add it to avoid build failure. Drop WARN_ON_ONCE, because duplicated with the one in bug.h Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20201209084205.24062-3-peng.fan@oss.nxp.com Signed-off-by: Michael S. Tsirkin --- tools/virtio/linux/kernel.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index 315e85cabeda..0b493542e61a 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -117,6 +118,16 @@ static inline void free_page(unsigned long addr) # define unlikely(x) (__builtin_expect(!!(x), 0)) # endif +static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t gfp) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) + return NULL; + + return krealloc(p, bytes, gfp); +} + #define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__) #ifdef DEBUG #define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__) @@ -126,8 +137,6 @@ static inline void free_page(unsigned long addr) #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) -#define WARN_ON_ONCE(cond) (unlikely(cond) ? fprintf (stderr, "WARNING\n") : 0) - #define min(x, y) ({ \ typeof(x) _min1 = (x); \ typeof(y) _min2 = (y); \ -- cgit v1.3.1 From 1a5514cbb09aaf694d26ef26fd6da5c5d495cc22 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 9 Dec 2020 16:42:05 +0800 Subject: tools/virtio: add barrier for aarch64 Add barrier for aarch64 for cross compiling, and most are from Linux Kernel. Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20201209084205.24062-4-peng.fan@oss.nxp.com Signed-off-by: Michael S. Tsirkin --- tools/virtio/asm/barrier.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools') diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h index 04d563fc9b95..468435ed64e6 100644 --- a/tools/virtio/asm/barrier.h +++ b/tools/virtio/asm/barrier.h @@ -16,6 +16,16 @@ # define mb() abort() # define dma_rmb() abort() # define dma_wmb() abort() +#elif defined(__aarch64__) +#define dmb(opt) asm volatile("dmb " #opt : : : "memory") +#define virt_mb() __sync_synchronize() +#define virt_rmb() dmb(ishld) +#define virt_wmb() dmb(ishst) +#define virt_store_mb(var, value) do { WRITE_ONCE(var, value); dmb(ish); } while (0) +/* Weak barriers should be used. If not - it's a bug */ +# define mb() abort() +# define dma_rmb() abort() +# define dma_wmb() abort() #else #error Please fill in barrier macros #endif -- cgit v1.3.1 From ca202504ea6f04b2e724741100ab63f8f018a8af Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 18 Dec 2020 15:54:12 +0100 Subject: selftests/core: fix close_range_test build after XFAIL removal XFAIL was removed in commit 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP") and its use in close_range_test was already replaced by commit 1d44d0dd61b6 ("selftests: core: use SKIP instead of XFAIL in close_range_test.c"). However, commit 23afeaeff3d9 ("selftests: core: add tests for CLOSE_RANGE_CLOEXEC") introduced usage of XFAIL in TEST(close_range_cloexec). Use SKIP there as well. Fixes: 23afeaeff3d9 ("selftests: core: add tests for CLOSE_RANGE_CLOEXEC") Cc: Giuseppe Scrivano Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Tobias Klauser Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20201218112428.13662-1-tklauser@distanz.ch Link: https://lore.kernel.org/r/20201218145415.801063-1-christian.brauner@ubuntu.com Signed-off-by: Christian Brauner --- tools/testing/selftests/core/close_range_test.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index 87e16d65d9d7..c97dd1a7abd6 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -102,7 +102,7 @@ TEST(close_range_unshare) int i, ret, status; pid_t pid; int open_fds[101]; - struct clone_args args = { + struct __clone_args args = { .flags = CLONE_FILES, .exit_signal = SIGCHLD, }; @@ -191,7 +191,7 @@ TEST(close_range_unshare_capped) int i, ret, status; pid_t pid; int open_fds[101]; - struct clone_args args = { + struct __clone_args args = { .flags = CLONE_FILES, .exit_signal = SIGCHLD, }; @@ -241,7 +241,7 @@ TEST(close_range_cloexec) fd = open("/dev/null", O_RDONLY); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; @@ -250,9 +250,9 @@ TEST(close_range_cloexec) ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC); if (ret < 0) { if (errno == ENOSYS) - XFAIL(return, "close_range() syscall not supported"); + SKIP(return, "close_range() syscall not supported"); if (errno == EINVAL) - XFAIL(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC"); + SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC"); } /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */ -- cgit v1.3.1 From ae78ba8d3bb66dfe8c0f7b7ec5ffe3f6a13feb86 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 18 Dec 2020 15:54:13 +0100 Subject: selftests/core: handle missing syscall number for close_range This improves the syscall number handling in the close_range() selftests. This should handle any architecture. Cc: Giuseppe Scrivano Cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20201218145415.801063-2-christian.brauner@ubuntu.com Signed-off-by: Christian Brauner --- tools/testing/selftests/core/close_range_test.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index c97dd1a7abd6..bc592a1372bb 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -17,7 +17,23 @@ #include "../clone3/clone3_selftests.h" #ifndef __NR_close_range -#define __NR_close_range -1 + #if defined __alpha__ + #define __NR_close_range 546 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_close_range (436 + 4000) + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_close_range (436 + 6000) + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_close_range (436 + 5000) + #endif + #elif defined __ia64__ + #define __NR_close_range (436 + 1024) + #else + #define __NR_close_range 436 + #endif #endif #ifndef CLOSE_RANGE_UNSHARE -- cgit v1.3.1 From fe325c3ff3188d551668c5847bac58463b9f3437 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 18 Dec 2020 15:54:14 +0100 Subject: selftests/core: add test for CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC Add a test to verify that CLOSE_RANGE_UNSHARE works correctly when combined with CLOSE_RANGE_CLOEXEC for the single-threaded case. Cc: Giuseppe Scrivano Cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20201218145415.801063-3-christian.brauner@ubuntu.com Signed-off-by: Christian Brauner --- tools/testing/selftests/core/close_range_test.c | 70 +++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index bc592a1372bb..862444f1c244 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -313,5 +313,75 @@ TEST(close_range_cloexec) } } +TEST(close_range_cloexec_unshare) +{ + int i, ret; + int open_fds[101]; + struct rlimit rlimit; + + for (i = 0; i < ARRAY_SIZE(open_fds); i++) { + int fd; + + fd = open("/dev/null", O_RDONLY); + ASSERT_GE(fd, 0) { + if (errno == ENOENT) + SKIP(return, "Skipping test since /dev/null does not exist"); + } + + open_fds[i] = fd; + } + + ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC); + if (ret < 0) { + if (errno == ENOSYS) + SKIP(return, "close_range() syscall not supported"); + if (errno == EINVAL) + SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC"); + } + + /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */ + ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit)); + rlimit.rlim_cur = 25; + ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit)); + + /* Set close-on-exec for two ranges: [0-50] and [75-100]. */ + ret = sys_close_range(open_fds[0], open_fds[50], + CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE); + ASSERT_EQ(0, ret); + ret = sys_close_range(open_fds[75], open_fds[100], + CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE); + ASSERT_EQ(0, ret); + + for (i = 0; i <= 50; i++) { + int flags = fcntl(open_fds[i], F_GETFD); + + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + } + + for (i = 51; i <= 74; i++) { + int flags = fcntl(open_fds[i], F_GETFD); + + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, 0); + } + + for (i = 75; i <= 100; i++) { + int flags = fcntl(open_fds[i], F_GETFD); + + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + } + + /* Test a common pattern. */ + ret = sys_close_range(3, UINT_MAX, + CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE); + for (i = 0; i <= 100; i++) { + int flags = fcntl(open_fds[i], F_GETFD); + + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + } +} TEST_HARNESS_MAIN -- cgit v1.3.1 From 6abc20f8f879d891930f37186b19c9dc3ecc34dd Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 18 Dec 2020 15:54:15 +0100 Subject: selftests/core: add regression test for CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC This test is a minimalized version of the reproducer given by syzbot (cf. [1]). After introducing CLOSE_RANGE_CLOEXEC syzbot reported a crash when CLOSE_RANGE_CLOEXEC is specified in conjunction with CLOSE_RANGE_UNSHARE. When CLOSE_RANGE_UNSHARE is specified the caller will receive a private file descriptor table in case their file descriptor table is currently shared. For the case where the caller has requested all file descriptors to be actually closed via e.g. close_range(3, ~0U, 0) the kernel knows that the caller does not need any of the file descriptors anymore and will optimize the close operation by only copying all files in the range from 0 to 3 and no others. However, if the caller requested CLOSE_RANGE_CLOEXEC together with CLOSE_RANGE_UNSHARE the caller wants to still make use of the file descriptors so the kernel needs to copy all of them and can't optimize. The original patch didn't account for this and thus could cause oopses as evidenced by the syzbot report. Add tests for this regression. We first create a huge gap in the fd table. When we now call CLOSE_RANGE_UNSHARE with a shared fd table and and with ~0U as upper bound the kernel will only copy up to fd1 file descriptors into the new fd table. If the kernel is buggy and doesn't handle CLOSE_RANGE_CLOEXEC correctly it will not have copied all file descriptors and we will oops! This test passes on a fixed kernel and will trigger an oops on a buggy kernel. [1]: https://syzkaller.appspot.com/text?tag=KernelConfig&x=db720fe37a6a41d8 Cc: Giuseppe Scrivano Cc: linux-fsdevel@vger.kernel.org Link: syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20201218145415.801063-4-christian.brauner@ubuntu.com Signed-off-by: Christian Brauner --- tools/testing/selftests/core/close_range_test.c | 183 ++++++++++++++++++++++++ 1 file changed, 183 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index 862444f1c244..73eb29c916d1 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -384,4 +384,187 @@ TEST(close_range_cloexec_unshare) } } +/* + * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com + */ +TEST(close_range_cloexec_syzbot) +{ + int fd1, fd2, fd3, flags, ret, status; + pid_t pid; + struct __clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + + /* Create a huge gap in the fd table. */ + fd1 = open("/dev/null", O_RDWR); + EXPECT_GT(fd1, 0); + + fd2 = dup2(fd1, 1000); + EXPECT_GT(fd2, 0); + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_GE(pid, 0); + + if (pid == 0) { + ret = sys_close_range(3, ~0U, CLOSE_RANGE_CLOEXEC); + if (ret) + exit(EXIT_FAILURE); + + /* + * We now have a private file descriptor table and all + * our open fds should still be open but made + * close-on-exec. + */ + flags = fcntl(fd1, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + + flags = fcntl(fd2, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + + fd3 = dup2(fd1, 42); + EXPECT_GT(fd3, 0); + + /* + * Duplicating the file descriptor must remove the + * FD_CLOEXEC flag. + */ + flags = fcntl(fd3, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, 0); + + exit(EXIT_SUCCESS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + /* + * We had a shared file descriptor table before along with requesting + * close-on-exec so the original fds must not be close-on-exec. + */ + flags = fcntl(fd1, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + + flags = fcntl(fd2, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + + fd3 = dup2(fd1, 42); + EXPECT_GT(fd3, 0); + + flags = fcntl(fd3, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, 0); + + EXPECT_EQ(close(fd1), 0); + EXPECT_EQ(close(fd2), 0); + EXPECT_EQ(close(fd3), 0); +} + +/* + * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com + */ +TEST(close_range_cloexec_unshare_syzbot) +{ + int i, fd1, fd2, fd3, flags, ret, status; + pid_t pid; + struct __clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + + /* + * Create a huge gap in the fd table. When we now call + * CLOSE_RANGE_UNSHARE with a shared fd table and and with ~0U as upper + * bound the kernel will only copy up to fd1 file descriptors into the + * new fd table. If the kernel is buggy and doesn't handle + * CLOSE_RANGE_CLOEXEC correctly it will not have copied all file + * descriptors and we will oops! + * + * On a buggy kernel this should immediately oops. But let's loop just + * to be sure. + */ + fd1 = open("/dev/null", O_RDWR); + EXPECT_GT(fd1, 0); + + fd2 = dup2(fd1, 1000); + EXPECT_GT(fd2, 0); + + for (i = 0; i < 100; i++) { + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_GE(pid, 0); + + if (pid == 0) { + ret = sys_close_range(3, ~0U, CLOSE_RANGE_UNSHARE | + CLOSE_RANGE_CLOEXEC); + if (ret) + exit(EXIT_FAILURE); + + /* + * We now have a private file descriptor table and all + * our open fds should still be open but made + * close-on-exec. + */ + flags = fcntl(fd1, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + + flags = fcntl(fd2, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + + fd3 = dup2(fd1, 42); + EXPECT_GT(fd3, 0); + + /* + * Duplicating the file descriptor must remove the + * FD_CLOEXEC flag. + */ + flags = fcntl(fd3, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, 0); + + EXPECT_EQ(close(fd1), 0); + EXPECT_EQ(close(fd2), 0); + EXPECT_EQ(close(fd3), 0); + + exit(EXIT_SUCCESS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + } + + /* + * We created a private file descriptor table before along with + * requesting close-on-exec so the original fds must not be + * close-on-exec. + */ + flags = fcntl(fd1, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, 0); + + flags = fcntl(fd2, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, 0); + + fd3 = dup2(fd1, 42); + EXPECT_GT(fd3, 0); + + flags = fcntl(fd3, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, 0); + + EXPECT_EQ(close(fd1), 0); + EXPECT_EQ(close(fd2), 0); + EXPECT_EQ(close(fd3), 0); +} + TEST_HARNESS_MAIN -- cgit v1.3.1 From e9ce39b5b390e0e5944a46328cb0a18d132de532 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 18 Dec 2020 14:05:44 -0800 Subject: selftests/filesystems: expand epoll with epoll_pwait2 Code coverage for the epoll_pwait2 syscall. epoll62: Repeat basic test epoll1, but exercising the new syscall. epoll63: Pass a timespec and exercise the timeout wakeup path. Link: https://lkml.kernel.org/r/20201121144401.3727659-5-willemdebruijn.kernel@gmail.com Signed-off-by: Willem de Bruijn Cc: Al Viro Cc: Arnd Bergmann Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .../filesystems/epoll/epoll_wakeup_test.c | 72 ++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c index 8f82f99f7748..ad7fabd575f9 100644 --- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c +++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE +#include +#include #include #include #include @@ -21,6 +23,19 @@ struct epoll_mtcontext pthread_t waiter; }; +#ifndef __NR_epoll_pwait2 +#define __NR_epoll_pwait2 -1 +#endif + +static inline int sys_epoll_pwait2(int fd, struct epoll_event *events, + int maxevents, + const struct __kernel_timespec *timeout, + const sigset_t *sigset, size_t sigsetsize) +{ + return syscall(__NR_epoll_pwait2, fd, events, maxevents, timeout, + sigset, sigsetsize); +} + static void signal_handler(int signum) { } @@ -3377,4 +3392,61 @@ TEST(epoll61) close(ctx.evfd); } +/* Equivalent to basic test epoll1, but exercising epoll_pwait2. */ +TEST(epoll62) +{ + int efd; + int sfd[2]; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd = epoll_create(1); + ASSERT_GE(efd, 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + EXPECT_EQ(sys_epoll_pwait2(efd, &e, 1, NULL, NULL, 0), 1); + EXPECT_EQ(sys_epoll_pwait2(efd, &e, 1, NULL, NULL, 0), 1); + + close(efd); + close(sfd[0]); + close(sfd[1]); +} + +/* Epoll_pwait2 basic timeout test. */ +TEST(epoll63) +{ + const int cfg_delay_ms = 10; + unsigned long long tdiff; + struct __kernel_timespec ts; + int efd; + int sfd[2]; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd = epoll_create(1); + ASSERT_GE(efd, 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0); + + ts.tv_sec = 0; + ts.tv_nsec = cfg_delay_ms * 1000 * 1000; + + tdiff = msecs(); + EXPECT_EQ(sys_epoll_pwait2(efd, &e, 1, &ts, NULL, 0), 0); + tdiff = msecs() - tdiff; + + EXPECT_GE(tdiff, cfg_delay_ms); + + close(efd); + close(sfd[0]); + close(sfd[1]); +} + TEST_HARNESS_MAIN -- cgit v1.3.1 From 6b9bae63de4fe24365ad0c2d23e77ae06f8c58e4 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 16 Dec 2020 10:57:57 -0800 Subject: perf script: Support data page size Display the data page size if it is available and asked by the user: Can be configured by the user, for example: perf script --fields comm,event,phys_addr,data_page_size dtlb mem-loads:uP: 3fec82ea8 4K dtlb mem-loads:uP: 3fec82e90 4K dtlb mem-loads:uP: 3e23700a4 4K dtlb mem-loads:uP: 3fec82f20 4K dtlb mem-loads:uP: 3e23700a4 4K dtlb mem-loads:uP: 3b4211bec 4K dtlb mem-loads:uP: 382205dc0 2M dtlb mem-loads:uP: 36fa082c0 2M dtlb mem-loads:uP: 377607340 2M dtlb mem-loads:uP: 330010180 2M dtlb mem-loads:uP: 33200fd80 2M dtlb mem-loads:uP: 31b012b80 2M Signed-off-by: Kan Liang Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: http://lore.kernel.org/lkml/20201216185805.9981-2-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 5 +++-- tools/perf/builtin-script.c | 17 +++++++++++++++-- tools/perf/util/event.h | 3 +++ tools/perf/util/session.c | 13 +++++++++++++ 4 files changed, 34 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 4f712fb8f175..44d37210fc8f 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -116,8 +116,9 @@ OPTIONS --fields:: Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, - srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, - brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode, ipc. + srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, + brstackinsn, brstackoff, callindent, insn, insnlen, synth, phys_addr, + metric, misc, srccode, ipc, data_page_size. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 1c322c129185..edacfa98d073 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -30,6 +30,7 @@ #include "util/thread-stack.h" #include "util/time-utils.h" #include "util/path.h" +#include "util/event.h" #include "ui/ui.h" #include "print_binary.h" #include "archinsn.h" @@ -115,6 +116,7 @@ enum perf_output_field { PERF_OUTPUT_SRCCODE = 1ULL << 30, PERF_OUTPUT_IPC = 1ULL << 31, PERF_OUTPUT_TOD = 1ULL << 32, + PERF_OUTPUT_DATA_PAGE_SIZE = 1ULL << 33, }; struct perf_script { @@ -179,6 +181,7 @@ struct output_option { {.str = "srccode", .field = PERF_OUTPUT_SRCCODE}, {.str = "ipc", .field = PERF_OUTPUT_IPC}, {.str = "tod", .field = PERF_OUTPUT_TOD}, + {.str = "data_page_size", .field = PERF_OUTPUT_DATA_PAGE_SIZE}, }; enum { @@ -251,7 +254,8 @@ static struct { PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET | PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC | - PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR, + PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR | + PERF_OUTPUT_DATA_PAGE_SIZE, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, @@ -499,6 +503,10 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR)) return -EINVAL; + if (PRINT_FIELD(DATA_PAGE_SIZE) && + evsel__check_stype(evsel, PERF_SAMPLE_DATA_PAGE_SIZE, "DATA_PAGE_SIZE", PERF_OUTPUT_DATA_PAGE_SIZE)) + return -EINVAL; + return 0; } @@ -1920,6 +1928,7 @@ static void process_event(struct perf_script *script, unsigned int type = output_type(attr->type); struct evsel_script *es = evsel->priv; FILE *fp = es->fp; + char str[PAGE_SIZE_NAME_LEN]; if (output[type].fields == 0) return; @@ -2008,6 +2017,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(PHYS_ADDR)) fprintf(fp, "%16" PRIx64, sample->phys_addr); + if (PRINT_FIELD(DATA_PAGE_SIZE)) + fprintf(fp, " %s", get_page_size_name(sample->data_page_size, str)); + perf_sample__fprintf_ipc(sample, attr, fp); fprintf(fp, "\n"); @@ -3506,7 +3518,8 @@ int cmd_script(int argc, const char **argv) "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," "addr,symoff,srcline,period,iregs,uregs,brstack," "brstacksym,flags,bpf-output,brstackinsn,brstackoff," - "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod", + "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod," + "data_page_size", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 448ac30c2fc4..ff403ea578e1 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -409,4 +409,7 @@ extern int sysctl_perf_event_max_stack; extern int sysctl_perf_event_max_contexts_per_stack; extern unsigned int proc_map_timeout; +#define PAGE_SIZE_NAME_LEN 32 +char *get_page_size_name(u64 size, char *str); + #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 3b3c50b12791..50ff9795a4f1 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -32,6 +32,7 @@ #include "ui/progress.h" #include "../perf.h" #include "arch/common.h" +#include "units.h" #include #ifdef HAVE_ZSTD_SUPPORT @@ -1258,10 +1259,19 @@ static void dump_event(struct evlist *evlist, union perf_event *event, event->header.size, perf_event__name(event->header.type)); } +char *get_page_size_name(u64 size, char *str) +{ + if (!size || !unit_number__scnprintf(str, PAGE_SIZE_NAME_LEN, size)) + snprintf(str, PAGE_SIZE_NAME_LEN, "%s", "N/A"); + + return str; +} + static void dump_sample(struct evsel *evsel, union perf_event *event, struct perf_sample *sample) { u64 sample_type; + char str[PAGE_SIZE_NAME_LEN]; if (!dump_trace) return; @@ -1296,6 +1306,9 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, if (sample_type & PERF_SAMPLE_PHYS_ADDR) printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr); + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + printf(" .. data page size: %s\n", get_page_size_name(sample->data_page_size, str)); + if (sample_type & PERF_SAMPLE_TRANSACTION) printf("... transaction: %" PRIx64 "\n", sample->transaction); -- cgit v1.3.1 From a50d03e3b8b68df13e47dcbde6c5d39b4237c479 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 16 Dec 2020 10:57:58 -0800 Subject: perf sort: Add sort option for data page size Add a new sort option "data_page_size" for --mem-mode sort. With this option applied, perf can sort and report by sample's data page size. Here is an example: perf report --stdio --mem-mode --sort=comm,symbol,phys_daddr,data_page_size # To display the perf.data header info, please use # --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 9K of event 'mem-loads:uP' # Total weight : 9028 # Sort order : comm,symbol,phys_daddr,data_page_size # # Overhead Command Symbol Data Physical # Address # Data Page Size # ........ ....... ............................ # ...................... ...................... # 11.19% dtlb [.] touch_buffer [.] 0x00000003fec82ea8 4K 8.61% dtlb [.] GetTickCount [.] 0x00000003c4f2c8a8 4K 4.52% dtlb [.] GetTickCount [.] 0x00000003fec82f58 4K 4.33% dtlb [.] __gettimeofday [.] 0x00000003fec82f48 4K 4.32% dtlb [.] GetTickCount [.] 0x00000003fec82f78 4K 4.28% dtlb [.] GetTickCount [.] 0x00000003fec82f50 4K 4.23% dtlb [.] GetTickCount [.] 0x00000003fec82f70 4K 4.11% dtlb [.] GetTickCount [.] 0x00000003fec82f68 4K 4.00% dtlb [.] Calibrate [.] 0x00000003fec82f98 4K 3.91% dtlb [.] Calibrate [.] 0x00000003fec82f90 4K 3.43% dtlb [.] touch_buffer [.] 0x00000003fec82e98 4K 3.42% dtlb [.] touch_buffer [.] 0x00000003fec82e90 4K 0.09% dtlb [.] DoDependentLoads [.] 0x000000036ea084c0 2M 0.08% dtlb [.] DoDependentLoads [.] 0x000000032b010b80 2M Signed-off-by: Kan Liang Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: http://lore.kernel.org/lkml/20201216185805.9981-3-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 1 + tools/perf/util/hist.c | 3 +++ tools/perf/util/hist.h | 1 + tools/perf/util/machine.c | 7 +++++-- tools/perf/util/map_symbol.h | 1 + tools/perf/util/sort.c | 30 ++++++++++++++++++++++++++++++ tools/perf/util/sort.h | 1 + 7 files changed, 42 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index d068103690cc..8f7f4e9605d8 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -150,6 +150,7 @@ OPTIONS - snoop: type of snoop (if any) for the data at the time of the sample - dcacheline: the cacheline the data address is on at the time of the sample - phys_daddr: physical address of data being executed on at the time of sample + - data_page_size: the data page size of data being executed on at the time of sample And the default sort keys are changed to local_weight, mem, sym, dso, symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 7feeaa07c777..a08fb9ea411b 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -188,6 +188,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_MEM_PHYS_DADDR, unresolved_col_width + 4 + 2); + hists__new_col_len(hists, HISTC_MEM_DATA_PAGE_SIZE, + unresolved_col_width + 4 + 2); + } else { symlen = unresolved_col_width + 4 + 2; hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index df6c6eea0960..14f66330923d 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -56,6 +56,7 @@ enum hist_column { HISTC_MEM_DADDR_SYMBOL, HISTC_MEM_DADDR_DSO, HISTC_MEM_PHYS_DADDR, + HISTC_MEM_DATA_PAGE_SIZE, HISTC_MEM_LOCKED, HISTC_MEM_TLB, HISTC_MEM_LVL, diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 1ae32a81639c..f841f3503cae 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2023,11 +2023,12 @@ static void ip__resolve_ams(struct thread *thread, ams->ms.sym = al.sym; ams->ms.map = al.map; ams->phys_addr = 0; + ams->data_page_size = 0; } static void ip__resolve_data(struct thread *thread, u8 m, struct addr_map_symbol *ams, - u64 addr, u64 phys_addr) + u64 addr, u64 phys_addr, u64 daddr_page_size) { struct addr_location al; @@ -2041,6 +2042,7 @@ static void ip__resolve_data(struct thread *thread, ams->ms.sym = al.sym; ams->ms.map = al.map; ams->phys_addr = phys_addr; + ams->data_page_size = daddr_page_size; } struct mem_info *sample__resolve_mem(struct perf_sample *sample, @@ -2053,7 +2055,8 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, ip__resolve_ams(al->thread, &mi->iaddr, sample->ip); ip__resolve_data(al->thread, al->cpumode, &mi->daddr, - sample->addr, sample->phys_addr); + sample->addr, sample->phys_addr, + sample->data_page_size); mi->data_src.val = sample->data_src; return mi; diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h index 5b8ca93798e9..7d22ade082c8 100644 --- a/tools/perf/util/map_symbol.h +++ b/tools/perf/util/map_symbol.h @@ -19,5 +19,6 @@ struct addr_map_symbol { u64 addr; u64 al_addr; u64 phys_addr; + u64 data_page_size; }; #endif // __PERF_MAP_SYMBOL diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 7d87bfcffb3f..80907bc32683 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1462,6 +1462,35 @@ struct sort_entry sort_mem_phys_daddr = { .se_width_idx = HISTC_MEM_PHYS_DADDR, }; +static int64_t +sort__data_page_size_cmp(struct hist_entry *left, struct hist_entry *right) +{ + uint64_t l = 0, r = 0; + + if (left->mem_info) + l = left->mem_info->daddr.data_page_size; + if (right->mem_info) + r = right->mem_info->daddr.data_page_size; + + return (int64_t)(r - l); +} + +static int hist_entry__data_page_size_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + char str[PAGE_SIZE_NAME_LEN]; + + return repsep_snprintf(bf, size, "%-*s", width, + get_page_size_name(he->mem_info->daddr.data_page_size, str)); +} + +struct sort_entry sort_mem_data_page_size = { + .se_header = "Data Page Size", + .se_cmp = sort__data_page_size_cmp, + .se_snprintf = hist_entry__data_page_size_snprintf, + .se_width_idx = HISTC_MEM_DATA_PAGE_SIZE, +}; + static int64_t sort__abort_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -1740,6 +1769,7 @@ static struct sort_dimension memory_sort_dimensions[] = { DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop), DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline), DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr), + DIM(SORT_MEM_DATA_PAGE_SIZE, "data_page_size", sort_mem_data_page_size), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 66d39c4cfe2b..e50f2b695bc4 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -255,6 +255,7 @@ enum sort_type { SORT_MEM_DCACHELINE, SORT_MEM_IADDR_SYMBOL, SORT_MEM_PHYS_DADDR, + SORT_MEM_DATA_PAGE_SIZE, }; /* -- cgit v1.3.1 From 2e7f545096f954a9726c9415763dd0bfbcac47e0 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 16 Dec 2020 10:57:59 -0800 Subject: perf mem: Factor out a function to generate sort order Now, "--phys-data" is the only option which impacts the sort order. A simple "if else" is enough to handle the option. But there will be more options added, e.g. "--data-page-size", which also impact the sort order. The code will become too complex to be maintained. Divide the sort order string into several small pieces. The first piece is always the default sort string for LOAD/STORE. Appends the specific sort string if related option is applied. No functional change. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: http://lore.kernel.org/lkml/20201216185805.9981-4-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-mem.c | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index fdfbff7592f4..823742036ddb 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -298,11 +298,35 @@ out_delete: perf_session__delete(session); return ret; } +static char *get_sort_order(struct perf_mem *mem) +{ + bool has_extra_options = mem->phys_addr ? true : false; + char sort[128]; + + /* + * there is no weight (cost) associated with stores, so don't print + * the column + */ + if (!(mem->operation & MEM_OPERATION_LOAD)) { + strcpy(sort, "--sort=mem,sym,dso,symbol_daddr," + "dso_daddr,tlb,locked"); + } else if (has_extra_options) { + strcpy(sort, "--sort=local_weight,mem,sym,dso,symbol_daddr," + "dso_daddr,snoop,tlb,locked"); + } else + return NULL; + + if (mem->phys_addr) + strcat(sort, ",phys_daddr"); + + return strdup(sort); +} static int report_events(int argc, const char **argv, struct perf_mem *mem) { const char **rep_argv; int ret, i = 0, j, rep_argc; + char *new_sort_order; if (mem->dump_raw) return report_raw_events(mem); @@ -316,20 +340,9 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) rep_argv[i++] = "--mem-mode"; rep_argv[i++] = "-n"; /* display number of samples */ - /* - * there is no weight (cost) associated with stores, so don't print - * the column - */ - if (!(mem->operation & MEM_OPERATION_LOAD)) { - if (mem->phys_addr) - rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," - "dso_daddr,tlb,locked,phys_daddr"; - else - rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," - "dso_daddr,tlb,locked"; - } else if (mem->phys_addr) - rep_argv[i++] = "--sort=local_weight,mem,sym,dso,symbol_daddr," - "dso_daddr,snoop,tlb,locked,phys_daddr"; + new_sort_order = get_sort_order(mem); + if (new_sort_order) + rep_argv[i++] = new_sort_order; for (j = 1; j < argc; j++, i++) rep_argv[i] = argv[j]; -- cgit v1.3.1 From 29f080881601c90d39c8fa31c125ac70b8894b5e Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 22 Dec 2020 12:02:27 -0800 Subject: kselftest/arm64: check GCR_EL1 after context switch This test is specific to MTE and verifies that the GCR_EL1 register is context switched correctly. It spawns 1024 processes and each process spawns 5 threads. Each thread writes a random setting of GCR_EL1 through the prctl() system call and reads it back verifying that it is the same. If the values are not the same it reports a failure. Note: The test has been extended to verify that even SYNC and ASYNC mode setting is preserved correctly over context switching. Link: https://lkml.kernel.org/r/b51a165426e906e7ec8a68d806ef3f8cd92581a6.1606161801.git.andreyknvl@google.com Signed-off-by: Vincenzo Frascino Signed-off-by: Andrey Konovalov Acked-by: Catalin Marinas Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/arm64/mte/Makefile | 2 +- .../selftests/arm64/mte/check_gcr_el1_cswitch.c | 154 +++++++++++++++++++++ 2 files changed, 155 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c (limited to 'tools') diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile index 2480226dfe57..0b3af552632a 100644 --- a/tools/testing/selftests/arm64/mte/Makefile +++ b/tools/testing/selftests/arm64/mte/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright (C) 2020 ARM Limited -CFLAGS += -std=gnu99 -I. +CFLAGS += -std=gnu99 -I. -lpthread SRCS := $(filter-out mte_common_util.c,$(wildcard *.c)) PROGS := $(patsubst %.c,%,$(SRCS)) diff --git a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c new file mode 100644 index 000000000000..a876db1f096a --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kselftest.h" +#include "mte_common_util.h" + +#define PR_SET_TAGGED_ADDR_CTRL 55 +#define PR_GET_TAGGED_ADDR_CTRL 56 +# define PR_TAGGED_ADDR_ENABLE (1UL << 0) +# define PR_MTE_TCF_SHIFT 1 +# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TAG_SHIFT 3 +# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) + +#include "mte_def.h" + +#define NUM_ITERATIONS 1024 +#define MAX_THREADS 5 +#define THREAD_ITERATIONS 1000 + +void *execute_thread(void *x) +{ + pid_t pid = *((pid_t *)x); + pid_t tid = gettid(); + uint64_t prctl_tag_mask; + uint64_t prctl_set; + uint64_t prctl_get; + uint64_t prctl_tcf; + + srand(time(NULL) ^ (pid << 16) ^ (tid << 16)); + + prctl_tag_mask = rand() & 0xffff; + + if (prctl_tag_mask % 2) + prctl_tcf = PR_MTE_TCF_SYNC; + else + prctl_tcf = PR_MTE_TCF_ASYNC; + + prctl_set = PR_TAGGED_ADDR_ENABLE | prctl_tcf | (prctl_tag_mask << PR_MTE_TAG_SHIFT); + + for (int j = 0; j < THREAD_ITERATIONS; j++) { + if (prctl(PR_SET_TAGGED_ADDR_CTRL, prctl_set, 0, 0, 0)) { + perror("prctl() failed"); + goto fail; + } + + prctl_get = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0); + + if (prctl_set != prctl_get) { + ksft_print_msg("Error: prctl_set: 0x%lx != prctl_get: 0x%lx\n", + prctl_set, prctl_get); + goto fail; + } + } + + return (void *)KSFT_PASS; + +fail: + return (void *)KSFT_FAIL; +} + +int execute_test(pid_t pid) +{ + pthread_t thread_id[MAX_THREADS]; + int thread_data[MAX_THREADS]; + + for (int i = 0; i < MAX_THREADS; i++) + pthread_create(&thread_id[i], NULL, + execute_thread, (void *)&pid); + + for (int i = 0; i < MAX_THREADS; i++) + pthread_join(thread_id[i], (void *)&thread_data[i]); + + for (int i = 0; i < MAX_THREADS; i++) + if (thread_data[i] == KSFT_FAIL) + return KSFT_FAIL; + + return KSFT_PASS; +} + +int mte_gcr_fork_test(void) +{ + pid_t pid; + int results[NUM_ITERATIONS]; + pid_t cpid; + int res; + + for (int i = 0; i < NUM_ITERATIONS; i++) { + pid = fork(); + + if (pid < 0) + return KSFT_FAIL; + + if (pid == 0) { + cpid = getpid(); + + res = execute_test(cpid); + + exit(res); + } + } + + for (int i = 0; i < NUM_ITERATIONS; i++) { + wait(&res); + + if (WIFEXITED(res)) + results[i] = WEXITSTATUS(res); + else + --i; + } + + for (int i = 0; i < NUM_ITERATIONS; i++) + if (results[i] == KSFT_FAIL) + return KSFT_FAIL; + + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + + err = mte_default_setup(); + if (err) + return err; + + ksft_set_plan(1); + + evaluate_test(mte_gcr_fork_test(), + "Verify that GCR_EL1 is set correctly on context switch\n"); + + mte_restore_setup(); + ksft_print_cnts(); + + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} -- cgit v1.3.1 From 11b844b0b7c7c3dc8e8f4d0bbaad5e798351862c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 23 Dec 2020 12:06:52 -0800 Subject: selftests/bpf: Work-around EBUSY errors from hashmap update/delete 20b6cc34ea74 ("bpf: Avoid hashtab deadlock with map_locked") introduced a possibility of getting EBUSY error on lock contention, which seems to happen very deterministically in test_maps when running 1024 threads on low-CPU machine. In libbpf CI case, it's a 2 CPU VM and it's hitting this 100% of the time. Work around by retrying on EBUSY (and EAGAIN, while we are at it) after a small sleep. sched_yield() is too agressive and fails even after 20 retries, so I went with usleep(1) for backoff. Also log actual error returned to make it easier to see what's going on. Fixes: 20b6cc34ea74 ("bpf: Avoid hashtab deadlock with map_locked") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201223200652.3417075-1-andrii@kernel.org --- tools/testing/selftests/bpf/test_maps.c | 48 ++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 0ad3e6305ff0..51adc42b2b40 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1312,22 +1312,58 @@ static void test_map_stress(void) #define DO_UPDATE 1 #define DO_DELETE 0 +#define MAP_RETRIES 20 + +static int map_update_retriable(int map_fd, const void *key, const void *value, + int flags, int attempts) +{ + while (bpf_map_update_elem(map_fd, key, value, flags)) { + if (!attempts || (errno != EAGAIN && errno != EBUSY)) + return -errno; + + usleep(1); + attempts--; + } + + return 0; +} + +static int map_delete_retriable(int map_fd, const void *key, int attempts) +{ + while (bpf_map_delete_elem(map_fd, key)) { + if (!attempts || (errno != EAGAIN && errno != EBUSY)) + return -errno; + + usleep(1); + attempts--; + } + + return 0; +} + static void test_update_delete(unsigned int fn, void *data) { int do_update = ((int *)data)[1]; int fd = ((int *)data)[0]; - int i, key, value; + int i, key, value, err; for (i = fn; i < MAP_SIZE; i += TASKS) { key = value = i; if (do_update) { - assert(bpf_map_update_elem(fd, &key, &value, - BPF_NOEXIST) == 0); - assert(bpf_map_update_elem(fd, &key, &value, - BPF_EXIST) == 0); + err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES); + if (err) + printf("error %d %d\n", err, errno); + assert(err == 0); + err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES); + if (err) + printf("error %d %d\n", err, errno); + assert(err == 0); } else { - assert(bpf_map_delete_elem(fd, &key) == 0); + err = map_delete_retriable(fd, &key, MAP_RETRIES); + if (err) + printf("error %d %d\n", err, errno); + assert(err == 0); } } } -- cgit v1.3.1 From 6e5192143ab571dbefb584edf900565098bdfd23 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Dec 2020 09:03:04 -0300 Subject: tools headers UAPI: Update epoll_pwait2 affected files To pick the changes from: b0a0c2615f6f199a ("epoll: wire up syscall epoll_pwait2") That addresses these perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h' diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl' diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Willem de Bruijn Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/unistd.h | 4 +++- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index fc48c64700eb..728752917785 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -859,9 +859,11 @@ __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) __SYSCALL(__NR_faccessat2, sys_faccessat2) #define __NR_process_madvise 440 __SYSCALL(__NR_process_madvise, sys_process_madvise) +#define __NR_epoll_pwait2 441 +__SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) #undef __NR_syscalls -#define __NR_syscalls 441 +#define __NR_syscalls 442 /* * 32 bit systems traditionally used different diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 379819244b91..78672124d28b 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -362,6 +362,7 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 # # Due to a historical design error, certain syscalls are numbered differently -- cgit v1.3.1 From 7f3905f00a2025591a6883ee6880f928029b4d96 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Dec 2020 09:04:54 -0300 Subject: tools headers cpufeatures: Sync with the kernel sources To pick the changes in: 69372cf01290b958 ("x86/cpu: Add VM page flush MSR availablility as a CPUID feature") e1b35da5e624f8b0 ("x86: Enumerate AVX512 FP16 CPUID feature flag") That causes only these 'perf bench' objects to rebuild: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And addresses these perf build warnings: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Kyung Min Park Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Tom Lendacky Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index f5ef2d5b9231..84b887825f12 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -237,6 +237,7 @@ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ #define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_VM_PAGE_FLUSH ( 8*32+21) /* "" VM Page Flush MSR is supported */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -376,6 +377,7 @@ #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ +#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ -- cgit v1.3.1 From fde668244d1d8d490b5b9daf53fe4f92a6751773 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Dec 2020 09:07:36 -0300 Subject: tools arch x86: Sync the msr-index.h copy with the kernel sources To pick up the changes in: Fixes: 69372cf01290b958 ("x86/cpu: Add VM page flush MSR availablility as a CPUID feature") That cause these changes in tooling: $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after $ diff -u before after --- before 2020-12-21 09:09:05.593005003 -0300 +++ after 2020-12-21 09:12:48.436994802 -0300 @@ -21,7 +21,7 @@ [0x0000004f] = "PPIN", [0x00000060] = "LBR_CORE_TO", [0x00000079] = "IA32_UCODE_WRITE", - [0x0000008b] = "IA32_UCODE_REV", + [0x0000008b] = "AMD64_PATCH_LEVEL", [0x0000008C] = "IA32_SGXLEPUBKEYHASH0", [0x0000008D] = "IA32_SGXLEPUBKEYHASH1", [0x0000008E] = "IA32_SGXLEPUBKEYHASH2", @@ -286,6 +286,7 @@ [0xc0010114 - x86_AMD_V_KVM_MSRs_offset] = "VM_CR", [0xc0010115 - x86_AMD_V_KVM_MSRs_offset] = "VM_IGNNE", [0xc0010117 - x86_AMD_V_KVM_MSRs_offset] = "VM_HSAVE_PA", + [0xc001011e - x86_AMD_V_KVM_MSRs_offset] = "AMD64_VM_PAGE_FLUSH", [0xc001011f - x86_AMD_V_KVM_MSRs_offset] = "AMD64_VIRT_SPEC_CTRL", [0xc0010130 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_SEV_ES_GHCB", [0xc0010131 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_SEV", $ The new MSR has a pattern that wasn't matched to avoid a clash with IA32_UCODE_REV, change the regex to prefer the more relevant AMD_ prefixed ones to catch this new AMD64_VM_PAGE_FLUSH MSR. Which causes these parts of tools/perf/ to be rebuilt: CC /tmp/build/perf/trace/beauty/tracepoints/x86_msr.o LD /tmp/build/perf/trace/beauty/tracepoints/perf-in.o LD /tmp/build/perf/trace/beauty/perf-in.o LD /tmp/build/perf/perf-in.o LINK /tmp/build/perf/perf This addresses this perf tools build warning: diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h' Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Tom Lendacky Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/msr-index.h | 1 + tools/perf/trace/beauty/tracepoints/x86_msr.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 2b5fc9accec4..546d6ecf0a35 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -472,6 +472,7 @@ #define MSR_AMD64_ICIBSEXTDCTL 0xc001103c #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e #define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 diff --git a/tools/perf/trace/beauty/tracepoints/x86_msr.sh b/tools/perf/trace/beauty/tracepoints/x86_msr.sh index 831c02cf0586..27ee1ea1fe94 100755 --- a/tools/perf/trace/beauty/tracepoints/x86_msr.sh +++ b/tools/perf/trace/beauty/tracepoints/x86_msr.sh @@ -15,7 +15,7 @@ x86_msr_index=${arch_x86_header_dir}/msr-index.h printf "static const char *x86_MSRs[] = {\n" regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x00000[[:xdigit:]]+)[[:space:]]*.*' -egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|AMD64|IA32_TSCDEADLINE|IDT_FCR4)' | \ +egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|IA32_(TSCDEADLINE|UCODE_REV)|IDT_FCR4)' | \ sed -r "s/$regex/\2 \1/g" | sort -n | \ xargs printf "\t[%s] = \"%s\",\n" printf "};\n\n" -- cgit v1.3.1 From 288807fc3a5f19ed77cb8c25342323bbe58a75a1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Dec 2020 09:20:52 -0300 Subject: tools headers UAPI: Sync kvm.h headers with the kernel sources To pick the changes in: fb04a1eddb1a65b6 ("KVM: X86: Implement ring-based dirty memory tracking") That result in these change in tooling: $ tools/perf/trace/beauty/kvm_ioctl.sh > before $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h $ cp arch/x86/include/uapi/asm/kvm.h tools/arch/x86/include/uapi/asm/kvm.h $ tools/perf/trace/beauty/kvm_ioctl.sh > after $ diff -u before after --- before 2020-12-21 11:55:45.229737066 -0300 +++ after 2020-12-21 11:55:56.379983393 -0300 @@ -90,6 +90,7 @@ [0xc0] = "CLEAR_DIRTY_LOG", [0xc1] = "GET_SUPPORTED_HV_CPUID", [0xc6] = "X86_SET_MSR_FILTER", + [0xc7] = "RESET_DIRTY_RINGS", [0xe0] = "CREATE_DEVICE", [0xe1] = "SET_DEVICE_ATTR", [0xe2] = "GET_DEVICE_ATTR", $ Now one can use that string in filters when tracing ioctls, etc. And silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/kvm.h' differs from latest version at 'arch/x86/include/uapi/asm/kvm.h' diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Peter Xu Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/kvm.h | 1 + tools/include/uapi/linux/kvm.h | 56 ++++++++++++++++++++++++++++++++++- 2 files changed, 56 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 89e5f3d1bba8..8e76d3701db3 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -12,6 +12,7 @@ #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define KVM_DIRTY_LOG_PAGE_OFFSET 64 #define DE_VECTOR 0 #define DB_VECTOR 1 diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index ca41220b40b8..886802b8ffba 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -250,6 +250,7 @@ struct kvm_hyperv_exit { #define KVM_EXIT_ARM_NISV 28 #define KVM_EXIT_X86_RDMSR 29 #define KVM_EXIT_X86_WRMSR 30 +#define KVM_EXIT_DIRTY_RING_FULL 31 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -1053,6 +1054,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_X86_USER_SPACE_MSR 188 #define KVM_CAP_X86_MSR_FILTER 189 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 +#define KVM_CAP_SYS_HYPERV_CPUID 191 +#define KVM_CAP_DIRTY_LOG_RING 192 #ifdef KVM_CAP_IRQ_ROUTING @@ -1511,7 +1514,7 @@ struct kvm_enc_region { /* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT_2 */ #define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log) -/* Available with KVM_CAP_HYPERV_CPUID */ +/* Available with KVM_CAP_HYPERV_CPUID (vcpu) / KVM_CAP_SYS_HYPERV_CPUID (system) */ #define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2) /* Available with KVM_CAP_ARM_SVE */ @@ -1557,6 +1560,9 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_X86_MSR_FILTER */ #define KVM_X86_SET_MSR_FILTER _IOW(KVMIO, 0xc6, struct kvm_msr_filter) +/* Available with KVM_CAP_DIRTY_LOG_RING */ +#define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ @@ -1710,4 +1716,52 @@ struct kvm_hyperv_eventfd { #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) #define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) +/* + * Arch needs to define the macro after implementing the dirty ring + * feature. KVM_DIRTY_LOG_PAGE_OFFSET should be defined as the + * starting page offset of the dirty ring structures. + */ +#ifndef KVM_DIRTY_LOG_PAGE_OFFSET +#define KVM_DIRTY_LOG_PAGE_OFFSET 0 +#endif + +/* + * KVM dirty GFN flags, defined as: + * + * |---------------+---------------+--------------| + * | bit 1 (reset) | bit 0 (dirty) | Status | + * |---------------+---------------+--------------| + * | 0 | 0 | Invalid GFN | + * | 0 | 1 | Dirty GFN | + * | 1 | X | GFN to reset | + * |---------------+---------------+--------------| + * + * Lifecycle of a dirty GFN goes like: + * + * dirtied harvested reset + * 00 -----------> 01 -------------> 1X -------+ + * ^ | + * | | + * +------------------------------------------+ + * + * The userspace program is only responsible for the 01->1X state + * conversion after harvesting an entry. Also, it must not skip any + * dirty bits, so that dirty bits are always harvested in sequence. + */ +#define KVM_DIRTY_GFN_F_DIRTY BIT(0) +#define KVM_DIRTY_GFN_F_RESET BIT(1) +#define KVM_DIRTY_GFN_F_MASK 0x3 + +/* + * KVM dirty rings should be mapped at KVM_DIRTY_LOG_PAGE_OFFSET of + * per-vcpu mmaped regions as an array of struct kvm_dirty_gfn. The + * size of the gfn buffer is decided by the first argument when + * enabling KVM_CAP_DIRTY_LOG_RING. + */ +struct kvm_dirty_gfn { + __u32 flags; + __u32 slot; + __u64 offset; +}; + #endif /* __LINUX_KVM_H */ -- cgit v1.3.1 From cd97448db80e0238a819dc6b733da6ec0173cadd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Dec 2020 12:51:03 -0300 Subject: tools headers UAPI: Sync KVM's vmx.h header with the kernel sources To pick the changes in: bf0cd88ce363a2de ("KVM: x86: emulate wait-for-SIPI and SIPI-VMExit") That makes 'perf kvm-stat' aware of this new SIPI_SIGNAL exit reason, thus addressing the following perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/vmx.h' differs from latest version at 'arch/x86/include/uapi/asm/vmx.h' diff -u tools/arch/x86/include/uapi/asm/vmx.h arch/x86/include/uapi/asm/vmx.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Yadong Qi Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/vmx.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index b8ff9e8ac0d5..ada955c5ebb6 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -32,6 +32,7 @@ #define EXIT_REASON_EXTERNAL_INTERRUPT 1 #define EXIT_REASON_TRIPLE_FAULT 2 #define EXIT_REASON_INIT_SIGNAL 3 +#define EXIT_REASON_SIPI_SIGNAL 4 #define EXIT_REASON_INTERRUPT_WINDOW 7 #define EXIT_REASON_NMI_WINDOW 8 @@ -94,6 +95,7 @@ { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ { EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \ + { EXIT_REASON_SIPI_SIGNAL, "SIPI_SIGNAL" }, \ { EXIT_REASON_INTERRUPT_WINDOW, "INTERRUPT_WINDOW" }, \ { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ -- cgit v1.3.1 From 9880e71cbaa8a0e826d8f144704301476b2d6cf9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Dec 2020 12:53:44 -0300 Subject: tools kvm headers: Update KVM headers from the kernel sources To pick the changes from: 8d14797b53f044fd ("KVM: arm64: Move 'struct kvm_arch_memory_slot' out of uapi/") That don't causes any changes in tooling, only addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/arm64/include/uapi/asm/kvm.h' differs from latest version at 'arch/arm64/include/uapi/asm/kvm.h' diff -u tools/arch/arm64/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h Cc: Marc Zyngier Cc: Will Deacon Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm64/include/uapi/asm/kvm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 1c17c3a24411..24223adae150 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -156,9 +156,6 @@ struct kvm_sync_regs { __u64 device_irq_level; }; -struct kvm_arch_memory_slot { -}; - /* * PMU filter structure. Describe a range of events with a particular * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. -- cgit v1.3.1 From b71df82d05b7a38f38c4b1109c57b209b8ed43ff Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Dec 2020 20:04:45 -0300 Subject: tools headers UAPI: Synch KVM's svm.h header with the kernel To pick up the changes from: d1949b93c60504b3 ("KVM: SVM: Add support for CR8 write traps for an SEV-ES guest") 5b51cb13160ae0ba ("KVM: SVM: Add support for CR4 write traps for an SEV-ES guest") f27ad38aac23263c ("KVM: SVM: Add support for CR0 write traps for an SEV-ES guest") 2985afbcdbb1957a ("KVM: SVM: Add support for EFER write traps for an SEV-ES guest") 291bd20d5d88814a ("KVM: SVM: Add initial support for a VMGEXIT VMEXIT") Picking these new SVM exit reasons: + { SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \ + { SVM_EXIT_CR0_WRITE_TRAP, "write_cr0_trap" }, \ + { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ + { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \ + { SVM_EXIT_VMGEXIT, "vmgexit" }, \ + { SVM_VMGEXIT_MMIO_READ, "vmgexit_mmio_read" }, \ + { SVM_VMGEXIT_MMIO_WRITE, "vmgexit_mmio_write" }, \ + { SVM_VMGEXIT_NMI_COMPLETE, "vmgexit_nmi_complete" }, \ + { SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \ + { SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \ And address this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/svm.h' differs from latest version at 'arch/x86/include/uapi/asm/svm.h' diff -u tools/arch/x86/include/uapi/asm/svm.h arch/x86/include/uapi/asm/svm.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Tom Lendacky Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/svm.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'tools') diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index f1d8307454e0..554f75fe013c 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -77,10 +77,28 @@ #define SVM_EXIT_MWAIT_COND 0x08c #define SVM_EXIT_XSETBV 0x08d #define SVM_EXIT_RDPRU 0x08e +#define SVM_EXIT_EFER_WRITE_TRAP 0x08f +#define SVM_EXIT_CR0_WRITE_TRAP 0x090 +#define SVM_EXIT_CR1_WRITE_TRAP 0x091 +#define SVM_EXIT_CR2_WRITE_TRAP 0x092 +#define SVM_EXIT_CR3_WRITE_TRAP 0x093 +#define SVM_EXIT_CR4_WRITE_TRAP 0x094 +#define SVM_EXIT_CR5_WRITE_TRAP 0x095 +#define SVM_EXIT_CR6_WRITE_TRAP 0x096 +#define SVM_EXIT_CR7_WRITE_TRAP 0x097 +#define SVM_EXIT_CR8_WRITE_TRAP 0x098 +#define SVM_EXIT_CR9_WRITE_TRAP 0x099 +#define SVM_EXIT_CR10_WRITE_TRAP 0x09a +#define SVM_EXIT_CR11_WRITE_TRAP 0x09b +#define SVM_EXIT_CR12_WRITE_TRAP 0x09c +#define SVM_EXIT_CR13_WRITE_TRAP 0x09d +#define SVM_EXIT_CR14_WRITE_TRAP 0x09e +#define SVM_EXIT_CR15_WRITE_TRAP 0x09f #define SVM_EXIT_INVPCID 0x0a2 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 +#define SVM_EXIT_VMGEXIT 0x403 /* SEV-ES software-defined VMGEXIT events */ #define SVM_VMGEXIT_MMIO_READ 0x80000001 @@ -183,10 +201,20 @@ { SVM_EXIT_MONITOR, "monitor" }, \ { SVM_EXIT_MWAIT, "mwait" }, \ { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \ + { SVM_EXIT_CR0_WRITE_TRAP, "write_cr0_trap" }, \ + { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ + { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ + { SVM_EXIT_VMGEXIT, "vmgexit" }, \ + { SVM_VMGEXIT_MMIO_READ, "vmgexit_mmio_read" }, \ + { SVM_VMGEXIT_MMIO_WRITE, "vmgexit_mmio_write" }, \ + { SVM_VMGEXIT_NMI_COMPLETE, "vmgexit_nmi_complete" }, \ + { SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \ + { SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \ { SVM_EXIT_ERR, "invalid_guest_state" } -- cgit v1.3.1 From 9bad32b2c63c985fc9f04b29186974ad5bb0b74c Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 18 Dec 2020 15:59:21 +0800 Subject: perf powerpc: Move syscall.tbl check to check-headers.sh It is better to check syscall.tbl for powerpc in check-headers.sh, it is similar with commit c9b51a017065 ("perf tools: Move syscall_64.tbl check into check-headers.sh"). Signed-off-by: Tiezhu Yang Reviewed-by: Naveen N. Rao Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xuefeng Li Link: http://lore.kernel.org/lkml/1608278364-6733-2-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/Makefile | 7 ------- tools/perf/check-headers.sh | 1 + 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index e58d00d62f02..840ea0e59287 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -14,7 +14,6 @@ PERF_HAVE_JITDUMP := 1 out := $(OUTPUT)arch/powerpc/include/generated/asm header32 := $(out)/syscalls_32.c header64 := $(out)/syscalls_64.c -syskrn := $(srctree)/arch/powerpc/kernel/syscalls/syscall.tbl sysprf := $(srctree)/tools/perf/arch/powerpc/entry/syscalls sysdef := $(sysprf)/syscall.tbl systbl := $(sysprf)/mksyscalltbl @@ -23,15 +22,9 @@ systbl := $(sysprf)/mksyscalltbl _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') $(header64): $(sysdef) $(systbl) - @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ - (diff -B $(sysdef) $(syskrn) >/dev/null) \ - || echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true $(Q)$(SHELL) '$(systbl)' '64' $(sysdef) > $@ $(header32): $(sysdef) $(systbl) - @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ - (diff -B $(sysdef) $(syskrn) >/dev/null) \ - || echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true $(Q)$(SHELL) '$(systbl)' '32' $(sysdef) > $@ clean:: diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 15ecb1803fb9..3c96fd3a7370 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -144,6 +144,7 @@ check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in # diff non-symmetric files check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl +check_2 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl for i in $BEAUTY_FILES; do beauty_check $i -B -- cgit v1.3.1 From 22ffc3f5598d2a51e2da4ea5e07e734715bde782 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 18 Dec 2020 15:59:22 +0800 Subject: perf s390: Move syscall.tbl check into check-headers.sh It is better to check syscall.tbl for s390 in check-headers.sh, it is similar with commit c9b51a017065 ("perf tools: Move syscall_64.tbl check into check-headers.sh"). Signed-off-by: Tiezhu Yang Reviewed-by: Naveen N. Rao Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xuefeng Li Link: http://lore.kernel.org/lkml/1608278364-6733-3-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/Makefile | 4 ---- tools/perf/check-headers.sh | 1 + 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile index 6ac8887be7c9..74bffbea03e2 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -12,7 +12,6 @@ PERF_HAVE_JITDUMP := 1 out := $(OUTPUT)arch/s390/include/generated/asm header := $(out)/syscalls_64.c -syskrn := $(srctree)/arch/s390/kernel/syscalls/syscall.tbl sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls sysdef := $(sysprf)/syscall.tbl systbl := $(sysprf)/mksyscalltbl @@ -21,9 +20,6 @@ systbl := $(sysprf)/mksyscalltbl _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') $(header): $(sysdef) $(systbl) - @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ - (diff -B $(sysdef) $(syskrn) >/dev/null) \ - || echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true $(Q)$(SHELL) '$(systbl)' $(sysdef) > $@ clean:: diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 3c96fd3a7370..dded93a2bc89 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -145,6 +145,7 @@ check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in # diff non-symmetric files check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl check_2 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl +check_2 tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl for i in $BEAUTY_FILES; do beauty_check $i -B -- cgit v1.3.1 From c5ef52944a2d80017092cdf6aa474b2f4d596072 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 23 Dec 2020 13:13:16 -0300 Subject: perf tools: Update powerpc's syscall.tbl copy from the kernel sources This silences the following tools/perf/ build warning: Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl' Just make them same: cp arch/powerpc/kernel/syscalls/syscall.tbl tools/perf/arch/powerpc/entry/syscalls/syscall.tbl Signed-off-by: Tiezhu Yang Reviewed-by: Naveen N. Rao Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xuefeng Li Link: http://lore.kernel.org/lkml/1608278364-6733-4-git-send-email-yangtiezhu@loongson.cn [ There were updates after Tiezhu's post, so I just updated the copy ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 26 ++++++++++++++++------ 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index b168364ac050..f744eb5cba88 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -9,7 +9,9 @@ # 0 nospu restart_syscall sys_restart_syscall 1 nospu exit sys_exit -2 nospu fork ppc_fork +2 32 fork ppc_fork sys_fork +2 64 fork sys_fork +2 spu fork sys_ni_syscall 3 common read sys_read 4 common write sys_write 5 common open sys_open compat_sys_open @@ -158,7 +160,9 @@ 119 32 sigreturn sys_sigreturn compat_sys_sigreturn 119 64 sigreturn sys_ni_syscall 119 spu sigreturn sys_ni_syscall -120 nospu clone ppc_clone +120 32 clone ppc_clone sys_clone +120 64 clone sys_clone +120 spu clone sys_ni_syscall 121 common setdomainname sys_setdomainname 122 common uname sys_newuname 123 common modify_ldt sys_ni_syscall @@ -240,7 +244,9 @@ 186 spu sendfile sys_sendfile64 187 common getpmsg sys_ni_syscall 188 common putpmsg sys_ni_syscall -189 nospu vfork ppc_vfork +189 32 vfork ppc_vfork sys_vfork +189 64 vfork sys_vfork +189 spu vfork sys_ni_syscall 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 common readahead sys_readahead compat_sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 @@ -316,8 +322,8 @@ 248 32 clock_nanosleep sys_clock_nanosleep_time32 248 64 clock_nanosleep sys_clock_nanosleep 248 spu clock_nanosleep sys_clock_nanosleep -249 32 swapcontext ppc_swapcontext ppc32_swapcontext -249 64 swapcontext ppc64_swapcontext +249 32 swapcontext ppc_swapcontext compat_sys_swapcontext +249 64 swapcontext sys_swapcontext 249 spu swapcontext sys_ni_syscall 250 common tgkill sys_tgkill 251 32 utimes sys_utimes_time32 @@ -456,7 +462,7 @@ 361 common bpf sys_bpf 362 nospu execveat sys_execveat compat_sys_execveat 363 32 switch_endian sys_ni_syscall -363 64 switch_endian ppc_switch_endian +363 64 switch_endian sys_switch_endian 363 spu switch_endian sys_ni_syscall 364 common userfaultfd sys_userfaultfd 365 common membarrier sys_membarrier @@ -516,6 +522,12 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open -435 nospu clone3 ppc_clone3 +435 32 clone3 ppc_clone3 sys_clone3 +435 64 clone3 sys_clone3 +435 spu clone3 sys_ni_syscall +436 common close_range sys_close_range 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 +440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 -- cgit v1.3.1 From b27d20ab1c6a1a7738c02419c28287d260ca8036 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 23 Dec 2020 13:24:35 -0300 Subject: perf tools: Update s390's syscall.tbl copy from the kernel sources This silences the following tools/perf/ build warning: Warning: Kernel ABI header at 'tools/perf/arch/s390/entry/syscalls/syscall.tbl' differs from latest version at 'arch/s390/kernel/syscalls/syscall.tbl' Just make them same: cp arch/s390/kernel/syscalls/syscall.tbl tools/perf/arch/s390/entry/syscalls/syscall.tbl Signed-off-by: Tiezhu Yang Reviewed-by: Naveen N. Rao Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xuefeng Li Link: http://lore.kernel.org/lkml/1608278364-6733-5-git-send-email-yangtiezhu@loongson.cn [ There were updates after Tiezhu's post, so I just updated the copy ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/entry/syscalls/syscall.tbl | 396 ++++++++++++++---------- 1 file changed, 226 insertions(+), 170 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index d2fa9647ce25..d443423495e5 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -15,86 +15,86 @@ 5 common open sys_open compat_sys_open 6 common close sys_close sys_close 7 common restart_syscall sys_restart_syscall sys_restart_syscall -8 common creat sys_creat compat_sys_creat -9 common link sys_link compat_sys_link -10 common unlink sys_unlink compat_sys_unlink +8 common creat sys_creat sys_creat +9 common link sys_link sys_link +10 common unlink sys_unlink sys_unlink 11 common execve sys_execve compat_sys_execve -12 common chdir sys_chdir compat_sys_chdir -13 32 time - compat_sys_time -14 common mknod sys_mknod compat_sys_mknod -15 common chmod sys_chmod compat_sys_chmod -16 32 lchown - compat_sys_s390_lchown16 +12 common chdir sys_chdir sys_chdir +13 32 time - sys_time32 +14 common mknod sys_mknod sys_mknod +15 common chmod sys_chmod sys_chmod +16 32 lchown - sys_lchown16 19 common lseek sys_lseek compat_sys_lseek 20 common getpid sys_getpid sys_getpid -21 common mount sys_mount -22 common umount sys_oldumount compat_sys_oldumount -23 32 setuid - compat_sys_s390_setuid16 -24 32 getuid - compat_sys_s390_getuid16 -25 32 stime - compat_sys_stime +21 common mount sys_mount sys_mount +22 common umount sys_oldumount sys_oldumount +23 32 setuid - sys_setuid16 +24 32 getuid - sys_getuid16 +25 32 stime - sys_stime32 26 common ptrace sys_ptrace compat_sys_ptrace 27 common alarm sys_alarm sys_alarm 29 common pause sys_pause sys_pause -30 common utime sys_utime compat_sys_utime -33 common access sys_access compat_sys_access +30 common utime sys_utime sys_utime32 +33 common access sys_access sys_access 34 common nice sys_nice sys_nice 36 common sync sys_sync sys_sync 37 common kill sys_kill sys_kill -38 common rename sys_rename compat_sys_rename -39 common mkdir sys_mkdir compat_sys_mkdir -40 common rmdir sys_rmdir compat_sys_rmdir +38 common rename sys_rename sys_rename +39 common mkdir sys_mkdir sys_mkdir +40 common rmdir sys_rmdir sys_rmdir 41 common dup sys_dup sys_dup -42 common pipe sys_pipe compat_sys_pipe +42 common pipe sys_pipe sys_pipe 43 common times sys_times compat_sys_times -45 common brk sys_brk compat_sys_brk -46 32 setgid - compat_sys_s390_setgid16 -47 32 getgid - compat_sys_s390_getgid16 -48 common signal sys_signal compat_sys_signal -49 32 geteuid - compat_sys_s390_geteuid16 -50 32 getegid - compat_sys_s390_getegid16 -51 common acct sys_acct compat_sys_acct -52 common umount2 sys_umount compat_sys_umount +45 common brk sys_brk sys_brk +46 32 setgid - sys_setgid16 +47 32 getgid - sys_getgid16 +48 common signal sys_signal sys_signal +49 32 geteuid - sys_geteuid16 +50 32 getegid - sys_getegid16 +51 common acct sys_acct sys_acct +52 common umount2 sys_umount sys_umount 54 common ioctl sys_ioctl compat_sys_ioctl 55 common fcntl sys_fcntl compat_sys_fcntl 57 common setpgid sys_setpgid sys_setpgid 60 common umask sys_umask sys_umask -61 common chroot sys_chroot compat_sys_chroot +61 common chroot sys_chroot sys_chroot 62 common ustat sys_ustat compat_sys_ustat 63 common dup2 sys_dup2 sys_dup2 64 common getppid sys_getppid sys_getppid 65 common getpgrp sys_getpgrp sys_getpgrp 66 common setsid sys_setsid sys_setsid 67 common sigaction sys_sigaction compat_sys_sigaction -70 32 setreuid - compat_sys_s390_setreuid16 -71 32 setregid - compat_sys_s390_setregid16 -72 common sigsuspend sys_sigsuspend compat_sys_sigsuspend +70 32 setreuid - sys_setreuid16 +71 32 setregid - sys_setregid16 +72 common sigsuspend sys_sigsuspend sys_sigsuspend 73 common sigpending sys_sigpending compat_sys_sigpending -74 common sethostname sys_sethostname compat_sys_sethostname +74 common sethostname sys_sethostname sys_sethostname 75 common setrlimit sys_setrlimit compat_sys_setrlimit 76 32 getrlimit - compat_sys_old_getrlimit 77 common getrusage sys_getrusage compat_sys_getrusage 78 common gettimeofday sys_gettimeofday compat_sys_gettimeofday 79 common settimeofday sys_settimeofday compat_sys_settimeofday -80 32 getgroups - compat_sys_s390_getgroups16 -81 32 setgroups - compat_sys_s390_setgroups16 -83 common symlink sys_symlink compat_sys_symlink -85 common readlink sys_readlink compat_sys_readlink -86 common uselib sys_uselib compat_sys_uselib -87 common swapon sys_swapon compat_sys_swapon -88 common reboot sys_reboot compat_sys_reboot +80 32 getgroups - sys_getgroups16 +81 32 setgroups - sys_setgroups16 +83 common symlink sys_symlink sys_symlink +85 common readlink sys_readlink sys_readlink +86 common uselib sys_uselib sys_uselib +87 common swapon sys_swapon sys_swapon +88 common reboot sys_reboot sys_reboot 89 common readdir - compat_sys_old_readdir 90 common mmap sys_old_mmap compat_sys_s390_old_mmap -91 common munmap sys_munmap compat_sys_munmap +91 common munmap sys_munmap sys_munmap 92 common truncate sys_truncate compat_sys_truncate 93 common ftruncate sys_ftruncate compat_sys_ftruncate 94 common fchmod sys_fchmod sys_fchmod -95 32 fchown - compat_sys_s390_fchown16 +95 32 fchown - sys_fchown16 96 common getpriority sys_getpriority sys_getpriority 97 common setpriority sys_setpriority sys_setpriority 99 common statfs sys_statfs compat_sys_statfs 100 common fstatfs sys_fstatfs compat_sys_fstatfs 101 32 ioperm - - 102 common socketcall sys_socketcall compat_sys_socketcall -103 common syslog sys_syslog compat_sys_syslog +103 common syslog sys_syslog sys_syslog 104 common setitimer sys_setitimer compat_sys_setitimer 105 common getitimer sys_getitimer compat_sys_getitimer 106 common stat sys_newstat compat_sys_newstat @@ -104,76 +104,76 @@ 111 common vhangup sys_vhangup sys_vhangup 112 common idle - - 114 common wait4 sys_wait4 compat_sys_wait4 -115 common swapoff sys_swapoff compat_sys_swapoff +115 common swapoff sys_swapoff sys_swapoff 116 common sysinfo sys_sysinfo compat_sys_sysinfo 117 common ipc sys_s390_ipc compat_sys_s390_ipc 118 common fsync sys_fsync sys_fsync 119 common sigreturn sys_sigreturn compat_sys_sigreturn -120 common clone sys_clone compat_sys_clone -121 common setdomainname sys_setdomainname compat_sys_setdomainname -122 common uname sys_newuname compat_sys_newuname -124 common adjtimex sys_adjtimex compat_sys_adjtimex -125 common mprotect sys_mprotect compat_sys_mprotect +120 common clone sys_clone sys_clone +121 common setdomainname sys_setdomainname sys_setdomainname +122 common uname sys_newuname sys_newuname +124 common adjtimex sys_adjtimex sys_adjtimex_time32 +125 common mprotect sys_mprotect sys_mprotect 126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask 127 common create_module - - -128 common init_module sys_init_module compat_sys_init_module -129 common delete_module sys_delete_module compat_sys_delete_module +128 common init_module sys_init_module sys_init_module +129 common delete_module sys_delete_module sys_delete_module 130 common get_kernel_syms - - -131 common quotactl sys_quotactl compat_sys_quotactl +131 common quotactl sys_quotactl sys_quotactl 132 common getpgid sys_getpgid sys_getpgid 133 common fchdir sys_fchdir sys_fchdir -134 common bdflush sys_bdflush compat_sys_bdflush -135 common sysfs sys_sysfs compat_sys_sysfs +134 common bdflush sys_bdflush sys_bdflush +135 common sysfs sys_sysfs sys_sysfs 136 common personality sys_s390_personality sys_s390_personality 137 common afs_syscall - - -138 32 setfsuid - compat_sys_s390_setfsuid16 -139 32 setfsgid - compat_sys_s390_setfsgid16 -140 32 _llseek - compat_sys_llseek +138 32 setfsuid - sys_setfsuid16 +139 32 setfsgid - sys_setfsgid16 +140 32 _llseek - sys_llseek 141 common getdents sys_getdents compat_sys_getdents 142 32 _newselect - compat_sys_select 142 64 select sys_select - 143 common flock sys_flock sys_flock -144 common msync sys_msync compat_sys_msync -145 common readv sys_readv -146 common writev sys_writev +144 common msync sys_msync sys_msync +145 common readv sys_readv sys_readv +146 common writev sys_writev sys_writev 147 common getsid sys_getsid sys_getsid 148 common fdatasync sys_fdatasync sys_fdatasync 149 common _sysctl - - -150 common mlock sys_mlock compat_sys_mlock -151 common munlock sys_munlock compat_sys_munlock +150 common mlock sys_mlock sys_mlock +151 common munlock sys_munlock sys_munlock 152 common mlockall sys_mlockall sys_mlockall 153 common munlockall sys_munlockall sys_munlockall -154 common sched_setparam sys_sched_setparam compat_sys_sched_setparam -155 common sched_getparam sys_sched_getparam compat_sys_sched_getparam -156 common sched_setscheduler sys_sched_setscheduler compat_sys_sched_setscheduler +154 common sched_setparam sys_sched_setparam sys_sched_setparam +155 common sched_getparam sys_sched_getparam sys_sched_getparam +156 common sched_setscheduler sys_sched_setscheduler sys_sched_setscheduler 157 common sched_getscheduler sys_sched_getscheduler sys_sched_getscheduler 158 common sched_yield sys_sched_yield sys_sched_yield 159 common sched_get_priority_max sys_sched_get_priority_max sys_sched_get_priority_max 160 common sched_get_priority_min sys_sched_get_priority_min sys_sched_get_priority_min -161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval -162 common nanosleep sys_nanosleep compat_sys_nanosleep -163 common mremap sys_mremap compat_sys_mremap -164 32 setresuid - compat_sys_s390_setresuid16 -165 32 getresuid - compat_sys_s390_getresuid16 +161 common sched_rr_get_interval sys_sched_rr_get_interval sys_sched_rr_get_interval_time32 +162 common nanosleep sys_nanosleep sys_nanosleep_time32 +163 common mremap sys_mremap sys_mremap +164 32 setresuid - sys_setresuid16 +165 32 getresuid - sys_getresuid16 167 common query_module - - -168 common poll sys_poll compat_sys_poll +168 common poll sys_poll sys_poll 169 common nfsservctl - - -170 32 setresgid - compat_sys_s390_setresgid16 -171 32 getresgid - compat_sys_s390_getresgid16 -172 common prctl sys_prctl compat_sys_prctl +170 32 setresgid - sys_setresgid16 +171 32 getresgid - sys_getresgid16 +172 common prctl sys_prctl sys_prctl 173 common rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn 174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction 175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask 176 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending -177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait +177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time32 178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 180 common pread64 sys_pread64 compat_sys_s390_pread64 181 common pwrite64 sys_pwrite64 compat_sys_s390_pwrite64 -182 32 chown - compat_sys_s390_chown16 -183 common getcwd sys_getcwd compat_sys_getcwd -184 common capget sys_capget compat_sys_capget -185 common capset sys_capset compat_sys_capset +182 32 chown - sys_chown16 +183 common getcwd sys_getcwd sys_getcwd +184 common capget sys_capget sys_capget +185 common capset sys_capset sys_capset 186 common sigaltstack sys_sigaltstack compat_sys_sigaltstack 187 common sendfile sys_sendfile64 compat_sys_sendfile 188 common getpmsg - - @@ -187,7 +187,7 @@ 195 32 stat64 - compat_sys_s390_stat64 196 32 lstat64 - compat_sys_s390_lstat64 197 32 fstat64 - compat_sys_s390_fstat64 -198 32 lchown32 - compat_sys_lchown +198 32 lchown32 - sys_lchown 198 64 lchown sys_lchown - 199 32 getuid32 - sys_getuid 199 64 getuid sys_getuid - @@ -201,21 +201,21 @@ 203 64 setreuid sys_setreuid - 204 32 setregid32 - sys_setregid 204 64 setregid sys_setregid - -205 32 getgroups32 - compat_sys_getgroups +205 32 getgroups32 - sys_getgroups 205 64 getgroups sys_getgroups - -206 32 setgroups32 - compat_sys_setgroups +206 32 setgroups32 - sys_setgroups 206 64 setgroups sys_setgroups - 207 32 fchown32 - sys_fchown 207 64 fchown sys_fchown - 208 32 setresuid32 - sys_setresuid 208 64 setresuid sys_setresuid - -209 32 getresuid32 - compat_sys_getresuid +209 32 getresuid32 - sys_getresuid 209 64 getresuid sys_getresuid - 210 32 setresgid32 - sys_setresgid 210 64 setresgid sys_setresgid - -211 32 getresgid32 - compat_sys_getresgid +211 32 getresgid32 - sys_getresgid 211 64 getresgid sys_getresgid - -212 32 chown32 - compat_sys_chown +212 32 chown32 - sys_chown 212 64 chown sys_chown - 213 32 setuid32 - sys_setuid 213 64 setuid sys_setuid - @@ -225,166 +225,222 @@ 215 64 setfsuid sys_setfsuid - 216 32 setfsgid32 - sys_setfsgid 216 64 setfsgid sys_setfsgid - -217 common pivot_root sys_pivot_root compat_sys_pivot_root -218 common mincore sys_mincore compat_sys_mincore -219 common madvise sys_madvise compat_sys_madvise -220 common getdents64 sys_getdents64 compat_sys_getdents64 +217 common pivot_root sys_pivot_root sys_pivot_root +218 common mincore sys_mincore sys_mincore +219 common madvise sys_madvise sys_madvise +220 common getdents64 sys_getdents64 sys_getdents64 221 32 fcntl64 - compat_sys_fcntl64 222 common readahead sys_readahead compat_sys_s390_readahead 223 32 sendfile64 - compat_sys_sendfile64 -224 common setxattr sys_setxattr compat_sys_setxattr -225 common lsetxattr sys_lsetxattr compat_sys_lsetxattr -226 common fsetxattr sys_fsetxattr compat_sys_fsetxattr -227 common getxattr sys_getxattr compat_sys_getxattr -228 common lgetxattr sys_lgetxattr compat_sys_lgetxattr -229 common fgetxattr sys_fgetxattr compat_sys_fgetxattr -230 common listxattr sys_listxattr compat_sys_listxattr -231 common llistxattr sys_llistxattr compat_sys_llistxattr -232 common flistxattr sys_flistxattr compat_sys_flistxattr -233 common removexattr sys_removexattr compat_sys_removexattr -234 common lremovexattr sys_lremovexattr compat_sys_lremovexattr -235 common fremovexattr sys_fremovexattr compat_sys_fremovexattr +224 common setxattr sys_setxattr sys_setxattr +225 common lsetxattr sys_lsetxattr sys_lsetxattr +226 common fsetxattr sys_fsetxattr sys_fsetxattr +227 common getxattr sys_getxattr sys_getxattr +228 common lgetxattr sys_lgetxattr sys_lgetxattr +229 common fgetxattr sys_fgetxattr sys_fgetxattr +230 common listxattr sys_listxattr sys_listxattr +231 common llistxattr sys_llistxattr sys_llistxattr +232 common flistxattr sys_flistxattr sys_flistxattr +233 common removexattr sys_removexattr sys_removexattr +234 common lremovexattr sys_lremovexattr sys_lremovexattr +235 common fremovexattr sys_fremovexattr sys_fremovexattr 236 common gettid sys_gettid sys_gettid 237 common tkill sys_tkill sys_tkill -238 common futex sys_futex compat_sys_futex +238 common futex sys_futex sys_futex_time32 239 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity 240 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity 241 common tgkill sys_tgkill sys_tgkill 243 common io_setup sys_io_setup compat_sys_io_setup -244 common io_destroy sys_io_destroy compat_sys_io_destroy -245 common io_getevents sys_io_getevents compat_sys_io_getevents +244 common io_destroy sys_io_destroy sys_io_destroy +245 common io_getevents sys_io_getevents sys_io_getevents_time32 246 common io_submit sys_io_submit compat_sys_io_submit -247 common io_cancel sys_io_cancel compat_sys_io_cancel +247 common io_cancel sys_io_cancel sys_io_cancel 248 common exit_group sys_exit_group sys_exit_group 249 common epoll_create sys_epoll_create sys_epoll_create -250 common epoll_ctl sys_epoll_ctl compat_sys_epoll_ctl -251 common epoll_wait sys_epoll_wait compat_sys_epoll_wait -252 common set_tid_address sys_set_tid_address compat_sys_set_tid_address +250 common epoll_ctl sys_epoll_ctl sys_epoll_ctl +251 common epoll_wait sys_epoll_wait sys_epoll_wait +252 common set_tid_address sys_set_tid_address sys_set_tid_address 253 common fadvise64 sys_fadvise64_64 compat_sys_s390_fadvise64 254 common timer_create sys_timer_create compat_sys_timer_create -255 common timer_settime sys_timer_settime compat_sys_timer_settime -256 common timer_gettime sys_timer_gettime compat_sys_timer_gettime +255 common timer_settime sys_timer_settime sys_timer_settime32 +256 common timer_gettime sys_timer_gettime sys_timer_gettime32 257 common timer_getoverrun sys_timer_getoverrun sys_timer_getoverrun 258 common timer_delete sys_timer_delete sys_timer_delete -259 common clock_settime sys_clock_settime compat_sys_clock_settime -260 common clock_gettime sys_clock_gettime compat_sys_clock_gettime -261 common clock_getres sys_clock_getres compat_sys_clock_getres -262 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep +259 common clock_settime sys_clock_settime sys_clock_settime32 +260 common clock_gettime sys_clock_gettime sys_clock_gettime32 +261 common clock_getres sys_clock_getres sys_clock_getres_time32 +262 common clock_nanosleep sys_clock_nanosleep sys_clock_nanosleep_time32 264 32 fadvise64_64 - compat_sys_s390_fadvise64_64 265 common statfs64 sys_statfs64 compat_sys_statfs64 266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 -267 common remap_file_pages sys_remap_file_pages compat_sys_remap_file_pages +267 common remap_file_pages sys_remap_file_pages sys_remap_file_pages 268 common mbind sys_mbind compat_sys_mbind 269 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy 270 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy 271 common mq_open sys_mq_open compat_sys_mq_open -272 common mq_unlink sys_mq_unlink compat_sys_mq_unlink -273 common mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend -274 common mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive +272 common mq_unlink sys_mq_unlink sys_mq_unlink +273 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 +274 common mq_timedreceive sys_mq_timedreceive sys_mq_timedreceive_time32 275 common mq_notify sys_mq_notify compat_sys_mq_notify 276 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr 277 common kexec_load sys_kexec_load compat_sys_kexec_load -278 common add_key sys_add_key compat_sys_add_key -279 common request_key sys_request_key compat_sys_request_key +278 common add_key sys_add_key sys_add_key +279 common request_key sys_request_key sys_request_key 280 common keyctl sys_keyctl compat_sys_keyctl 281 common waitid sys_waitid compat_sys_waitid 282 common ioprio_set sys_ioprio_set sys_ioprio_set 283 common ioprio_get sys_ioprio_get sys_ioprio_get 284 common inotify_init sys_inotify_init sys_inotify_init -285 common inotify_add_watch sys_inotify_add_watch compat_sys_inotify_add_watch +285 common inotify_add_watch sys_inotify_add_watch sys_inotify_add_watch 286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch 287 common migrate_pages sys_migrate_pages compat_sys_migrate_pages 288 common openat sys_openat compat_sys_openat -289 common mkdirat sys_mkdirat compat_sys_mkdirat -290 common mknodat sys_mknodat compat_sys_mknodat -291 common fchownat sys_fchownat compat_sys_fchownat -292 common futimesat sys_futimesat compat_sys_futimesat +289 common mkdirat sys_mkdirat sys_mkdirat +290 common mknodat sys_mknodat sys_mknodat +291 common fchownat sys_fchownat sys_fchownat +292 common futimesat sys_futimesat sys_futimesat_time32 293 32 fstatat64 - compat_sys_s390_fstatat64 293 64 newfstatat sys_newfstatat - -294 common unlinkat sys_unlinkat compat_sys_unlinkat -295 common renameat sys_renameat compat_sys_renameat -296 common linkat sys_linkat compat_sys_linkat -297 common symlinkat sys_symlinkat compat_sys_symlinkat -298 common readlinkat sys_readlinkat compat_sys_readlinkat -299 common fchmodat sys_fchmodat compat_sys_fchmodat -300 common faccessat sys_faccessat compat_sys_faccessat -301 common pselect6 sys_pselect6 compat_sys_pselect6 -302 common ppoll sys_ppoll compat_sys_ppoll -303 common unshare sys_unshare compat_sys_unshare +294 common unlinkat sys_unlinkat sys_unlinkat +295 common renameat sys_renameat sys_renameat +296 common linkat sys_linkat sys_linkat +297 common symlinkat sys_symlinkat sys_symlinkat +298 common readlinkat sys_readlinkat sys_readlinkat +299 common fchmodat sys_fchmodat sys_fchmodat +300 common faccessat sys_faccessat sys_faccessat +301 common pselect6 sys_pselect6 compat_sys_pselect6_time32 +302 common ppoll sys_ppoll compat_sys_ppoll_time32 +303 common unshare sys_unshare sys_unshare 304 common set_robust_list sys_set_robust_list compat_sys_set_robust_list 305 common get_robust_list sys_get_robust_list compat_sys_get_robust_list -306 common splice sys_splice compat_sys_splice +306 common splice sys_splice sys_splice 307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range -308 common tee sys_tee compat_sys_tee +308 common tee sys_tee sys_tee 309 common vmsplice sys_vmsplice sys_vmsplice 310 common move_pages sys_move_pages compat_sys_move_pages -311 common getcpu sys_getcpu compat_sys_getcpu +311 common getcpu sys_getcpu sys_getcpu 312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait -313 common utimes sys_utimes compat_sys_utimes +313 common utimes sys_utimes sys_utimes_time32 314 common fallocate sys_fallocate compat_sys_s390_fallocate -315 common utimensat sys_utimensat compat_sys_utimensat +315 common utimensat sys_utimensat sys_utimensat_time32 316 common signalfd sys_signalfd compat_sys_signalfd 317 common timerfd - - 318 common eventfd sys_eventfd sys_eventfd 319 common timerfd_create sys_timerfd_create sys_timerfd_create -320 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime -321 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime +320 common timerfd_settime sys_timerfd_settime sys_timerfd_settime32 +321 common timerfd_gettime sys_timerfd_gettime sys_timerfd_gettime32 322 common signalfd4 sys_signalfd4 compat_sys_signalfd4 323 common eventfd2 sys_eventfd2 sys_eventfd2 324 common inotify_init1 sys_inotify_init1 sys_inotify_init1 -325 common pipe2 sys_pipe2 compat_sys_pipe2 +325 common pipe2 sys_pipe2 sys_pipe2 326 common dup3 sys_dup3 sys_dup3 327 common epoll_create1 sys_epoll_create1 sys_epoll_create1 328 common preadv sys_preadv compat_sys_preadv 329 common pwritev sys_pwritev compat_sys_pwritev 330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo -331 common perf_event_open sys_perf_event_open compat_sys_perf_event_open +331 common perf_event_open sys_perf_event_open sys_perf_event_open 332 common fanotify_init sys_fanotify_init sys_fanotify_init 333 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark -334 common prlimit64 sys_prlimit64 compat_sys_prlimit64 -335 common name_to_handle_at sys_name_to_handle_at compat_sys_name_to_handle_at +334 common prlimit64 sys_prlimit64 sys_prlimit64 +335 common name_to_handle_at sys_name_to_handle_at sys_name_to_handle_at 336 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at -337 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +337 common clock_adjtime sys_clock_adjtime sys_clock_adjtime32 338 common syncfs sys_syncfs sys_syncfs 339 common setns sys_setns sys_setns 340 common process_vm_readv sys_process_vm_readv sys_process_vm_readv 341 common process_vm_writev sys_process_vm_writev sys_process_vm_writev 342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr -343 common kcmp sys_kcmp compat_sys_kcmp -344 common finit_module sys_finit_module compat_sys_finit_module -345 common sched_setattr sys_sched_setattr compat_sys_sched_setattr -346 common sched_getattr sys_sched_getattr compat_sys_sched_getattr -347 common renameat2 sys_renameat2 compat_sys_renameat2 -348 common seccomp sys_seccomp compat_sys_seccomp -349 common getrandom sys_getrandom compat_sys_getrandom -350 common memfd_create sys_memfd_create compat_sys_memfd_create -351 common bpf sys_bpf compat_sys_bpf -352 common s390_pci_mmio_write sys_s390_pci_mmio_write compat_sys_s390_pci_mmio_write -353 common s390_pci_mmio_read sys_s390_pci_mmio_read compat_sys_s390_pci_mmio_read +343 common kcmp sys_kcmp sys_kcmp +344 common finit_module sys_finit_module sys_finit_module +345 common sched_setattr sys_sched_setattr sys_sched_setattr +346 common sched_getattr sys_sched_getattr sys_sched_getattr +347 common renameat2 sys_renameat2 sys_renameat2 +348 common seccomp sys_seccomp sys_seccomp +349 common getrandom sys_getrandom sys_getrandom +350 common memfd_create sys_memfd_create sys_memfd_create +351 common bpf sys_bpf sys_bpf +352 common s390_pci_mmio_write sys_s390_pci_mmio_write sys_s390_pci_mmio_write +353 common s390_pci_mmio_read sys_s390_pci_mmio_read sys_s390_pci_mmio_read 354 common execveat sys_execveat compat_sys_execveat 355 common userfaultfd sys_userfaultfd sys_userfaultfd 356 common membarrier sys_membarrier sys_membarrier -357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg +357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg_time32 358 common sendmmsg sys_sendmmsg compat_sys_sendmmsg 359 common socket sys_socket sys_socket -360 common socketpair sys_socketpair compat_sys_socketpair -361 common bind sys_bind compat_sys_bind -362 common connect sys_connect compat_sys_connect +360 common socketpair sys_socketpair sys_socketpair +361 common bind sys_bind sys_bind +362 common connect sys_connect sys_connect 363 common listen sys_listen sys_listen -364 common accept4 sys_accept4 compat_sys_accept4 +364 common accept4 sys_accept4 sys_accept4 365 common getsockopt sys_getsockopt sys_getsockopt 366 common setsockopt sys_setsockopt sys_setsockopt -367 common getsockname sys_getsockname compat_sys_getsockname -368 common getpeername sys_getpeername compat_sys_getpeername -369 common sendto sys_sendto compat_sys_sendto +367 common getsockname sys_getsockname sys_getsockname +368 common getpeername sys_getpeername sys_getpeername +369 common sendto sys_sendto sys_sendto 370 common sendmsg sys_sendmsg compat_sys_sendmsg 371 common recvfrom sys_recvfrom compat_sys_recvfrom 372 common recvmsg sys_recvmsg compat_sys_recvmsg 373 common shutdown sys_shutdown sys_shutdown -374 common mlock2 sys_mlock2 compat_sys_mlock2 -375 common copy_file_range sys_copy_file_range compat_sys_copy_file_range +374 common mlock2 sys_mlock2 sys_mlock2 +375 common copy_file_range sys_copy_file_range sys_copy_file_range 376 common preadv2 sys_preadv2 compat_sys_preadv2 377 common pwritev2 sys_pwritev2 compat_sys_pwritev2 -378 common s390_guarded_storage sys_s390_guarded_storage compat_sys_s390_guarded_storage -379 common statx sys_statx compat_sys_statx -380 common s390_sthyi sys_s390_sthyi compat_sys_s390_sthyi +378 common s390_guarded_storage sys_s390_guarded_storage sys_s390_guarded_storage +379 common statx sys_statx sys_statx +380 common s390_sthyi sys_s390_sthyi sys_s390_sthyi +381 common kexec_file_load sys_kexec_file_load sys_kexec_file_load +382 common io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents +383 common rseq sys_rseq sys_rseq +384 common pkey_mprotect sys_pkey_mprotect sys_pkey_mprotect +385 common pkey_alloc sys_pkey_alloc sys_pkey_alloc +386 common pkey_free sys_pkey_free sys_pkey_free +# room for arch specific syscalls +392 64 semtimedop sys_semtimedop - +393 common semget sys_semget sys_semget +394 common semctl sys_semctl compat_sys_semctl +395 common shmget sys_shmget sys_shmget +396 common shmctl sys_shmctl compat_sys_shmctl +397 common shmat sys_shmat compat_sys_shmat +398 common shmdt sys_shmdt sys_shmdt +399 common msgget sys_msgget sys_msgget +400 common msgsnd sys_msgsnd compat_sys_msgsnd +401 common msgrcv sys_msgrcv compat_sys_msgrcv +402 common msgctl sys_msgctl compat_sys_msgctl +403 32 clock_gettime64 - sys_clock_gettime +404 32 clock_settime64 - sys_clock_settime +405 32 clock_adjtime64 - sys_clock_adjtime +406 32 clock_getres_time64 - sys_clock_getres +407 32 clock_nanosleep_time64 - sys_clock_nanosleep +408 32 timer_gettime64 - sys_timer_gettime +409 32 timer_settime64 - sys_timer_settime +410 32 timerfd_gettime64 - sys_timerfd_gettime +411 32 timerfd_settime64 - sys_timerfd_settime +412 32 utimensat_time64 - sys_utimensat +413 32 pselect6_time64 - compat_sys_pselect6_time64 +414 32 ppoll_time64 - compat_sys_ppoll_time64 +416 32 io_pgetevents_time64 - sys_io_pgetevents +417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64 +418 32 mq_timedsend_time64 - sys_mq_timedsend +419 32 mq_timedreceive_time64 - sys_mq_timedreceive +420 32 semtimedop_time64 - sys_semtimedop +421 32 rt_sigtimedwait_time64 - compat_sys_rt_sigtimedwait_time64 +422 32 futex_time64 - sys_futex +423 32 sched_rr_get_interval_time64 - sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register sys_io_uring_register +428 common open_tree sys_open_tree sys_open_tree +429 common move_mount sys_move_mount sys_move_mount +430 common fsopen sys_fsopen sys_fsopen +431 common fsconfig sys_fsconfig sys_fsconfig +432 common fsmount sys_fsmount sys_fsmount +433 common fspick sys_fspick sys_fspick +434 common pidfd_open sys_pidfd_open sys_pidfd_open +435 common clone3 sys_clone3 sys_clone3 +436 common close_range sys_close_range sys_close_range +437 common openat2 sys_openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 sys_faccessat2 +440 common process_madvise sys_process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 -- cgit v1.3.1 From 23331eeb731a503aaa74d167055eeedc2073ff09 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:17 +0200 Subject: perf tests: Improve topology test to check all aggregation types Improve the topology test to check all aggregation types. This is to lock down the behaviour before 'id' is changed into a struct in later commits. Committer testing: $ perf test topology 41: Session topology: Ok $ $ perf test -v topology 41: Session topology: --- start --- test child forked, pid 965552 templ file: /tmp/perf-test-mO7NtI Problems creating module maps, continuing anyway... CPU 0, core 0, socket 0 CPU 1, core 1, socket 0 CPU 2, core 2, socket 0 CPU 3, core 4, socket 0 CPU 4, core 5, socket 0 CPU 5, core 6, socket 0 CPU 6, core 8, socket 0 CPU 7, core 9, socket 0 CPU 8, core 10, socket 0 CPU 9, core 12, socket 0 CPU 10, core 13, socket 0 CPU 11, core 14, socket 0 CPU 12, core 0, socket 0 CPU 13, core 1, socket 0 CPU 14, core 2, socket 0 CPU 15, core 4, socket 0 CPU 16, core 5, socket 0 CPU 17, core 6, socket 0 CPU 18, core 8, socket 0 CPU 19, core 9, socket 0 CPU 20, core 10, socket 0 CPU 21, core 12, socket 0 CPU 22, core 13, socket 0 CPU 23, core 14, socket 0 test child finished with 0 ---- end ---- Session topology: Ok $ Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/topology.c | 53 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 165feedc7863..f988b3109afc 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -64,10 +64,11 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) .path = path, .mode = PERF_DATA_MODE_READ, }; - int i; + int i, id; session = perf_session__new(&data, false, NULL); TEST_ASSERT_VAL("can't get session", !IS_ERR(session)); + cpu__setup_cpunode_map(); /* On platforms with large numbers of CPUs process_cpu_topology() * might issue an error while reading the perf.data file section @@ -85,11 +86,18 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) * "socket_id number is too big. You may need to upgrade the * perf tool." * - * This is the reason why this test might be skipped. + * This is the reason why this test might be skipped. aarch64 and + * s390 always write this part of the header, even when the above + * condition is true (see do_core_id_test in header.c). So always + * run this test on those platforms. */ - if (!session->header.env.cpu) + if (!session->header.env.cpu + && strncmp(session->header.env.arch, "s390", 4) + && strncmp(session->header.env.arch, "aarch64", 7)) return TEST_SKIP; + TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu); + for (i = 0; i < session->header.env.nr_cpus_avail; i++) { if (!cpu_map__has(map, i)) continue; @@ -98,14 +106,45 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) session->header.env.cpu[i].socket_id); } + // Test that core ID contains socket, die and core + for (i = 0; i < map->nr; i++) { + id = cpu_map__get_core(map, i, NULL); + TEST_ASSERT_VAL("Core map - Core ID doesn't match", + session->header.env.cpu[map->map[i]].core_id == cpu_map__id_to_cpu(id)); + + TEST_ASSERT_VAL("Core map - Socket ID doesn't match", + session->header.env.cpu[map->map[i]].socket_id == + cpu_map__id_to_socket(id)); + + TEST_ASSERT_VAL("Core map - Die ID doesn't match", + session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id)); + } + + // Test that die ID contains socket and die for (i = 0; i < map->nr; i++) { - TEST_ASSERT_VAL("Core ID doesn't match", - (session->header.env.cpu[map->map[i]].core_id == (cpu_map__get_core(map, i, NULL) & 0xffff))); + id = cpu_map__get_die(map, i, NULL); + TEST_ASSERT_VAL("Die map - Socket ID doesn't match", + session->header.env.cpu[map->map[i]].socket_id == + cpu_map__id_to_socket(id << 16)); + + TEST_ASSERT_VAL("Die map - Die ID doesn't match", + session->header.env.cpu[map->map[i]].die_id == + cpu_map__id_to_die(id << 16)); + } - TEST_ASSERT_VAL("Socket ID doesn't match", - (session->header.env.cpu[map->map[i]].socket_id == cpu_map__get_socket(map, i, NULL))); + // Test that socket ID contains only socket + for (i = 0; i < map->nr; i++) { + id = cpu_map__get_socket(map, i, NULL); + TEST_ASSERT_VAL("Socket map - Socket ID doesn't match", + session->header.env.cpu[map->map[i]].socket_id == id); } + // Test that node ID contains only node + for (i = 0; i < map->nr; i++) { + id = cpu_map__get_node(map, i, NULL); + TEST_ASSERT_VAL("Node map - Node ID doesn't match", + cpu__get_node(map->map[i]) == id); + } perf_session__delete(session); return 0; -- cgit v1.3.1 From 91585846f105ef2e3f479a5124a264ebb770f6ab Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:18 +0200 Subject: perf cpumap: Use existing allocator to avoid using malloc Use the existing allocator for perf_cpu_map to avoid use of raw malloc. This could cause an issue in later commits where the size of perf_cpu_map is changed. No functional changes. Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-3-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index dc5c5e6fc502..20e3a75953fc 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -132,15 +132,16 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, int (*f)(struct perf_cpu_map *map, int cpu, void *data), void *data) { - struct perf_cpu_map *c; int nr = cpus->nr; + struct perf_cpu_map *c = perf_cpu_map__empty_new(nr); int cpu, s1, s2; - /* allocate as much as possible */ - c = calloc(1, sizeof(*c) + nr * sizeof(int)); if (!c) return -1; + /* Reset size as it may only be partially filled */ + c->nr = 0; + for (cpu = 0; cpu < nr; cpu++) { s1 = f(cpus, cpu, data); for (s2 = 0; s2 < c->nr; s2++) { @@ -155,7 +156,6 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, /* ensure we process id in increasing order */ qsort(c->map, c->nr, sizeof(int), cmp_ids); - refcount_set(&c->refcnt, 1); *res = c; return 0; } -- cgit v1.3.1 From fa265e59b81a09fa3d88f3322b1e44d583cac9b0 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:19 +0200 Subject: perf cpumap: Add new struct for cpu aggregation This struct currently has only a single int member so that it can be used as a drop in replacement for the existing behaviour. Comparison and constructor functions have also been added that will replace usages of '==' and '= -1'. No functional changes. Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-4-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 18 ++++++++++++++++++ tools/perf/util/cpumap.h | 8 ++++++++ 2 files changed, 26 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 20e3a75953fc..8624948b4f1d 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -586,3 +586,21 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ return online; } + +bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b) +{ + return a.id == b.id; +} + +bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) +{ + return a.id == -1; +} + +struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) +{ + struct aggr_cpu_id ret = { + .id = -1 + }; + return ret; +} diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 3a442f021468..1cdccc69cd4b 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -7,6 +7,10 @@ #include #include +struct aggr_cpu_id { + int id; +}; + struct perf_record_cpu_map_data; struct perf_cpu_map *perf_cpu_map__empty_new(int nr); @@ -64,4 +68,8 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, int cpu_map__cpu(struct perf_cpu_map *cpus, int idx); bool cpu_map__has(struct perf_cpu_map *cpus, int cpu); +bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b); +bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a); +struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void); + #endif /* __PERF_CPUMAP_H */ -- cgit v1.3.1 From 2760f5a14fe7aa466e38bbb92d0284fffc0e4da0 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:20 +0200 Subject: perf stat: Replace aggregation ID with a struct Replace all occurences of the usage of int with the new struct cpu_aggr_id. No functional changes. Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-5-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 76 ++++++++++++++++------------- tools/perf/tests/topology.c | 17 ++++--- tools/perf/util/cpumap.c | 82 ++++++++++++++++++------------- tools/perf/util/cpumap.h | 10 ++-- tools/perf/util/stat-display.c | 108 ++++++++++++++++++++++++----------------- tools/perf/util/stat.c | 2 +- tools/perf/util/stat.h | 5 +- 7 files changed, 173 insertions(+), 127 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 89c32692f40c..1ff76d36d1c9 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1186,65 +1186,67 @@ static struct option stat_options[] = { OPT_END() }; -static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int cpu) { return cpu_map__get_socket(map, cpu, NULL); } -static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_die(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int cpu) { return cpu_map__get_die(map, cpu, NULL); } -static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int cpu) { return cpu_map__get_core(map, cpu, NULL); } -static int perf_stat__get_node(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int cpu) { return cpu_map__get_node(map, cpu, NULL); } -static int perf_stat__get_aggr(struct perf_stat_config *config, +static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, aggr_get_id_t get_id, struct perf_cpu_map *map, int idx) { int cpu; + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); if (idx >= map->nr) - return -1; + return id; cpu = map->map[idx]; if (config->cpus_aggr_map->map[cpu] == -1) - config->cpus_aggr_map->map[cpu] = get_id(config, map, idx); + config->cpus_aggr_map->map[cpu] = get_id(config, map, idx).id; - return config->cpus_aggr_map->map[cpu]; + id.id = config->cpus_aggr_map->map[cpu]; + return id; } -static int perf_stat__get_socket_cached(struct perf_stat_config *config, +static struct aggr_cpu_id perf_stat__get_socket_cached(struct perf_stat_config *config, struct perf_cpu_map *map, int idx) { return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); } -static int perf_stat__get_die_cached(struct perf_stat_config *config, +static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *config, struct perf_cpu_map *map, int idx) { return perf_stat__get_aggr(config, perf_stat__get_die, map, idx); } -static int perf_stat__get_core_cached(struct perf_stat_config *config, +static struct aggr_cpu_id perf_stat__get_core_cached(struct perf_stat_config *config, struct perf_cpu_map *map, int idx) { return perf_stat__get_aggr(config, perf_stat__get_core, map, idx); } -static int perf_stat__get_node_cached(struct perf_stat_config *config, +static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *config, struct perf_cpu_map *map, int idx) { return perf_stat__get_aggr(config, perf_stat__get_node, map, idx); @@ -1345,18 +1347,23 @@ static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *m return cpu; } -static int perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data) { struct perf_env *env = data; int cpu = perf_env__get_cpu(env, map, idx); + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + + if (cpu != -1) + id.id = env->cpu[cpu].socket_id; - return cpu == -1 ? -1 : env->cpu[cpu].socket_id; + return id; } -static int perf_env__get_die(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, void *data) { struct perf_env *env = data; - int die_id = -1, cpu = perf_env__get_cpu(env, map, idx); + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + int cpu = perf_env__get_cpu(env, map, idx); if (cpu != -1) { /* @@ -1366,21 +1373,22 @@ static int perf_env__get_die(struct perf_cpu_map *map, int idx, void *data) * socket + die id */ if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) - return -1; + return cpu_map__empty_aggr_cpu_id(); if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) - return -1; + return cpu_map__empty_aggr_cpu_id(); - die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff); + id.id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff); } - return die_id; + return id; } -static int perf_env__get_core(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, void *data) { struct perf_env *env = data; - int core = -1, cpu = perf_env__get_cpu(env, map, idx); + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + int cpu = perf_env__get_cpu(env, map, idx); if (cpu != -1) { /* @@ -1391,27 +1399,29 @@ static int perf_env__get_core(struct perf_cpu_map *map, int idx, void *data) * socket + die id + core id */ if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) - return -1; + return cpu_map__empty_aggr_cpu_id(); if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) - return -1; + return cpu_map__empty_aggr_cpu_id(); if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n")) - return -1; + return cpu_map__empty_aggr_cpu_id(); - core = (env->cpu[cpu].socket_id << 24) | + id.id = (env->cpu[cpu].socket_id << 24) | (env->cpu[cpu].die_id << 16) | (env->cpu[cpu].core_id & 0xffff); } - return core; + return id; } -static int perf_env__get_node(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx, void *data) { int cpu = perf_env__get_cpu(data, map, idx); + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - return perf_env__numa_node(data, cpu); + id.id = perf_env__numa_node(data, cpu); + return id; } static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus, @@ -1438,24 +1448,24 @@ static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *c return cpu_map__build_map(cpus, nodep, perf_env__get_node, env); } -static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int idx) { return perf_env__get_socket(map, idx, &perf_stat.session->header.env); } -static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int idx) { return perf_env__get_die(map, idx, &perf_stat.session->header.env); } -static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int idx) { return perf_env__get_core(map, idx, &perf_stat.session->header.env); } -static int perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int idx) { return perf_env__get_node(map, idx, &perf_stat.session->header.env); diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index f988b3109afc..7a7d16b3950a 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -64,7 +64,8 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) .path = path, .mode = PERF_DATA_MODE_READ, }; - int i, id; + int i; + struct aggr_cpu_id id; session = perf_session__new(&data, false, NULL); TEST_ASSERT_VAL("can't get session", !IS_ERR(session)); @@ -110,14 +111,14 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) for (i = 0; i < map->nr; i++) { id = cpu_map__get_core(map, i, NULL); TEST_ASSERT_VAL("Core map - Core ID doesn't match", - session->header.env.cpu[map->map[i]].core_id == cpu_map__id_to_cpu(id)); + session->header.env.cpu[map->map[i]].core_id == cpu_map__id_to_cpu(id.id)); TEST_ASSERT_VAL("Core map - Socket ID doesn't match", session->header.env.cpu[map->map[i]].socket_id == - cpu_map__id_to_socket(id)); + cpu_map__id_to_socket(id.id)); TEST_ASSERT_VAL("Core map - Die ID doesn't match", - session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id)); + session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id.id)); } // Test that die ID contains socket and die @@ -125,25 +126,25 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) id = cpu_map__get_die(map, i, NULL); TEST_ASSERT_VAL("Die map - Socket ID doesn't match", session->header.env.cpu[map->map[i]].socket_id == - cpu_map__id_to_socket(id << 16)); + cpu_map__id_to_socket(id.id << 16)); TEST_ASSERT_VAL("Die map - Die ID doesn't match", session->header.env.cpu[map->map[i]].die_id == - cpu_map__id_to_die(id << 16)); + cpu_map__id_to_die(id.id << 16)); } // Test that socket ID contains only socket for (i = 0; i < map->nr; i++) { id = cpu_map__get_socket(map, i, NULL); TEST_ASSERT_VAL("Socket map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == id); + session->header.env.cpu[map->map[i]].socket_id == id.id); } // Test that node ID contains only node for (i = 0; i < map->nr; i++) { id = cpu_map__get_node(map, i, NULL); TEST_ASSERT_VAL("Node map - Node ID doesn't match", - cpu__get_node(map->map[i]) == id); + cpu__get_node(map->map[i]) == id.id); } perf_session__delete(session); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 8624948b4f1d..e05a12bde073 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -111,30 +111,37 @@ int cpu_map__get_socket_id(int cpu) return ret ?: value; } -int cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data __maybe_unused) +struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, + void *data __maybe_unused) { int cpu; + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); if (idx > map->nr) - return -1; + return id; cpu = map->map[idx]; - return cpu_map__get_socket_id(cpu); + id.id = cpu_map__get_socket_id(cpu); + return id; } -static int cmp_ids(const void *a, const void *b) +static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) { - return *(int *)a - *(int *)b; + struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer; + struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer; + + return a->id - b->id; } int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, - int (*f)(struct perf_cpu_map *map, int cpu, void *data), + struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data), void *data) { int nr = cpus->nr; struct perf_cpu_map *c = perf_cpu_map__empty_new(nr); - int cpu, s1, s2; + int cpu, s2; + struct aggr_cpu_id s1; if (!c) return -1; @@ -145,16 +152,16 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, for (cpu = 0; cpu < nr; cpu++) { s1 = f(cpus, cpu, data); for (s2 = 0; s2 < c->nr; s2++) { - if (s1 == c->map[s2]) + if (s1.id == c->map[s2]) break; } if (s2 == c->nr) { - c->map[c->nr] = s1; + c->map[c->nr] = s1.id; c->nr++; } } /* ensure we process id in increasing order */ - qsort(c->map, c->nr, sizeof(int), cmp_ids); + qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), cmp_aggr_cpu_id); *res = c; return 0; @@ -167,23 +174,24 @@ int cpu_map__get_die_id(int cpu) return ret ?: value; } -int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data) +struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data) { - int cpu, die_id, s; + int cpu, s; + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); if (idx > map->nr) - return -1; + return id; cpu = map->map[idx]; - die_id = cpu_map__get_die_id(cpu); + id.id = cpu_map__get_die_id(cpu); /* There is no die_id on legacy system. */ - if (die_id == -1) - die_id = 0; + if (id.id == -1) + id.id = 0; - s = cpu_map__get_socket(map, idx, data); + s = cpu_map__get_socket(map, idx, data).id; if (s == -1) - return -1; + return cpu_map__empty_aggr_cpu_id(); /* * Encode socket in bit range 15:8 @@ -191,13 +199,14 @@ int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data) * we need a global id. So we combine * socket + die id */ - if (WARN_ONCE(die_id >> 8, "The die id number is too big.\n")) - return -1; + if (WARN_ONCE(id.id >> 8, "The die id number is too big.\n")) + return cpu_map__empty_aggr_cpu_id(); if (WARN_ONCE(s >> 8, "The socket id number is too big.\n")) - return -1; + return cpu_map__empty_aggr_cpu_id(); - return (s << 8) | (die_id & 0xff); + id.id = (s << 8) | (id.id & 0xff); + return id; } int cpu_map__get_core_id(int cpu) @@ -211,21 +220,22 @@ int cpu_map__get_node_id(int cpu) return cpu__get_node(cpu); } -int cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data) +struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data) { - int cpu, s_die; + int cpu; + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); if (idx > map->nr) - return -1; + return id; cpu = map->map[idx]; cpu = cpu_map__get_core_id(cpu); - /* s_die is the combination of socket + die id */ - s_die = cpu_map__get_die(map, idx, data); - if (s_die == -1) - return -1; + /* cpu_map__get_die returns the combination of socket + die id */ + id = cpu_map__get_die(map, idx, data); + if (cpu_map__aggr_cpu_id_is_empty(id)) + return id; /* * encode socket in bit range 31:24 @@ -235,17 +245,21 @@ int cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data) * socket + die id + core id */ if (WARN_ONCE(cpu >> 16, "The core id number is too big.\n")) - return -1; + return cpu_map__empty_aggr_cpu_id(); - return (s_die << 16) | (cpu & 0xffff); + id.id = (id.id << 16) | (cpu & 0xffff); + return id; } -int cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data __maybe_unused) +struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data __maybe_unused) { + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + if (idx < 0 || idx >= map->nr) - return -1; + return id; - return cpu_map__get_node_id(map->map[idx]); + id.id = cpu_map__get_node_id(map->map[idx]); + return id; } int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct perf_cpu_map **sockp) diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 1cdccc69cd4b..b8c2288a3f6d 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -19,13 +19,13 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp); int cpu_map__get_socket_id(int cpu); -int cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data); +struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data); int cpu_map__get_die_id(int cpu); -int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data); +struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data); int cpu_map__get_core_id(int cpu); -int cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data); +struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data); int cpu_map__get_node_id(int cpu); -int cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data); +struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data); int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct perf_cpu_map **sockp); int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct perf_cpu_map **diep); int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct perf_cpu_map **corep); @@ -62,7 +62,7 @@ int cpu__max_present_cpu(void); int cpu__get_node(int cpu); int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, - int (*f)(struct perf_cpu_map *map, int cpu, void *data), + struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data), void *data); int cpu_map__cpu(struct perf_cpu_map *cpus, int idx); diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index fee7543843a8..8f4ee3621863 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -68,15 +68,15 @@ static void print_cgroup(struct perf_stat_config *config, struct evsel *evsel) static void aggr_printout(struct perf_stat_config *config, - struct evsel *evsel, int id, int nr) + struct evsel *evsel, struct aggr_cpu_id id, int nr) { switch (config->aggr_mode) { case AGGR_CORE: fprintf(config->output, "S%d-D%d-C%*d%s%*d%s", - cpu_map__id_to_socket(id), - cpu_map__id_to_die(id), + cpu_map__id_to_socket(id.id), + cpu_map__id_to_die(id.id), config->csv_output ? 0 : -8, - cpu_map__id_to_cpu(id), + cpu_map__id_to_cpu(id.id), config->csv_sep, config->csv_output ? 0 : 4, nr, @@ -84,9 +84,9 @@ static void aggr_printout(struct perf_stat_config *config, break; case AGGR_DIE: fprintf(config->output, "S%d-D%*d%s%*d%s", - cpu_map__id_to_socket(id << 16), + cpu_map__id_to_socket(id.id << 16), config->csv_output ? 0 : -8, - cpu_map__id_to_die(id << 16), + cpu_map__id_to_die(id.id << 16), config->csv_sep, config->csv_output ? 0 : 4, nr, @@ -95,7 +95,7 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_SOCKET: fprintf(config->output, "S%*d%s%*d%s", config->csv_output ? 0 : -5, - id, + id.id, config->csv_sep, config->csv_output ? 0 : 4, nr, @@ -104,7 +104,7 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_NODE: fprintf(config->output, "N%*d%s%*d%s", config->csv_output ? 0 : -5, - id, + id.id, config->csv_sep, config->csv_output ? 0 : 4, nr, @@ -113,23 +113,23 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_NONE: if (evsel->percore && !config->percore_show_thread) { fprintf(config->output, "S%d-D%d-C%*d%s", - cpu_map__id_to_socket(id), - cpu_map__id_to_die(id), + cpu_map__id_to_socket(id.id), + cpu_map__id_to_die(id.id), config->csv_output ? 0 : -3, - cpu_map__id_to_cpu(id), config->csv_sep); - } else if (id > -1) { + cpu_map__id_to_cpu(id.id), config->csv_sep); + } else if (id.id > -1) { fprintf(config->output, "CPU%*d%s", config->csv_output ? 0 : -7, - evsel__cpus(evsel)->map[id], + evsel__cpus(evsel)->map[id.id], config->csv_sep); } break; case AGGR_THREAD: fprintf(config->output, "%*s-%*d%s", config->csv_output ? 0 : 16, - perf_thread_map__comm(evsel->core.threads, id), + perf_thread_map__comm(evsel->core.threads, id.id), config->csv_output ? 0 : -8, - perf_thread_map__pid(evsel->core.threads, id), + perf_thread_map__pid(evsel->core.threads, id.id), config->csv_sep); break; case AGGR_GLOBAL: @@ -144,7 +144,8 @@ struct outstate { bool newline; const char *prefix; int nfields; - int id, nr; + int nr; + struct aggr_cpu_id id; struct evsel *evsel; }; @@ -319,13 +320,13 @@ static void print_metric_header(struct perf_stat_config *config, } static int first_shadow_cpu(struct perf_stat_config *config, - struct evsel *evsel, int id) + struct evsel *evsel, struct aggr_cpu_id id) { struct evlist *evlist = evsel->evlist; int i; if (config->aggr_mode == AGGR_NONE) - return id; + return id.id; if (!config->aggr_get_id) return 0; @@ -333,14 +334,17 @@ static int first_shadow_cpu(struct perf_stat_config *config, for (i = 0; i < evsel__nr_cpus(evsel); i++) { int cpu2 = evsel__cpus(evsel)->map[i]; - if (config->aggr_get_id(config, evlist->core.cpus, cpu2) == id) + if (cpu_map__compare_aggr_cpu_id( + config->aggr_get_id(config, evlist->core.cpus, cpu2), + id)) { return cpu2; + } } return 0; } static void abs_printout(struct perf_stat_config *config, - int id, int nr, struct evsel *evsel, double avg) + struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg) { FILE *output = config->output; double sc = evsel->scale; @@ -393,7 +397,7 @@ static bool is_mixed_hw_group(struct evsel *counter) return false; } -static void printout(struct perf_stat_config *config, int id, int nr, +static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr, struct evsel *counter, double uval, char *prefix, u64 run, u64 ena, double noise, struct runtime_stat *st) @@ -496,17 +500,18 @@ static void printout(struct perf_stat_config *config, int id, int nr, static void aggr_update_shadow(struct perf_stat_config *config, struct evlist *evlist) { - int cpu, s2, id, s; + int cpu, s; + struct aggr_cpu_id s2, id; u64 val; struct evsel *counter; for (s = 0; s < config->aggr_map->nr; s++) { - id = config->aggr_map->map[s]; + id.id = config->aggr_map->map[s]; evlist__for_each_entry(evlist, counter) { val = 0; for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { s2 = config->aggr_get_id(config, evlist->core.cpus, cpu); - if (s2 != id) + if (!cpu_map__compare_aggr_cpu_id(s2, id)) continue; val += perf_counts(counter->counts, cpu, 0)->val; } @@ -584,7 +589,7 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter, struct aggr_data { u64 ena, run, val; - int id; + struct aggr_cpu_id id; int nr; int cpu; }; @@ -593,13 +598,14 @@ static void aggr_cb(struct perf_stat_config *config, struct evsel *counter, void *data, bool first) { struct aggr_data *ad = data; - int cpu, s2; + int cpu; + struct aggr_cpu_id s2; for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { struct perf_counts_values *counts; s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu); - if (s2 != ad->id) + if (!cpu_map__compare_aggr_cpu_id(s2, ad->id)) continue; if (first) ad->nr++; @@ -628,10 +634,11 @@ static void print_counter_aggrdata(struct perf_stat_config *config, struct aggr_data ad; FILE *output = config->output; u64 ena, run, val; - int id, nr; + int nr; + struct aggr_cpu_id id; double uval; - ad.id = id = config->aggr_map->map[s]; + ad.id.id = id.id = config->aggr_map->map[s]; ad.val = ad.ena = ad.run = 0; ad.nr = 0; if (!collect_data(config, counter, aggr_cb, &ad)) @@ -649,8 +656,12 @@ static void print_counter_aggrdata(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(config, cpu != -1 ? cpu : id, nr, counter, uval, prefix, - run, ena, 1.0, &rt_stat); + if (cpu != -1) { + id = cpu_map__empty_aggr_cpu_id(); + id.id = cpu; + } + printout(config, id, nr, counter, uval, + prefix, run, ena, 1.0, &rt_stat); if (!metric_only) fputc('\n', output); } @@ -728,7 +739,8 @@ static struct perf_aggr_thread_value *sort_aggr_thread( continue; buf[i].counter = counter; - buf[i].id = thread; + buf[i].id = cpu_map__empty_aggr_cpu_id(); + buf[i].id.id = thread; buf[i].uval = uval; buf[i].val = val; buf[i].run = run; @@ -751,7 +763,8 @@ static void print_aggr_thread(struct perf_stat_config *config, FILE *output = config->output; int nthreads = perf_thread_map__nr(counter->core.threads); int ncpus = perf_cpu_map__nr(counter->core.cpus); - int thread, sorted_threads, id; + int thread, sorted_threads; + struct aggr_cpu_id id; struct perf_aggr_thread_value *buf; buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads, _target); @@ -768,7 +781,7 @@ static void print_aggr_thread(struct perf_stat_config *config, if (config->stats) printout(config, id, 0, buf[thread].counter, buf[thread].uval, prefix, buf[thread].run, buf[thread].ena, 1.0, - &config->stats[id]); + &config->stats[id.id]); else printout(config, id, 0, buf[thread].counter, buf[thread].uval, prefix, buf[thread].run, buf[thread].ena, 1.0, @@ -814,8 +827,8 @@ static void print_counter_aggr(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = cd.avg * counter->scale; - printout(config, -1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, - cd.avg, &rt_stat); + printout(config, cpu_map__empty_aggr_cpu_id(), 0, counter, uval, prefix, cd.avg_running, + cd.avg_enabled, cd.avg, &rt_stat); if (!metric_only) fprintf(output, "\n"); } @@ -842,6 +855,7 @@ static void print_counter(struct perf_stat_config *config, u64 ena, run, val; double uval; int cpu; + struct aggr_cpu_id id; for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { struct aggr_data ad = { .cpu = cpu }; @@ -856,8 +870,10 @@ static void print_counter(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0, - &rt_stat); + id = cpu_map__empty_aggr_cpu_id(); + id.id = cpu; + printout(config, id, 0, counter, uval, prefix, + run, ena, 1.0, &rt_stat); fputc('\n', output); } @@ -872,6 +888,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, struct evsel *counter; u64 ena, run, val; double uval; + struct aggr_cpu_id id; nrcpus = evlist->core.cpus->nr; for (cpu = 0; cpu < nrcpus; cpu++) { @@ -880,8 +897,10 @@ static void print_no_aggr_metric(struct perf_stat_config *config, if (prefix) fputs(prefix, config->output); evlist__for_each_entry(evlist, counter) { + id = cpu_map__empty_aggr_cpu_id(); + id.id = cpu; if (first) { - aggr_printout(config, counter, cpu, 0); + aggr_printout(config, counter, id, 0); first = false; } val = perf_counts(counter->counts, cpu, 0)->val; @@ -889,8 +908,8 @@ static void print_no_aggr_metric(struct perf_stat_config *config, run = perf_counts(counter->counts, cpu, 0)->run; uval = val * counter->scale; - printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0, - &rt_stat); + printout(config, id, 0, counter, uval, prefix, + run, ena, 1.0, &rt_stat); } fputc('\n', config->output); } @@ -1140,14 +1159,15 @@ static void print_footer(struct perf_stat_config *config) static void print_percore_thread(struct perf_stat_config *config, struct evsel *counter, char *prefix) { - int s, s2, id; + int s; + struct aggr_cpu_id s2, id; bool first = true; for (int i = 0; i < evsel__nr_cpus(counter); i++) { s2 = config->aggr_get_id(config, evsel__cpus(counter), i); for (s = 0; s < config->aggr_map->nr; s++) { - id = config->aggr_map->map[s]; - if (s2 == id) + id.id = config->aggr_map->map[s]; + if (cpu_map__compare_aggr_cpu_id(s2, id)) break; } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 1e125e39ff84..1d5c9f98396b 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -313,7 +313,7 @@ static int check_per_pkg(struct evsel *counter, if (!(vals->run && vals->ena)) return 0; - s = cpu_map__get_socket(cpus, cpu, NULL); + s = cpu_map__get_socket(cpus, cpu, NULL).id; if (s < 0) return -1; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 9979b4b100f2..87522ffb2db2 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -6,6 +6,7 @@ #include #include #include +#include "cpumap.h" #include "rblist.h" struct perf_cpu_map; @@ -99,7 +100,7 @@ struct runtime_stat { struct rblist value_list; }; -typedef int (*aggr_get_id_t)(struct perf_stat_config *config, +typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, struct perf_cpu_map *m, int cpu); struct perf_stat_config { @@ -170,7 +171,7 @@ struct evlist; struct perf_aggr_thread_value { struct evsel *counter; - int id; + struct aggr_cpu_id id; double uval; u64 val; u64 run; -- cgit v1.3.1 From cea6575fdccfc0624ca42f656e16e6b4d9bb48a5 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:21 +0200 Subject: perf cpumap: Add new map type for aggregation Currently this is a duplicate of perf_cpu_map so that it can be used as a drop in replacement. In a later commit it will be changed from a map of ints to use the new cpu_aggr_id struct. No functional changes. Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-6-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 17 +++++++++++++++++ tools/perf/util/cpumap.h | 8 ++++++++ 2 files changed, 25 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index e05a12bde073..b18e53506656 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -95,6 +95,23 @@ struct perf_cpu_map *perf_cpu_map__empty_new(int nr) return cpus; } +struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr) +{ + struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(int) * nr); + + if (cpus != NULL) { + int i; + + cpus->nr = nr; + for (i = 0; i < nr; i++) + cpus->map[i] = -1; + + refcount_set(&cpus->refcnt, 1); + } + + return cpus; +} + static int cpu__get_topology_int(int cpu, const char *name, int *value) { char path[PATH_MAX]; diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index b8c2288a3f6d..ebd65c4f431b 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -11,9 +11,17 @@ struct aggr_cpu_id { int id; }; +struct cpu_aggr_map { + refcount_t refcnt; + int nr; + int map[]; +}; + struct perf_record_cpu_map_data; struct perf_cpu_map *perf_cpu_map__empty_new(int nr); +struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr); + struct perf_cpu_map *cpu_map__new_data(struct perf_record_cpu_map_data *data); size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size); -- cgit v1.3.1 From d526e1a033e03ec4515b1800f99d99a35c7ea790 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:22 +0200 Subject: perf cpumap: Drop in cpu_aggr_map struct Replace usages of perf_cpu_map with cpu_aggr map in places that are involved with 'perf stat' aggregation. This will then later be changed to be a map of cpu_aggr_id rather than an int so that more data can be stored. No functional changes. Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: John Garry Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Mark Rutland Cc: Alexander Shishkin Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-7-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 29 ++++++++++++++++++++++------- tools/perf/util/cpumap.c | 12 ++++++------ tools/perf/util/cpumap.h | 10 +++++----- tools/perf/util/stat.h | 4 ++-- 4 files changed, 35 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 1ff76d36d1c9..cc017c7dcee9 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1320,14 +1320,29 @@ static int perf_stat_init_aggr_mode(void) * the aggregation translate cpumap. */ nr = perf_cpu_map__max(evsel_list->core.cpus); - stat_config.cpus_aggr_map = perf_cpu_map__empty_new(nr + 1); + stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr + 1); return stat_config.cpus_aggr_map ? 0 : -ENOMEM; } +static void cpu_aggr_map__delete(struct cpu_aggr_map *map) +{ + if (map) { + WARN_ONCE(refcount_read(&map->refcnt) != 0, + "cpu_aggr_map refcnt unbalanced\n"); + free(map); + } +} + +static void cpu_aggr_map__put(struct cpu_aggr_map *map) +{ + if (map && refcount_dec_and_test(&map->refcnt)) + cpu_aggr_map__delete(map); +} + static void perf_stat__exit_aggr_mode(void) { - perf_cpu_map__put(stat_config.aggr_map); - perf_cpu_map__put(stat_config.cpus_aggr_map); + cpu_aggr_map__put(stat_config.aggr_map); + cpu_aggr_map__put(stat_config.cpus_aggr_map); stat_config.aggr_map = NULL; stat_config.cpus_aggr_map = NULL; } @@ -1425,25 +1440,25 @@ static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx, } static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct perf_cpu_map **sockp) + struct cpu_aggr_map **sockp) { return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); } static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct perf_cpu_map **diep) + struct cpu_aggr_map **diep) { return cpu_map__build_map(cpus, diep, perf_env__get_die, env); } static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct perf_cpu_map **corep) + struct cpu_aggr_map **corep) { return cpu_map__build_map(cpus, corep, perf_env__get_core, env); } static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct perf_cpu_map **nodep) + struct cpu_aggr_map **nodep) { return cpu_map__build_map(cpus, nodep, perf_env__get_node, env); } diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index b18e53506656..ea81586305f4 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -151,12 +151,12 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) return a->id - b->id; } -int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, +int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data), void *data) { int nr = cpus->nr; - struct perf_cpu_map *c = perf_cpu_map__empty_new(nr); + struct cpu_aggr_map *c = cpu_aggr_map__empty_new(nr); int cpu, s2; struct aggr_cpu_id s1; @@ -279,22 +279,22 @@ struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *da return id; } -int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct perf_cpu_map **sockp) +int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp) { return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL); } -int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct perf_cpu_map **diep) +int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep) { return cpu_map__build_map(cpus, diep, cpu_map__get_die, NULL); } -int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct perf_cpu_map **corep) +int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep) { return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL); } -int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct perf_cpu_map **numap) +int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **numap) { return cpu_map__build_map(cpus, numap, cpu_map__get_node, NULL); } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index ebd65c4f431b..b112069038be 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -34,10 +34,10 @@ int cpu_map__get_core_id(int cpu); struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data); int cpu_map__get_node_id(int cpu); struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data); -int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct perf_cpu_map **sockp); -int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct perf_cpu_map **diep); -int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct perf_cpu_map **corep); -int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct perf_cpu_map **nodep); +int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp); +int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep); +int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep); +int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **nodep); const struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */ static inline int cpu_map__socket(struct perf_cpu_map *sock, int s) @@ -69,7 +69,7 @@ int cpu__max_cpu(void); int cpu__max_present_cpu(void); int cpu__get_node(int cpu); -int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, +int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data), void *data); diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 87522ffb2db2..b5369730b4a2 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -139,9 +139,9 @@ struct perf_stat_config { const char *csv_sep; struct stats *walltime_nsecs_stats; struct rusage ru_data; - struct perf_cpu_map *aggr_map; + struct cpu_aggr_map *aggr_map; aggr_get_id_t aggr_get_id; - struct perf_cpu_map *cpus_aggr_map; + struct cpu_aggr_map *cpus_aggr_map; u64 *walltime_run; struct rblist metric_events; int ctl_fd; -- cgit v1.3.1 From ff5232956e074994a66656f709c3ad1ee3d8a550 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:23 +0200 Subject: perf stat aggregation: Start using cpu_aggr_id in map Use the new cpu_aggr_id struct in the cpu map instead of int so that it can store more data. No functional changes. Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Mark Rutland Cc: Alexander Shishkin Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-8-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 6 +++--- tools/perf/util/cpumap.c | 8 ++++---- tools/perf/util/cpumap.h | 2 +- tools/perf/util/stat-display.c | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index cc017c7dcee9..3fec1b7e93b5 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1221,10 +1221,10 @@ static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, cpu = map->map[idx]; - if (config->cpus_aggr_map->map[cpu] == -1) - config->cpus_aggr_map->map[cpu] = get_id(config, map, idx).id; + if (cpu_map__aggr_cpu_id_is_empty(config->cpus_aggr_map->map[cpu])) + config->cpus_aggr_map->map[cpu] = get_id(config, map, idx); - id.id = config->cpus_aggr_map->map[cpu]; + id = config->cpus_aggr_map->map[cpu]; return id; } diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index ea81586305f4..b50609b9a585 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -97,14 +97,14 @@ struct perf_cpu_map *perf_cpu_map__empty_new(int nr) struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr) { - struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(int) * nr); + struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(struct aggr_cpu_id) * nr); if (cpus != NULL) { int i; cpus->nr = nr; for (i = 0; i < nr; i++) - cpus->map[i] = -1; + cpus->map[i] = cpu_map__empty_aggr_cpu_id(); refcount_set(&cpus->refcnt, 1); } @@ -169,11 +169,11 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, for (cpu = 0; cpu < nr; cpu++) { s1 = f(cpus, cpu, data); for (s2 = 0; s2 < c->nr; s2++) { - if (s1.id == c->map[s2]) + if (cpu_map__compare_aggr_cpu_id(s1, c->map[s2])) break; } if (s2 == c->nr) { - c->map[c->nr] = s1.id; + c->map[c->nr] = s1; c->nr++; } } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index b112069038be..d8fc265bc762 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -14,7 +14,7 @@ struct aggr_cpu_id { struct cpu_aggr_map { refcount_t refcnt; int nr; - int map[]; + struct aggr_cpu_id map[]; }; struct perf_record_cpu_map_data; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 8f4ee3621863..a6309cedb37b 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -506,7 +506,7 @@ static void aggr_update_shadow(struct perf_stat_config *config, struct evsel *counter; for (s = 0; s < config->aggr_map->nr; s++) { - id.id = config->aggr_map->map[s]; + id = config->aggr_map->map[s]; evlist__for_each_entry(evlist, counter) { val = 0; for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { @@ -638,7 +638,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, struct aggr_cpu_id id; double uval; - ad.id.id = id.id = config->aggr_map->map[s]; + ad.id = id = config->aggr_map->map[s]; ad.val = ad.ena = ad.run = 0; ad.nr = 0; if (!collect_data(config, counter, aggr_cb, &ad)) @@ -1166,7 +1166,7 @@ static void print_percore_thread(struct perf_stat_config *config, for (int i = 0; i < evsel__nr_cpus(counter); i++) { s2 = config->aggr_get_id(config, evsel__cpus(counter), i); for (s = 0; s < config->aggr_map->nr; s++) { - id.id = config->aggr_map->map[s]; + id = config->aggr_map->map[s]; if (cpu_map__compare_aggr_cpu_id(s2, id)) break; } -- cgit v1.3.1 From fcd83a35dd93b89d3f48cfcd33c31b112cc96180 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:24 +0200 Subject: perf stat aggregation: Add separate node member Add node as a separate member so that it doesn't have to be packed into the int value. Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-9-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 +- tools/perf/tests/topology.c | 8 +++++++- tools/perf/util/cpumap.c | 16 +++++++++++----- tools/perf/util/cpumap.h | 1 + tools/perf/util/stat-display.c | 2 +- 5 files changed, 21 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3fec1b7e93b5..e51c5469d712 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1435,7 +1435,7 @@ static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx, int cpu = perf_env__get_cpu(data, map, idx); struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - id.id = perf_env__numa_node(data, cpu); + id.node = perf_env__numa_node(data, cpu); return id; } diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 7a7d16b3950a..ee272d45a445 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -119,6 +119,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Core map - Die ID doesn't match", session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id.id)); + TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1); } // Test that die ID contains socket and die @@ -131,6 +132,8 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Die map - Die ID doesn't match", session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id.id << 16)); + + TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1); } // Test that socket ID contains only socket @@ -138,13 +141,16 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) id = cpu_map__get_socket(map, i, NULL); TEST_ASSERT_VAL("Socket map - Socket ID doesn't match", session->header.env.cpu[map->map[i]].socket_id == id.id); + + TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1); } // Test that node ID contains only node for (i = 0; i < map->nr; i++) { id = cpu_map__get_node(map, i, NULL); TEST_ASSERT_VAL("Node map - Node ID doesn't match", - cpu__get_node(map->map[i]) == id.id); + cpu__get_node(map->map[i]) == id.node); + TEST_ASSERT_VAL("Node map - ID is set", id.id == -1); } perf_session__delete(session); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index b50609b9a585..5f9e98ddbe34 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -148,7 +148,10 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer; struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer; - return a->id - b->id; + if (a->id != b->id) + return a->id - b->id; + else + return a->node - b->node; } int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, @@ -275,7 +278,7 @@ struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *da if (idx < 0 || idx >= map->nr) return id; - id.id = cpu_map__get_node_id(map->map[idx]); + id.node = cpu_map__get_node_id(map->map[idx]); return id; } @@ -620,18 +623,21 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b) { - return a.id == b.id; + return a.id == b.id && + a.node == b.node; } bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) { - return a.id == -1; + return a.id == -1 && + a.node == -1; } struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) { struct aggr_cpu_id ret = { - .id = -1 + .id = -1, + .node = -1 }; return ret; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index d8fc265bc762..f79e92603024 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -9,6 +9,7 @@ struct aggr_cpu_id { int id; + int node; }; struct cpu_aggr_map { diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index a6309cedb37b..790392247026 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -104,7 +104,7 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_NODE: fprintf(config->output, "N%*d%s%*d%s", config->csv_output ? 0 : -5, - id.id, + id.node, config->csv_sep, config->csv_output ? 0 : 4, nr, -- cgit v1.3.1 From 1a270cb6b3cc18663f7fd165aa691c48d68739f2 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:25 +0200 Subject: perf stat aggregation: Add separate socket member Add socket as a separate member so that it doesn't have to be packed into the int value. When the socket ID was larger than 8 bits the output appeared corrupted or incomplete. For example, here on ThunderX2 'perf stat' reports a socket of -1 and an invalid die number: ./perf stat -a --per-die The socket id number is too big. Performance counter stats for 'system wide': S-1-D255 128 687.99 msec cpu-clock # 57.240 CPUs utilized ... S36-D0 128 842.34 msec cpu-clock # 70.081 CPUs utilized ... And with --per-core there is an entry with an invalid core ID: ./perf stat record -a --per-core The socket id number is too big. Performance counter stats for 'system wide': S-1-D255-C65535 128 671.04 msec cpu-clock # 54.112 CPUs utilized ... S36-D0-C0 4 28.27 msec cpu-clock # 2.279 CPUs utilized ... This fixes the "Session topology" self test on ThunderX2. After this fix the output contains the correct socket and die IDs and no longer prints a warning about the size of the socket ID: ./perf stat --per-die -a Performance counter stats for 'system wide': S36-D0 128 169,869.39 msec cpu-clock # 127.501 CPUs utilized ... S3612-D0 128 169,733.05 msec cpu-clock # 127.398 CPUs utilized Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-10-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 22 ++++++++------------- tools/perf/tests/topology.c | 10 +++++----- tools/perf/util/cpumap.c | 44 +++++++++++++++++++++--------------------- tools/perf/util/cpumap.h | 6 +----- tools/perf/util/stat-display.c | 8 ++++---- tools/perf/util/stat.c | 2 +- 6 files changed, 41 insertions(+), 51 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e51c5469d712..6248baa0f612 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1369,7 +1369,7 @@ static struct aggr_cpu_id perf_env__get_socket(struct perf_cpu_map *map, int idx struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); if (cpu != -1) - id.id = env->cpu[cpu].socket_id; + id.socket = env->cpu[cpu].socket_id; return id; } @@ -1382,18 +1382,16 @@ static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, v if (cpu != -1) { /* - * Encode socket in bit range 15:8 - * die_id is relative to socket, - * we need a global id. So we combine - * socket + die id + * die_id is relative to socket, so start + * with the socket ID and then add die to + * make a unique ID. */ - if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) - return cpu_map__empty_aggr_cpu_id(); + id.socket = env->cpu[cpu].socket_id; if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) return cpu_map__empty_aggr_cpu_id(); - id.id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff); + id.id = env->cpu[cpu].die_id & 0xff; } return id; @@ -1407,23 +1405,19 @@ static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, if (cpu != -1) { /* - * Encode socket in bit range 31:24 * encode die id in bit range 23:16 * core_id is relative to socket and die, * we need a global id. So we combine * socket + die id + core id */ - if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) - return cpu_map__empty_aggr_cpu_id(); - if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) return cpu_map__empty_aggr_cpu_id(); if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n")) return cpu_map__empty_aggr_cpu_id(); - id.id = (env->cpu[cpu].socket_id << 24) | - (env->cpu[cpu].die_id << 16) | + id.socket = env->cpu[cpu].socket_id; + id.id = (env->cpu[cpu].die_id << 16) | (env->cpu[cpu].core_id & 0xffff); } diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index ee272d45a445..777dd8291bcc 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -114,8 +114,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) session->header.env.cpu[map->map[i]].core_id == cpu_map__id_to_cpu(id.id)); TEST_ASSERT_VAL("Core map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == - cpu_map__id_to_socket(id.id)); + session->header.env.cpu[map->map[i]].socket_id == id.socket); TEST_ASSERT_VAL("Core map - Die ID doesn't match", session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id.id)); @@ -126,8 +125,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) for (i = 0; i < map->nr; i++) { id = cpu_map__get_die(map, i, NULL); TEST_ASSERT_VAL("Die map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == - cpu_map__id_to_socket(id.id << 16)); + session->header.env.cpu[map->map[i]].socket_id == id.socket); TEST_ASSERT_VAL("Die map - Die ID doesn't match", session->header.env.cpu[map->map[i]].die_id == @@ -140,9 +138,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) for (i = 0; i < map->nr; i++) { id = cpu_map__get_socket(map, i, NULL); TEST_ASSERT_VAL("Socket map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == id.id); + session->header.env.cpu[map->map[i]].socket_id == id.socket); TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1); + TEST_ASSERT_VAL("Socket map - ID is set", id.id == -1); } // Test that node ID contains only node @@ -151,6 +150,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Node map - Node ID doesn't match", cpu__get_node(map->map[i]) == id.node); TEST_ASSERT_VAL("Node map - ID is set", id.id == -1); + TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1); } perf_session__delete(session); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 5f9e98ddbe34..d2630f03f682 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -139,7 +139,7 @@ struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, cpu = map->map[idx]; - id.id = cpu_map__get_socket_id(cpu); + id.socket = cpu_map__get_socket_id(cpu); return id; } @@ -150,8 +150,10 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) if (a->id != b->id) return a->id - b->id; - else + else if (a->node != b->node) return a->node - b->node; + else + return a->socket - b->socket; } int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, @@ -196,7 +198,7 @@ int cpu_map__get_die_id(int cpu) struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data) { - int cpu, s; + int cpu, die; struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); if (idx > map->nr) @@ -204,28 +206,24 @@ struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *dat cpu = map->map[idx]; - id.id = cpu_map__get_die_id(cpu); + die = cpu_map__get_die_id(cpu); /* There is no die_id on legacy system. */ - if (id.id == -1) - id.id = 0; - - s = cpu_map__get_socket(map, idx, data).id; - if (s == -1) - return cpu_map__empty_aggr_cpu_id(); + if (die == -1) + die = 0; /* - * Encode socket in bit range 15:8 - * die_id is relative to socket, and - * we need a global id. So we combine - * socket + die id + * die_id is relative to socket, so start + * with the socket ID and then add die to + * make a unique ID. */ - if (WARN_ONCE(id.id >> 8, "The die id number is too big.\n")) - return cpu_map__empty_aggr_cpu_id(); + id = cpu_map__get_socket(map, idx, data); + if (cpu_map__aggr_cpu_id_is_empty(id)) + return id; - if (WARN_ONCE(s >> 8, "The socket id number is too big.\n")) + if (WARN_ONCE(die >> 8, "The die id number is too big.\n")) return cpu_map__empty_aggr_cpu_id(); - id.id = (s << 8) | (id.id & 0xff); + id.id = (die & 0xff); return id; } @@ -258,7 +256,6 @@ struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *da return id; /* - * encode socket in bit range 31:24 * encode die id in bit range 23:16 * core_id is relative to socket and die, * we need a global id. So we combine @@ -624,20 +621,23 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b) { return a.id == b.id && - a.node == b.node; + a.node == b.node && + a.socket == b.socket; } bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) { return a.id == -1 && - a.node == -1; + a.node == -1 && + a.socket == -1; } struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) { struct aggr_cpu_id ret = { .id = -1, - .node = -1 + .node = -1, + .socket = -1 }; return ret; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index f79e92603024..0123ecc90694 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -10,6 +10,7 @@ struct aggr_cpu_id { int id; int node; + int socket; }; struct cpu_aggr_map { @@ -48,11 +49,6 @@ static inline int cpu_map__socket(struct perf_cpu_map *sock, int s) return sock->map[s]; } -static inline int cpu_map__id_to_socket(int id) -{ - return id >> 24; -} - static inline int cpu_map__id_to_die(int id) { return (id >> 16) & 0xff; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 790392247026..5a756c88c124 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -73,7 +73,7 @@ static void aggr_printout(struct perf_stat_config *config, switch (config->aggr_mode) { case AGGR_CORE: fprintf(config->output, "S%d-D%d-C%*d%s%*d%s", - cpu_map__id_to_socket(id.id), + id.socket, cpu_map__id_to_die(id.id), config->csv_output ? 0 : -8, cpu_map__id_to_cpu(id.id), @@ -84,7 +84,7 @@ static void aggr_printout(struct perf_stat_config *config, break; case AGGR_DIE: fprintf(config->output, "S%d-D%*d%s%*d%s", - cpu_map__id_to_socket(id.id << 16), + id.socket, config->csv_output ? 0 : -8, cpu_map__id_to_die(id.id << 16), config->csv_sep, @@ -95,7 +95,7 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_SOCKET: fprintf(config->output, "S%*d%s%*d%s", config->csv_output ? 0 : -5, - id.id, + id.socket, config->csv_sep, config->csv_output ? 0 : 4, nr, @@ -113,7 +113,7 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_NONE: if (evsel->percore && !config->percore_show_thread) { fprintf(config->output, "S%d-D%d-C%*d%s", - cpu_map__id_to_socket(id.id), + id.socket, cpu_map__id_to_die(id.id), config->csv_output ? 0 : -3, cpu_map__id_to_cpu(id.id), config->csv_sep); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 1d5c9f98396b..8ce1479c98f0 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -313,7 +313,7 @@ static int check_per_pkg(struct evsel *counter, if (!(vals->run && vals->ena)) return 0; - s = cpu_map__get_socket(cpus, cpu, NULL).id; + s = cpu_map__get_socket(cpus, cpu, NULL).socket; if (s < 0) return -1; -- cgit v1.3.1 From ba2ee166d92b201078cb941956547ab9828989d3 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:26 +0200 Subject: perf stat aggregation: Add separate die member Add die as a separate member so that it doesn't have to be packed into the int value. Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-11-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 14 +++----------- tools/perf/tests/topology.c | 8 +++++--- tools/perf/util/cpumap.c | 28 ++++++++++++++-------------- tools/perf/util/cpumap.h | 6 +----- tools/perf/util/stat-display.c | 6 +++--- 5 files changed, 26 insertions(+), 36 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 6248baa0f612..bac37fe9373c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1387,11 +1387,7 @@ static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, v * make a unique ID. */ id.socket = env->cpu[cpu].socket_id; - - if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) - return cpu_map__empty_aggr_cpu_id(); - - id.id = env->cpu[cpu].die_id & 0xff; + id.die = env->cpu[cpu].die_id; } return id; @@ -1405,20 +1401,16 @@ static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, if (cpu != -1) { /* - * encode die id in bit range 23:16 * core_id is relative to socket and die, * we need a global id. So we combine * socket + die id + core id */ - if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) - return cpu_map__empty_aggr_cpu_id(); - if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n")) return cpu_map__empty_aggr_cpu_id(); id.socket = env->cpu[cpu].socket_id; - id.id = (env->cpu[cpu].die_id << 16) | - (env->cpu[cpu].core_id & 0xffff); + id.die = env->cpu[cpu].die_id; + id.id = env->cpu[cpu].core_id & 0xffff; } return id; diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 777dd8291bcc..e3f822890a84 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -117,7 +117,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) session->header.env.cpu[map->map[i]].socket_id == id.socket); TEST_ASSERT_VAL("Core map - Die ID doesn't match", - session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id.id)); + session->header.env.cpu[map->map[i]].die_id == id.die); TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1); } @@ -128,10 +128,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) session->header.env.cpu[map->map[i]].socket_id == id.socket); TEST_ASSERT_VAL("Die map - Die ID doesn't match", - session->header.env.cpu[map->map[i]].die_id == - cpu_map__id_to_die(id.id << 16)); + session->header.env.cpu[map->map[i]].die_id == id.die); TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1); + TEST_ASSERT_VAL("Die map - ID is set", id.id == -1); } // Test that socket ID contains only socket @@ -141,6 +141,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) session->header.env.cpu[map->map[i]].socket_id == id.socket); TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1); + TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1); TEST_ASSERT_VAL("Socket map - ID is set", id.id == -1); } @@ -151,6 +152,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) cpu__get_node(map->map[i]) == id.node); TEST_ASSERT_VAL("Node map - ID is set", id.id == -1); TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1); + TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1); } perf_session__delete(session); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index d2630f03f682..10a52058d838 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -152,8 +152,10 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) return a->id - b->id; else if (a->node != b->node) return a->node - b->node; - else + else if (a->socket != b->socket) return a->socket - b->socket; + else + return a->die - b->die; } int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, @@ -220,10 +222,7 @@ struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *dat if (cpu_map__aggr_cpu_id_is_empty(id)) return id; - if (WARN_ONCE(die >> 8, "The die id number is too big.\n")) - return cpu_map__empty_aggr_cpu_id(); - - id.id = (die & 0xff); + id.die = die; return id; } @@ -250,21 +249,19 @@ struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *da cpu = cpu_map__get_core_id(cpu); - /* cpu_map__get_die returns the combination of socket + die id */ + /* cpu_map__get_die returns a struct with socket and die set*/ id = cpu_map__get_die(map, idx, data); if (cpu_map__aggr_cpu_id_is_empty(id)) return id; /* - * encode die id in bit range 23:16 - * core_id is relative to socket and die, - * we need a global id. So we combine - * socket + die id + core id + * core_id is relative to socket and die, we need a global id. + * So we combine the result from cpu_map__get_die with the core id */ if (WARN_ONCE(cpu >> 16, "The core id number is too big.\n")) return cpu_map__empty_aggr_cpu_id(); - id.id = (id.id << 16) | (cpu & 0xffff); + id.id = (cpu & 0xffff); return id; } @@ -622,14 +619,16 @@ bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b) { return a.id == b.id && a.node == b.node && - a.socket == b.socket; + a.socket == b.socket && + a.die == b.die; } bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) { return a.id == -1 && a.node == -1 && - a.socket == -1; + a.socket == -1 && + a.die == -1; } struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) @@ -637,7 +636,8 @@ struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) struct aggr_cpu_id ret = { .id = -1, .node = -1, - .socket = -1 + .socket = -1, + .die = -1 }; return ret; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 0123ecc90694..51bbe1eca3f4 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -11,6 +11,7 @@ struct aggr_cpu_id { int id; int node; int socket; + int die; }; struct cpu_aggr_map { @@ -49,11 +50,6 @@ static inline int cpu_map__socket(struct perf_cpu_map *sock, int s) return sock->map[s]; } -static inline int cpu_map__id_to_die(int id) -{ - return (id >> 16) & 0xff; -} - static inline int cpu_map__id_to_cpu(int id) { return id & 0xffff; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 5a756c88c124..dcce753f351d 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -74,7 +74,7 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_CORE: fprintf(config->output, "S%d-D%d-C%*d%s%*d%s", id.socket, - cpu_map__id_to_die(id.id), + id.die, config->csv_output ? 0 : -8, cpu_map__id_to_cpu(id.id), config->csv_sep, @@ -86,7 +86,7 @@ static void aggr_printout(struct perf_stat_config *config, fprintf(config->output, "S%d-D%*d%s%*d%s", id.socket, config->csv_output ? 0 : -8, - cpu_map__id_to_die(id.id << 16), + id.die, config->csv_sep, config->csv_output ? 0 : 4, nr, @@ -114,7 +114,7 @@ static void aggr_printout(struct perf_stat_config *config, if (evsel->percore && !config->percore_show_thread) { fprintf(config->output, "S%d-D%d-C%*d%s", id.socket, - cpu_map__id_to_die(id.id), + id.die, config->csv_output ? 0 : -3, cpu_map__id_to_cpu(id.id), config->csv_sep); } else if (id.id > -1) { -- cgit v1.3.1 From b993381779da406ca9ca0ae1e1b3968e9075ce77 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:27 +0200 Subject: perf stat aggregation: Add separate core member Add core as a separate member so that it doesn't have to be packed into the int value. Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-12-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 9 +++------ tools/perf/tests/topology.c | 6 +++++- tools/perf/util/cpumap.c | 18 ++++++++++-------- tools/perf/util/cpumap.h | 6 +----- tools/perf/util/stat-display.c | 16 ++++++++-------- 5 files changed, 27 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index bac37fe9373c..8cc24967bc27 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1402,15 +1402,12 @@ static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, if (cpu != -1) { /* * core_id is relative to socket and die, - * we need a global id. So we combine - * socket + die id + core id + * we need a global id. So we set + * socket, die id and core id */ - if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n")) - return cpu_map__empty_aggr_cpu_id(); - id.socket = env->cpu[cpu].socket_id; id.die = env->cpu[cpu].die_id; - id.id = env->cpu[cpu].core_id & 0xffff; + id.core = env->cpu[cpu].core_id; } return id; diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index e3f822890a84..a6e289f2c6db 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -111,7 +111,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) for (i = 0; i < map->nr; i++) { id = cpu_map__get_core(map, i, NULL); TEST_ASSERT_VAL("Core map - Core ID doesn't match", - session->header.env.cpu[map->map[i]].core_id == cpu_map__id_to_cpu(id.id)); + session->header.env.cpu[map->map[i]].core_id == id.core); TEST_ASSERT_VAL("Core map - Socket ID doesn't match", session->header.env.cpu[map->map[i]].socket_id == id.socket); @@ -119,6 +119,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Core map - Die ID doesn't match", session->header.env.cpu[map->map[i]].die_id == id.die); TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1); + TEST_ASSERT_VAL("Core map - ID is set", id.id == -1); } // Test that die ID contains socket and die @@ -132,6 +133,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Die map - ID is set", id.id == -1); + TEST_ASSERT_VAL("Die map - Core is set", id.core == -1); } // Test that socket ID contains only socket @@ -143,6 +145,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1); TEST_ASSERT_VAL("Socket map - ID is set", id.id == -1); + TEST_ASSERT_VAL("Socket map - Core is set", id.core == -1); } // Test that node ID contains only node @@ -153,6 +156,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Node map - ID is set", id.id == -1); TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1); TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1); + TEST_ASSERT_VAL("Node map - Core is set", id.core == -1); } perf_session__delete(session); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 10a52058d838..d164f7bd1ac7 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -154,8 +154,10 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) return a->node - b->node; else if (a->socket != b->socket) return a->socket - b->socket; - else + else if (a->die != b->die) return a->die - b->die; + else + return a->core - b->core; } int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, @@ -258,10 +260,7 @@ struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *da * core_id is relative to socket and die, we need a global id. * So we combine the result from cpu_map__get_die with the core id */ - if (WARN_ONCE(cpu >> 16, "The core id number is too big.\n")) - return cpu_map__empty_aggr_cpu_id(); - - id.id = (cpu & 0xffff); + id.core = cpu; return id; } @@ -620,7 +619,8 @@ bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b) return a.id == b.id && a.node == b.node && a.socket == b.socket && - a.die == b.die; + a.die == b.die && + a.core == b.core; } bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) @@ -628,7 +628,8 @@ bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) return a.id == -1 && a.node == -1 && a.socket == -1 && - a.die == -1; + a.die == -1 && + a.core == -1; } struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) @@ -637,7 +638,8 @@ struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) .id = -1, .node = -1, .socket = -1, - .die = -1 + .die = -1, + .core = -1 }; return ret; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 51bbe1eca3f4..1bb8f7d47206 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -12,6 +12,7 @@ struct aggr_cpu_id { int node; int socket; int die; + int core; }; struct cpu_aggr_map { @@ -50,11 +51,6 @@ static inline int cpu_map__socket(struct perf_cpu_map *sock, int s) return sock->map[s]; } -static inline int cpu_map__id_to_cpu(int id) -{ - return id & 0xffff; -} - int cpu__setup_cpunode_map(void); int cpu__max_node(void); diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index dcce753f351d..2b3842f6080d 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -76,7 +76,7 @@ static void aggr_printout(struct perf_stat_config *config, id.socket, id.die, config->csv_output ? 0 : -8, - cpu_map__id_to_cpu(id.id), + id.core, config->csv_sep, config->csv_output ? 0 : 4, nr, @@ -116,11 +116,11 @@ static void aggr_printout(struct perf_stat_config *config, id.socket, id.die, config->csv_output ? 0 : -3, - cpu_map__id_to_cpu(id.id), config->csv_sep); - } else if (id.id > -1) { + id.core, config->csv_sep); + } else if (id.core > -1) { fprintf(config->output, "CPU%*d%s", config->csv_output ? 0 : -7, - evsel__cpus(evsel)->map[id.id], + evsel__cpus(evsel)->map[id.core], config->csv_sep); } break; @@ -326,7 +326,7 @@ static int first_shadow_cpu(struct perf_stat_config *config, int i; if (config->aggr_mode == AGGR_NONE) - return id.id; + return id.core; if (!config->aggr_get_id) return 0; @@ -658,7 +658,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, uval = val * counter->scale; if (cpu != -1) { id = cpu_map__empty_aggr_cpu_id(); - id.id = cpu; + id.core = cpu; } printout(config, id, nr, counter, uval, prefix, run, ena, 1.0, &rt_stat); @@ -871,7 +871,7 @@ static void print_counter(struct perf_stat_config *config, uval = val * counter->scale; id = cpu_map__empty_aggr_cpu_id(); - id.id = cpu; + id.core = cpu; printout(config, id, 0, counter, uval, prefix, run, ena, 1.0, &rt_stat); @@ -898,7 +898,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, fputs(prefix, config->output); evlist__for_each_entry(evlist, counter) { id = cpu_map__empty_aggr_cpu_id(); - id.id = cpu; + id.core = cpu; if (first) { aggr_printout(config, counter, id, 0); first = false; -- cgit v1.3.1 From 8d4852b468c38168c4e1e1652602b4a6c6c080b3 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 26 Nov 2020 16:13:28 +0200 Subject: perf stat aggregation: Add separate thread member A separate field isn't strictly required. The core field could be re-used for thread IDs as a single field was used previously. But separating them will avoid confusion and catch potential errors where core IDs are read as thread IDs and vice versa. Also remove the placeholder id field which is now no longer used. Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-13-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/topology.c | 8 ++++---- tools/perf/util/cpumap.c | 14 +++++++------- tools/perf/util/cpumap.h | 2 +- tools/perf/util/stat-display.c | 8 ++++---- 4 files changed, 16 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index a6e289f2c6db..74748ed75b2c 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -119,7 +119,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Core map - Die ID doesn't match", session->header.env.cpu[map->map[i]].die_id == id.die); TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1); - TEST_ASSERT_VAL("Core map - ID is set", id.id == -1); + TEST_ASSERT_VAL("Core map - Thread is set", id.thread == -1); } // Test that die ID contains socket and die @@ -132,8 +132,8 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) session->header.env.cpu[map->map[i]].die_id == id.die); TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1); - TEST_ASSERT_VAL("Die map - ID is set", id.id == -1); TEST_ASSERT_VAL("Die map - Core is set", id.core == -1); + TEST_ASSERT_VAL("Die map - Thread is set", id.thread == -1); } // Test that socket ID contains only socket @@ -144,8 +144,8 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1); - TEST_ASSERT_VAL("Socket map - ID is set", id.id == -1); TEST_ASSERT_VAL("Socket map - Core is set", id.core == -1); + TEST_ASSERT_VAL("Socket map - Thread is set", id.thread == -1); } // Test that node ID contains only node @@ -153,10 +153,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) id = cpu_map__get_node(map, i, NULL); TEST_ASSERT_VAL("Node map - Node ID doesn't match", cpu__get_node(map->map[i]) == id.node); - TEST_ASSERT_VAL("Node map - ID is set", id.id == -1); TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1); TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1); TEST_ASSERT_VAL("Node map - Core is set", id.core == -1); + TEST_ASSERT_VAL("Node map - Thread is set", id.thread == -1); } perf_session__delete(session); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index d164f7bd1ac7..87d3eca9b872 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -148,16 +148,16 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer; struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer; - if (a->id != b->id) - return a->id - b->id; - else if (a->node != b->node) + if (a->node != b->node) return a->node - b->node; else if (a->socket != b->socket) return a->socket - b->socket; else if (a->die != b->die) return a->die - b->die; - else + else if (a->core != b->core) return a->core - b->core; + else + return a->thread - b->thread; } int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, @@ -616,7 +616,7 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b) { - return a.id == b.id && + return a.thread == b.thread && a.node == b.node && a.socket == b.socket && a.die == b.die && @@ -625,7 +625,7 @@ bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b) bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) { - return a.id == -1 && + return a.thread == -1 && a.node == -1 && a.socket == -1 && a.die == -1 && @@ -635,7 +635,7 @@ bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) { struct aggr_cpu_id ret = { - .id = -1, + .thread = -1, .node = -1, .socket = -1, .die = -1, diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 1bb8f7d47206..a27eeaf086e8 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -8,7 +8,7 @@ #include struct aggr_cpu_id { - int id; + int thread; int node; int socket; int die; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 2b3842f6080d..583ae4f09c5d 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -127,9 +127,9 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_THREAD: fprintf(config->output, "%*s-%*d%s", config->csv_output ? 0 : 16, - perf_thread_map__comm(evsel->core.threads, id.id), + perf_thread_map__comm(evsel->core.threads, id.thread), config->csv_output ? 0 : -8, - perf_thread_map__pid(evsel->core.threads, id.id), + perf_thread_map__pid(evsel->core.threads, id.thread), config->csv_sep); break; case AGGR_GLOBAL: @@ -740,7 +740,7 @@ static struct perf_aggr_thread_value *sort_aggr_thread( buf[i].counter = counter; buf[i].id = cpu_map__empty_aggr_cpu_id(); - buf[i].id.id = thread; + buf[i].id.thread = thread; buf[i].uval = uval; buf[i].val = val; buf[i].run = run; @@ -781,7 +781,7 @@ static void print_aggr_thread(struct perf_stat_config *config, if (config->stats) printout(config, id, 0, buf[thread].counter, buf[thread].uval, prefix, buf[thread].run, buf[thread].ena, 1.0, - &config->stats[id.id]); + &config->stats[id.thread]); else printout(config, id, 0, buf[thread].counter, buf[thread].uval, prefix, buf[thread].run, buf[thread].ena, 1.0, -- cgit v1.3.1 From 5149303fdfe5c67ddb51c911e23262f781cd75eb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 24 Dec 2020 10:52:10 -0300 Subject: perf probe: Fix memory leak when synthesizing SDT probes The argv_split() function must be paired with argv_free(), else we must keep a reference to the argv array received or do the freeing ourselves, in synthesize_sdt_probe_command() we were simply leaking that argv[] array. Fixes: 3b1f8311f6963cd1 ("perf probe: Add sdt probes arguments into the uprobe cmd string") Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexandre Truong Cc: Alexis Berlemont Cc: He Zhe Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Will Deacon Link: https://lore.kernel.org/r/20201224135139.GF477817@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-file.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 064b63a6a3f3..bbecb449ea94 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -791,7 +791,7 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note, const char *sdtgrp) { struct strbuf buf; - char *ret = NULL, **args; + char *ret = NULL; int i, args_count, err; unsigned long long ref_ctr_offset; @@ -813,12 +813,19 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note, goto out; if (note->args) { - args = argv_split(note->args, &args_count); + char **args = argv_split(note->args, &args_count); + + if (args == NULL) + goto error; for (i = 0; i < args_count; ++i) { - if (synthesize_sdt_probe_arg(&buf, i, args[i]) < 0) + if (synthesize_sdt_probe_arg(&buf, i, args[i]) < 0) { + argv_free(args); goto error; + } } + + argv_free(args); } out: -- cgit v1.3.1 From da4282c17d695b9311608aa63b3c633e649aadea Mon Sep 17 00:00:00 2001 From: Jiang Wang Date: Thu, 24 Dec 2020 01:12:42 +0000 Subject: selftests/bpf: Fix a compile error for BPF_F_BPRM_SECUREEXEC When CONFIG_BPF_LSM is not configured, running bpf selftesting will show BPF_F_BPRM_SECUREEXEC undefined error for bprm_opts.c. The problem is that bprm_opts.c includes vmliunx.h. The vmlinux.h is generated by "bpftool btf dump file ./vmlinux format c". On the other hand, BPF_F_BPRM_SECUREEXEC is defined in include/uapi/linux/bpf.h and used only in bpf_lsm.c. When CONFIG_BPF_LSM is not set, bpf_lsm will not be compiled, so vmlinux.h will not include definition of BPF_F_BPRM_SECUREEXEC. Ideally, we want to compile bpf selftest regardless of the configuration setting, so change the include file from vmlinux.h to bpf.h. Signed-off-by: Jiang Wang Signed-off-by: Daniel Borkmann Acked-by: Song Liu Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20201224011242.585967-1-jiang.wang@bytedance.com --- tools/testing/selftests/bpf/progs/bprm_opts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bprm_opts.c b/tools/testing/selftests/bpf/progs/bprm_opts.c index 5bfef2887e70..418d9c6d4952 100644 --- a/tools/testing/selftests/bpf/progs/bprm_opts.c +++ b/tools/testing/selftests/bpf/progs/bprm_opts.c @@ -4,7 +4,7 @@ * Copyright 2020 Google LLC. */ -#include "vmlinux.h" +#include #include #include #include -- cgit v1.3.1 From 7cf22a1c88c05ea3807f95b1edfebb729016ae52 Mon Sep 17 00:00:00 2001 From: Harish Date: Tue, 29 Dec 2020 15:14:22 -0800 Subject: selftests/vm: fix building protection keys test Commit d8cbe8bfa7d ("tools/testing/selftests/vm: fix build error") tried to include a ARCH check for powerpc, however ARCH is not defined in the Makefile before including lib.mk. This makes test building to skip on both x86 and powerpc. Fix the arch check by replacing it using machine type as it is already defined and used in the test. Link: https://lkml.kernel.org/r/20201215100402.257376-1-harish@linux.ibm.com Fixes: d8cbe8bfa7d ("tools/testing/selftests/vm: fix build error") Signed-off-by: Harish Reviewed-by: Sandipan Das Cc: Shuah Khan Cc: Sandipan Das Cc: John Hubbard Cc: Dave Hansen Cc: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 9a25307f6115..d42115e4284d 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -4,7 +4,7 @@ include local_config.mk uname_M := $(shell uname -m 2>/dev/null || echo not) -MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/') +MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/') # Without this, failed build products remain, with up-to-date timestamps, # thus tricking Make (and you!) into believing that All Is Well, in subsequent @@ -43,7 +43,7 @@ TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd -ifeq ($(ARCH),x86_64) +ifeq ($(MACHINE),x86_64) CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32) CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c) CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie) @@ -65,13 +65,13 @@ TEST_GEN_FILES += $(BINARIES_64) endif else -ifneq (,$(findstring $(ARCH),powerpc)) +ifneq (,$(findstring $(MACHINE),ppc64)) TEST_GEN_FILES += protection_keys endif endif -ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64)) +ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sh64 sparc64 x86_64)) TEST_GEN_FILES += va_128TBswitch TEST_GEN_FILES += virtual_address_range TEST_GEN_FILES += write_to_hugetlbfs @@ -84,7 +84,7 @@ TEST_FILES := test_vmalloc.sh KSFT_KHDR_INSTALL := 1 include ../lib.mk -ifeq ($(ARCH),x86_64) +ifeq ($(MACHINE),x86_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) -- cgit v1.3.1 From f6e9ceb7a7fc321a31a9dde93a99b7b4b016a3b3 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Wed, 30 Dec 2020 17:52:04 +0800 Subject: selftests: xfrm: fix test return value override issue in xfrm_policy.sh When running this xfrm_policy.sh test script, even with some cases marked as FAIL, the overall test result will still be PASS: $ sudo ./xfrm_policy.sh PASS: policy before exception matches FAIL: expected ping to .254 to fail (exceptions) PASS: direct policy matches (exceptions) PASS: policy matches (exceptions) FAIL: expected ping to .254 to fail (exceptions and block policies) PASS: direct policy matches (exceptions and block policies) PASS: policy matches (exceptions and block policies) FAIL: expected ping to .254 to fail (exceptions and block policies after hresh changes) PASS: direct policy matches (exceptions and block policies after hresh changes) PASS: policy matches (exceptions and block policies after hresh changes) FAIL: expected ping to .254 to fail (exceptions and block policies after hthresh change in ns3) PASS: direct policy matches (exceptions and block policies after hthresh change in ns3) PASS: policy matches (exceptions and block policies after hthresh change in ns3) FAIL: expected ping to .254 to fail (exceptions and block policies after htresh change to normal) PASS: direct policy matches (exceptions and block policies after htresh change to normal) PASS: policy matches (exceptions and block policies after htresh change to normal) PASS: policies with repeated htresh change $ echo $? 0 This is because the $lret in check_xfrm() is not a local variable. Therefore when a test failed in check_exceptions(), the non-zero $lret will later get reset to 0 when the next test calls check_xfrm(). With this fix, the final return value will be 1. Make it easier for testers to spot this failure. Fixes: 39aa6928d462d0 ("xfrm: policy: fix netlink/pf_key policy lookups") Signed-off-by: Po-Hsu Lin Acked-by: Florian Westphal Signed-off-by: Steffen Klassert --- tools/testing/selftests/net/xfrm_policy.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh index 7a1bf94c5bd3..5922941e70c6 100755 --- a/tools/testing/selftests/net/xfrm_policy.sh +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -202,7 +202,7 @@ check_xfrm() { # 1: iptables -m policy rule count != 0 rval=$1 ip=$2 - lret=0 + local lret=0 ip netns exec ns1 ping -q -c 1 10.0.2.$ip > /dev/null -- cgit v1.3.1 From da64ae2d35d3673233f0403b035d4c6acbf71965 Mon Sep 17 00:00:00 2001 From: Visa Hankala Date: Wed, 30 Dec 2020 16:15:53 +0000 Subject: xfrm: Fix wraparound in xfrm_policy_addr_delta() Use three-way comparison for address components to avoid integer wraparound in the result of xfrm_policy_addr_delta(). This ensures that the search trees are built and traversed correctly. Treat IPv4 and IPv6 similarly by returning 0 when prefixlen == 0. Prefix /0 has only one equivalence class. Fixes: 9cf545ebd591d ("xfrm: policy: store inexact policies in a tree ordered by destination address") Signed-off-by: Visa Hankala Acked-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 26 ++++++++++++------ tools/testing/selftests/net/xfrm_policy.sh | 43 ++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2f84136af48a..b74f28cabe24 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -793,15 +793,22 @@ static int xfrm_policy_addr_delta(const xfrm_address_t *a, const xfrm_address_t *b, u8 prefixlen, u16 family) { + u32 ma, mb, mask; unsigned int pdw, pbi; int delta = 0; switch (family) { case AF_INET: - if (sizeof(long) == 4 && prefixlen == 0) - return ntohl(a->a4) - ntohl(b->a4); - return (ntohl(a->a4) & ((~0UL << (32 - prefixlen)))) - - (ntohl(b->a4) & ((~0UL << (32 - prefixlen)))); + if (prefixlen == 0) + return 0; + mask = ~0U << (32 - prefixlen); + ma = ntohl(a->a4) & mask; + mb = ntohl(b->a4) & mask; + if (ma < mb) + delta = -1; + else if (ma > mb) + delta = 1; + break; case AF_INET6: pdw = prefixlen >> 5; pbi = prefixlen & 0x1f; @@ -812,10 +819,13 @@ static int xfrm_policy_addr_delta(const xfrm_address_t *a, return delta; } if (pbi) { - u32 mask = ~0u << (32 - pbi); - - delta = (ntohl(a->a6[pdw]) & mask) - - (ntohl(b->a6[pdw]) & mask); + mask = ~0U << (32 - pbi); + ma = ntohl(a->a6[pdw]) & mask; + mb = ntohl(b->a6[pdw]) & mask; + if (ma < mb) + delta = -1; + else if (ma > mb) + delta = 1; } break; default: diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh index 5922941e70c6..bdf450eaf60c 100755 --- a/tools/testing/selftests/net/xfrm_policy.sh +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -287,6 +287,47 @@ check_hthresh_repeat() return 0 } +# insert non-overlapping policies in a random order and check that +# all of them can be fetched using the traffic selectors. +check_random_order() +{ + local ns=$1 + local log=$2 + + for i in $(seq 100); do + ip -net $ns xfrm policy flush + for j in $(seq 0 16 255 | sort -R); do + ip -net $ns xfrm policy add dst $j.0.0.0/24 dir out priority 10 action allow + done + for j in $(seq 0 16 255); do + if ! ip -net $ns xfrm policy get dst $j.0.0.0/24 dir out > /dev/null; then + echo "FAIL: $log" 1>&2 + return 1 + fi + done + done + + for i in $(seq 100); do + ip -net $ns xfrm policy flush + for j in $(seq 0 16 255 | sort -R); do + local addr=$(printf "e000:0000:%02x00::/56" $j) + ip -net $ns xfrm policy add dst $addr dir out priority 10 action allow + done + for j in $(seq 0 16 255); do + local addr=$(printf "e000:0000:%02x00::/56" $j) + if ! ip -net $ns xfrm policy get dst $addr dir out > /dev/null; then + echo "FAIL: $log" 1>&2 + return 1 + fi + done + done + + ip -net $ns xfrm policy flush + + echo "PASS: $log" + return 0 +} + #check for needed privileges if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" @@ -438,6 +479,8 @@ check_exceptions "exceptions and block policies after htresh change to normal" check_hthresh_repeat "policies with repeated htresh change" +check_random_order ns3 "policies inserted in random order" + for i in 1 2 3 4;do ip netns del ns$i;done exit $ret -- cgit v1.3.1 From f981dc171c04c6cf5a35c712543b231ebf805832 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Sun, 20 Dec 2020 23:18:58 -0800 Subject: tools/power/x86/intel-speed-select: Set scaling_max_freq to base_frequency When BIOS disables turbo, The scaling_max_freq in cpufreq sysfs will be limited to config level 0 base frequency. But when user selects a higher config levels, this will result in higher base frequency. But since scaling_max_freq is still old base frequency, the performance will still be limited. So when the turbo is disabled and cpufreq base_frequency is higher than scaling_max_freq, update the scaling_max_freq to the base_frequency. Signed-off-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20201221071859.2783957-2-srinivas.pandruvada@linux.intel.com Signed-off-by: Hans de Goede --- tools/power/x86/intel-speed-select/isst-config.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 5390158cdb40..9b6f0e6f150d 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -1249,6 +1249,8 @@ static void dump_isst_config(int arg) isst_ctdp_display_information_end(outf); } +static void adjust_scaling_max_from_base_freq(int cpu); + static void set_tdp_level_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, void *arg4) { @@ -1267,6 +1269,9 @@ static void set_tdp_level_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, int pkg_id = get_physical_package_id(cpu); int die_id = get_physical_die_id(cpu); + /* Wait for updated base frequencies */ + usleep(2000); + fprintf(stderr, "Option is set to online/offline\n"); ctdp_level.core_cpumask_size = alloc_cpu_set(&ctdp_level.core_cpumask); @@ -1283,6 +1288,7 @@ static void set_tdp_level_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, if (CPU_ISSET_S(i, ctdp_level.core_cpumask_size, ctdp_level.core_cpumask)) { fprintf(stderr, "online cpu %d\n", i); set_cpu_online_offline(i, 1); + adjust_scaling_max_from_base_freq(i); } else { fprintf(stderr, "offline cpu %d\n", i); set_cpu_online_offline(i, 0); @@ -1440,6 +1446,21 @@ static int set_cpufreq_scaling_min_max(int cpu, int max, int freq) return 0; } +static int no_turbo(void) +{ + return parse_int_file(0, "/sys/devices/system/cpu/intel_pstate/no_turbo"); +} + +static void adjust_scaling_max_from_base_freq(int cpu) +{ + int base_freq, scaling_max_freq; + + scaling_max_freq = parse_int_file(0, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_max_freq", cpu); + base_freq = get_cpufreq_base_freq(cpu); + if (scaling_max_freq < base_freq || no_turbo()) + set_cpufreq_scaling_min_max(cpu, 1, base_freq); +} + static int set_clx_pbf_cpufreq_scaling_min_max(int cpu) { struct isst_pkg_ctdp_level_info *ctdp_level; -- cgit v1.3.1 From bbaa2e95e23e74791dd75b90d5ad9aad535acc6e Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Sun, 20 Dec 2020 23:18:59 -0800 Subject: tools/power/x86/intel-speed-select: Set higher of cpuinfo_max_freq or base_frequency In some case when BIOS disabled turbo, cpufreq cpuinfo_max_freq can be lower than base_frequency at higher config level. So, in that case set scaling_min_freq to base_frequency. Signed-off-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20201221071859.2783957-3-srinivas.pandruvada@linux.intel.com Signed-off-by: Hans de Goede --- tools/power/x86/intel-speed-select/isst-config.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 9b6f0e6f150d..09cb3a6672f3 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -1461,6 +1461,16 @@ static void adjust_scaling_max_from_base_freq(int cpu) set_cpufreq_scaling_min_max(cpu, 1, base_freq); } +static void adjust_scaling_min_from_base_freq(int cpu) +{ + int base_freq, scaling_min_freq; + + scaling_min_freq = parse_int_file(0, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_min_freq", cpu); + base_freq = get_cpufreq_base_freq(cpu); + if (scaling_min_freq < base_freq) + set_cpufreq_scaling_min_max(cpu, 0, base_freq); +} + static int set_clx_pbf_cpufreq_scaling_min_max(int cpu) { struct isst_pkg_ctdp_level_info *ctdp_level; @@ -1558,6 +1568,7 @@ static void set_scaling_min_to_cpuinfo_max(int cpu) continue; set_cpufreq_scaling_min_max_from_cpuinfo(i, 1, 0); + adjust_scaling_min_from_base_freq(i); } } -- cgit v1.3.1 From 65a4e5299739abe0888cda0938d21f8ea3b5c606 Mon Sep 17 00:00:00 2001 From: David Gow Date: Mon, 21 Dec 2020 23:39:00 -0800 Subject: kunit: tool: Force the use of the 'tty' console for UML kunit_tool relies on the UML console outputting printk() output to the tty in order to get results. Since the default console driver could change, pass 'console=tty' to the kernel. This is triggered by a change[1] to use ttynull as a fallback console driver which -- by chance or by design -- seems to have changed the default console output on UML, breaking kunit_tool. While this may be fixed, we should be less fragile to such changes in the default. [1]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=757055ae8dedf5333af17b3b5b4b70ba9bc9da4e Signed-off-by: David Gow Fixes: 757055ae8ded ("init/console: Use ttynull as a fallback when there is no console") Reported-by: Andy Shevchenko Tested-by: Andy Shevchenko Acked-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_kernel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 57c1724b7e5d..698358c9c0d6 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -198,7 +198,7 @@ class LinuxSourceTree(object): return self.validate_config(build_dir) def run_kernel(self, args=[], build_dir='', timeout=None): - args.extend(['mem=1G']) + args.extend(['mem=1G', 'console=tty']) self._ops.linux_bin(args, timeout, build_dir) outfile = get_outfile_path(build_dir) subprocess.call(['stty', 'sane']) -- cgit v1.3.1 From 3b4cf848dad5dad4bf239ba664c809c8cf29f1ed Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 17 Dec 2020 17:31:40 +0100 Subject: selftests/vDSO: add additional binaries to .gitignore Add the test binaries introduced by commit 693f5ca08ca0 ("kselftest: Extend vDSO selftest"), commit 03f55c7952c9 ("kselftest: Extend vDSO selftest to clock_getres") and commit c7e5789b24d3 ("kselftest: Move test_vdso to the vDSO test suite") to .gitignore. Signed-off-by: Tobias Klauser Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/.gitignore | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore index 5eb64d41e541..a8dc51af5a9c 100644 --- a/tools/testing/selftests/vDSO/.gitignore +++ b/tools/testing/selftests/vDSO/.gitignore @@ -1,5 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only vdso_test +vdso_test_abi +vdso_test_clock_getres +vdso_test_correctness vdso_test_gettimeofday vdso_test_getcpu vdso_standalone_test_x86 -- cgit v1.3.1 From df00d02989024d193a6efd1a85513a5658c6a10f Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 17 Dec 2020 17:32:35 +0100 Subject: selftests/vDSO: fix -Wformat warning in vdso_test_correctness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following -Wformat warnings in vdso_test_correctness.c: vdso_test_correctness.c: In function ‘test_one_clock_gettime64’: vdso_test_correctness.c:352:21: warning: format ‘%ld’ expects argument of type ‘long int’, but argument 3 has type ‘long long int’ [-Wformat=] 352 | printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", | ~~~~^ | | | long int | %09lld 353 | (unsigned long long)start.tv_sec, start.tv_nsec, | ~~~~~~~~~~~~~ | | | long long int vdso_test_correctness.c:352:32: warning: format ‘%ld’ expects argument of type ‘long int’, but argument 5 has type ‘long long int’ [-Wformat=] 352 | printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", | ~~~~^ | | | long int | %09lld 353 | (unsigned long long)start.tv_sec, start.tv_nsec, 354 | (unsigned long long)vdso.tv_sec, vdso.tv_nsec, | ~~~~~~~~~~~~ | | | long long int vdso_test_correctness.c:352:43: warning: format ‘%ld’ expects argument of type ‘long int’, but argument 7 has type ‘long long int’ [-Wformat=] The tv_sec member of __kernel_timespec is long long, both in uapi/linux/time_types.h and locally in vdso_test_correctness.c. Signed-off-by: Tobias Klauser Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/vdso_test_correctness.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c index 5029ef9b228c..c4aea794725a 100644 --- a/tools/testing/selftests/vDSO/vdso_test_correctness.c +++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c @@ -349,7 +349,7 @@ static void test_one_clock_gettime64(int clock, const char *name) return; } - printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", + printf("\t%llu.%09lld %llu.%09lld %llu.%09lld\n", (unsigned long long)start.tv_sec, start.tv_nsec, (unsigned long long)vdso.tv_sec, vdso.tv_nsec, (unsigned long long)end.tv_sec, end.tv_nsec); -- cgit v1.3.1 From 7a6eb7c34a78498742b5f82543b7a68c1c443329 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 10 Dec 2020 18:52:33 +0000 Subject: selftests: Skip BPF seftests by default The BPF selftests have build time dependencies on cutting edge versions of tools in the BPF ecosystem including LLVM which are more involved to satisfy than more typical requirements like installing a package from your distribution. This causes issues for users looking at kselftest in as a whole who find that a default build of kselftest fails and that resolving this is time consuming and adds administrative overhead. The fast pace of BPF development and the need for a full BPF stack to do substantial development or validation work on the code mean that people working directly on it don't see a reasonable way to keep supporting older environments without causing problems with the usability of the BPF tests in BPF development so these requirements are unlikely to be relaxed in the immediate future. There is already support for skipping targets so in order to reduce the barrier to entry for people interested in kselftest as a whole let's use that to skip the BPF tests by default when people work with the top level kselftest build system. Users can still build the BPF selftests as part of the wider kselftest build by specifying SKIP_TARGETS, including setting an empty SKIP_TARGETS to build everything. They can also continue to build the BPF selftests individually in cases where they are specifically focused on BPF. This isn't ideal since it means people will need to take special steps to build the BPF tests but the dependencies mean that realistically this is already the case to some extent and it makes it easier for people to pick up and work with the other selftests which is hopefully a net win. Signed-off-by: Mark Brown Signed-off-by: Shuah Khan --- tools/testing/selftests/Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index afbab4aeef3c..8a917cb4426a 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -77,8 +77,10 @@ TARGETS += zram TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug -# User can optionally provide a TARGETS skiplist. -SKIP_TARGETS ?= +# User can optionally provide a TARGETS skiplist. By default we skip +# BPF since it has cutting edge build time dependencies which require +# more effort to install. +SKIP_TARGETS ?= bpf ifneq ($(SKIP_TARGETS),) TMP := $(filter-out $(SKIP_TARGETS), $(TARGETS)) override TARGETS := $(TMP) -- cgit v1.3.1 From 2ff2c7e274392871bfdee00ff2adbb8ebae5d240 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 30 Dec 2020 13:42:51 +0200 Subject: selftests: mlxsw: Set headroom size of correct port The test was setting the headroom size of the wrong port. This was not visible because of a firmware bug that canceled this bug. Set the headroom size of the correct port, so that the test will pass with both old and new firmware versions. Fixes: bfa804784e32 ("selftests: mlxsw: Add a PFC test") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Link: https://lore.kernel.org/r/20201230114251.394009-1-idosch@idosch.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh index 4d900bc1f76c..5c7700212f75 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh @@ -230,7 +230,7 @@ switch_create() __mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null # PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which # is (-2*MTU) about 80K of delay provision. - __mlnx_qos -i $swp3 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp4 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null # bridges # ------- -- cgit v1.3.1 From 3503ee6c0bec5f173d606359e6384a5ef85492fb Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Tue, 5 Jan 2021 18:17:40 +0800 Subject: selftests: fix the return value for UDP GRO test The udpgro.sh will always return 0 (unless the bpf selftest was not build first) even if there are some failed sub test-cases. Therefore the kselftest framework will report this case is OK. Check and return the exit status of each test to make it easier to spot real failures. Signed-off-by: Po-Hsu Lin Signed-off-by: David S. Miller --- tools/testing/selftests/net/udpgro.sh | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index ac2a30be9b32..f8a19f548ae9 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -5,6 +5,14 @@ readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +# set global exit status, but never reset nonzero one. +check_err() +{ + if [ $ret -eq 0 ]; then + ret=$1 + fi +} + cleanup() { local -r jobs="$(jobs -p)" local -r ns="$(ip netns list|grep $PEER_NS)" @@ -44,7 +52,9 @@ run_one() { # Hack: let bg programs complete the startup sleep 0.1 ./udpgso_bench_tx ${tx_args} + ret=$? wait $(jobs -p) + return $ret } run_test() { @@ -87,8 +97,10 @@ run_one_nat() { sleep 0.1 ./udpgso_bench_tx ${tx_args} + ret=$? kill -INT $pid wait $(jobs -p) + return $ret } run_one_2sock() { @@ -110,7 +122,9 @@ run_one_2sock() { sleep 0.1 # first UDP GSO socket should be closed at this point ./udpgso_bench_tx ${tx_args} + ret=$? wait $(jobs -p) + return $ret } run_nat_test() { @@ -131,36 +145,54 @@ run_all() { local -r core_args="-l 4" local -r ipv4_args="${core_args} -4 -D 192.168.1.1" local -r ipv6_args="${core_args} -6 -D 2001:db8::1" + ret=0 echo "ipv4" run_test "no GRO" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400" + check_err $? # explicitly check we are not receiving UDP_SEGMENT cmsg (-S -1) # when GRO does not take place run_test "no GRO chk cmsg" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400 -S -1" + check_err $? # the GSO packets are aggregated because: # * veth schedule napi after each xmit # * segmentation happens in BH context, veth napi poll is delayed after # the transmission of the last segment run_test "GRO" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720" + check_err $? run_test "GRO chk cmsg" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472" + check_err $? run_test "GRO with custom segment size" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720" + check_err $? run_test "GRO with custom segment size cmsg" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720 -S 500" + check_err $? run_nat_test "bad GRO lookup" "${ipv4_args} -M 1 -s 14720 -S 0" "-n 10 -l 1472" + check_err $? run_2sock_test "multiple GRO socks" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472" + check_err $? echo "ipv6" run_test "no GRO" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400" + check_err $? run_test "no GRO chk cmsg" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400 -S -1" + check_err $? run_test "GRO" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520" + check_err $? run_test "GRO chk cmsg" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520 -S 1452" + check_err $? run_test "GRO with custom segment size" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520" + check_err $? run_test "GRO with custom segment size cmsg" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520 -S 500" + check_err $? run_nat_test "bad GRO lookup" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 10 -l 1452" + check_err $? run_2sock_test "multiple GRO socks" "${ipv6_args} -M 1 -s 14520 -S 0 " "-n 1 -l 14520 -S 1452" + check_err $? + return $ret } if [ ! -f ../bpf/xdp_dummy.o ]; then @@ -180,3 +212,5 @@ elif [[ $1 == "__subprocess_2sock" ]]; then shift run_one_2sock $@ fi + +exit $? -- cgit v1.3.1 From 67208692802ce3cacfa00fe586dc0cb1bef0a51c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 6 Jan 2021 00:42:19 +0100 Subject: tools/resolve_btfids: Warn when having multiple IDs for single type The kernel image can contain multiple types (structs/unions) with the same name. This causes distinct type hierarchies in BTF data and makes resolve_btfids fail with error like: BTFIDS vmlinux FAILED unresolved symbol udp6_sock as reported by Qais Yousef [1]. This change adds warning when multiple types of the same name are detected: BTFIDS vmlinux WARN: multiple IDs found for 'file': 526, 113351 - using 526 WARN: multiple IDs found for 'sk_buff': 2744, 113958 - using 2744 We keep the lower ID for the given type instance and let the build continue. Also changing the 'nr' variable name to 'nr_types' to avoid confusion. [1] https://lore.kernel.org/lkml/20201229151352.6hzmjvu3qh6p2qgg@e107158-lin/ Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210105234219.970039-1-jolsa@kernel.org --- tools/bpf/resolve_btfids/main.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index e3ea569ee125..7409d7860aa6 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -139,6 +139,8 @@ int eprintf(int level, int var, const char *fmt, ...) #define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__) #define pr_err(fmt, ...) \ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) +#define pr_info(fmt, ...) \ + eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) static bool is_btf_id(const char *name) { @@ -472,7 +474,7 @@ static int symbols_resolve(struct object *obj) int nr_funcs = obj->nr_funcs; int err, type_id; struct btf *btf; - __u32 nr; + __u32 nr_types; btf = btf__parse(obj->btf ?: obj->path, NULL); err = libbpf_get_error(btf); @@ -483,12 +485,12 @@ static int symbols_resolve(struct object *obj) } err = -1; - nr = btf__get_nr_types(btf); + nr_types = btf__get_nr_types(btf); /* * Iterate all the BTF types and search for collected symbol IDs. */ - for (type_id = 1; type_id <= nr; type_id++) { + for (type_id = 1; type_id <= nr_types; type_id++) { const struct btf_type *type; struct rb_root *root; struct btf_id *id; @@ -526,8 +528,13 @@ static int symbols_resolve(struct object *obj) id = btf_id__find(root, str); if (id) { - id->id = type_id; - (*nr)--; + if (id->id) { + pr_info("WARN: multiple IDs found for '%s': %d, %d - using %d\n", + str, id->id, type_id, id->id); + } else { + id->id = type_id; + (*nr)--; + } } } -- cgit v1.3.1 From 6f02b540d7597f357bc6ee711346761045d4e108 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 6 Jan 2021 15:59:06 +0000 Subject: bpftool: Fix compilation failure for net.o with older glibc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For older glibc ~2.17, #include'ing both linux/if.h and net/if.h fails due to complaints about redefinition of interface flags: CC net.o In file included from net.c:13:0: /usr/include/linux/if.h:71:2: error: redeclaration of enumerator ‘IFF_UP’ IFF_UP = 1<<0, /* sysfs */ ^ /usr/include/net/if.h:44:5: note: previous definition of ‘IFF_UP’ was here IFF_UP = 0x1, /* Interface is up. */ The issue was fixed in kernel headers in [1], but since compilation of net.c picks up system headers the problem can recur. Dropping #include resolves the issue and it is not needed for compilation anyhow. [1] https://lore.kernel.org/netdev/1461512707-23058-1-git-send-email-mikko.rapeli__34748.27880641$1462831734$gmane$org@iki.fi/ Fixes: f6f3bac08ff9 ("tools/bpf: bpftool: add net support") Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/1609948746-15369-1-git-send-email-alan.maguire@oracle.com --- tools/bpf/bpftool/net.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 3fae61ef6339..ff3aa0cf3997 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include -- cgit v1.3.1 From 5316a7c0130acf09bfc8bb0092407006010fcccc Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Tue, 5 Jan 2021 16:22:26 -0800 Subject: tools: selftests: add test for changing routes with PTMU exceptions Adds new 2 new tests to the PTMU script: pmtu_ipv4/6_route_change. These tests explicitly test for a recently discovered problem in the IPv6 routing framework where PMTU exceptions were not properly released when replacing a route via "ip route change ...". After creating PMTU exceptions, the route from the device A to R1 will be replaced with a new route, then device A will be deleted. If the PMTU exceptions were properly cleaned up by the kernel, this device deletion will succeed. Otherwise, the unregistration of the device will stall, and messages such as the following will be logged in dmesg: unregister_netdevice: waiting for veth_A-R1 to become free. Usage count = 4 Signed-off-by: Sean Tranchetti Reviewed-by: David Ahern Link: https://lore.kernel.org/r/1609892546-11389-2-git-send-email-stranche@quicinc.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/pmtu.sh | 71 +++++++++++++++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 464e31eabc73..64cd2e23c568 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -162,7 +162,15 @@ # - list_flush_ipv6_exception # Using the same topology as in pmtu_ipv6, create exceptions, and check # they are shown when listing exception caches, gone after flushing them - +# +# - pmtu_ipv4_route_change +# Use the same topology as in pmtu_ipv4, but issue a route replacement +# command and delete the corresponding device afterward. This tests for +# proper cleanup of the PMTU exceptions by the route replacement path. +# Device unregistration should complete successfully +# +# - pmtu_ipv6_route_change +# Same as above but with IPv6 # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 @@ -224,7 +232,9 @@ tests=" cleanup_ipv4_exception ipv4: cleanup of cached exceptions 1 cleanup_ipv6_exception ipv6: cleanup of cached exceptions 1 list_flush_ipv4_exception ipv4: list and flush cached exceptions 1 - list_flush_ipv6_exception ipv6: list and flush cached exceptions 1" + list_flush_ipv6_exception ipv6: list and flush cached exceptions 1 + pmtu_ipv4_route_change ipv4: PMTU exception w/route replace 1 + pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1" NS_A="ns-A" NS_B="ns-B" @@ -1782,6 +1792,63 @@ test_list_flush_ipv6_exception() { return ${fail} } +test_pmtu_ipvX_route_change() { + family=${1} + + setup namespaces routing || return 2 + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 + + if [ ${family} -eq 4 ]; then + ping=ping + dst1="${prefix4}.${b_r1}.1" + dst2="${prefix4}.${b_r2}.1" + gw="${prefix4}.${a_r1}.2" + else + ping=${ping6} + dst1="${prefix6}:${b_r1}::1" + dst2="${prefix6}:${b_r2}::1" + gw="${prefix6}:${a_r1}::2" + fi + + # Set up initial MTU values + mtu "${ns_a}" veth_A-R1 2000 + mtu "${ns_r1}" veth_R1-A 2000 + mtu "${ns_r1}" veth_R1-B 1400 + mtu "${ns_b}" veth_B-R1 1400 + + mtu "${ns_a}" veth_A-R2 2000 + mtu "${ns_r2}" veth_R2-A 2000 + mtu "${ns_r2}" veth_R2-B 1500 + mtu "${ns_b}" veth_B-R2 1500 + + # Create route exceptions + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1} + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2} + + # Check that exceptions have been created with the correct PMTU + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" + check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 + + # Replace the route from A to R1 + run_cmd ${ns_a} ip route change default via ${gw} + + # Delete the device in A + run_cmd ${ns_a} ip link del "veth_A-R1" +} + +test_pmtu_ipv4_route_change() { + test_pmtu_ipvX_route_change 4 +} + +test_pmtu_ipv6_route_change() { + test_pmtu_ipvX_route_change 6 +} + usage() { echo echo "$0 [OPTIONS] [TEST]..." -- cgit v1.3.1 From 9e7a67dee27902fedab880b9af909bd4acd0fba9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 Jan 2021 00:15:21 +0100 Subject: selftests: netfilter: add selftest for ipip pmtu discovery with enabled connection tracking Convert Christians bug description into a reproducer. Cc: Shuah Khan Reported-by: Christian Perle Signed-off-by: Florian Westphal Acked-by: Pablo Neira Ayuso Signed-off-by: Jakub Kicinski --- tools/testing/selftests/netfilter/Makefile | 3 +- .../selftests/netfilter/ipip-conntrack-mtu.sh | 206 +++++++++++++++++++++ 2 files changed, 208 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh (limited to 'tools') diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index a374e10ef506..3006a8e5b41a 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -4,7 +4,8 @@ TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ - nft_queue.sh nft_meta.sh + nft_queue.sh nft_meta.sh \ + ipip-conntrack-mtu.sh LDLIBS = -lmnl TEST_GEN_FILES = nf-queue diff --git a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh new file mode 100755 index 000000000000..4a6f5c3b3215 --- /dev/null +++ b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh @@ -0,0 +1,206 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +# Conntrack needs to reassemble fragments in order to have complete +# packets for rule matching. Reassembly can lead to packet loss. + +# Consider the following setup: +# +--------+ +---------+ +--------+ +# |Router A|-------|Wanrouter|-------|Router B| +# | |.IPIP..| |..IPIP.| | +# +--------+ +---------+ +--------+ +# / mtu 1400 \ +# / \ +#+--------+ +--------+ +#|Client A| |Client B| +#| | | | +#+--------+ +--------+ + +# Router A and Router B use IPIP tunnel interfaces to tunnel traffic +# between Client A and Client B over WAN. Wanrouter has MTU 1400 set +# on its interfaces. + +rnd=$(mktemp -u XXXXXXXX) +rx=$(mktemp) + +r_a="ns-ra-$rnd" +r_b="ns-rb-$rnd" +r_w="ns-rw-$rnd" +c_a="ns-ca-$rnd" +c_b="ns-cb-$rnd" + +checktool (){ + if ! $1 > /dev/null 2>&1; then + echo "SKIP: Could not $2" + exit $ksft_skip + fi +} + +checktool "iptables --version" "run test without iptables" +checktool "ip -Version" "run test without ip tool" +checktool "which nc" "run test without nc (netcat)" +checktool "ip netns add ${r_a}" "create net namespace" + +for n in ${r_b} ${r_w} ${c_a} ${c_b};do + ip netns add ${n} +done + +cleanup() { + for n in ${r_a} ${r_b} ${r_w} ${c_a} ${c_b};do + ip netns del ${n} + done + rm -f ${rx} +} + +trap cleanup EXIT + +test_path() { + msg="$1" + + ip netns exec ${c_b} nc -n -w 3 -q 3 -u -l -p 5000 > ${rx} < /dev/null & + + sleep 1 + for i in 1 2 3; do + head -c1400 /dev/zero | tr "\000" "a" | ip netns exec ${c_a} nc -n -w 1 -u 192.168.20.2 5000 + done + + wait + + bytes=$(wc -c < ${rx}) + + if [ $bytes -eq 1400 ];then + echo "OK: PMTU $msg connection tracking" + else + echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400" + exit 1 + fi +} + +# Detailed setup for Router A +# --------------------------- +# Interfaces: +# eth0: 10.2.2.1/24 +# eth1: 192.168.10.1/24 +# ipip0: No IP address, local 10.2.2.1 remote 10.4.4.1 +# Routes: +# 192.168.20.0/24 dev ipip0 (192.168.20.0/24 is subnet of Client B) +# 10.4.4.1 via 10.2.2.254 (Router B via Wanrouter) +# No iptables rules at all. + +ip link add veth0 netns ${r_a} type veth peer name veth0 netns ${r_w} +ip link add veth1 netns ${r_a} type veth peer name veth0 netns ${c_a} + +l_addr="10.2.2.1" +r_addr="10.4.4.1" +ip netns exec ${r_a} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip + +for dev in lo veth0 veth1 ipip0; do + ip -net ${r_a} link set $dev up +done + +ip -net ${r_a} addr add 10.2.2.1/24 dev veth0 +ip -net ${r_a} addr add 192.168.10.1/24 dev veth1 + +ip -net ${r_a} route add 192.168.20.0/24 dev ipip0 +ip -net ${r_a} route add 10.4.4.0/24 via 10.2.2.254 + +ip netns exec ${r_a} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Detailed setup for Router B +# --------------------------- +# Interfaces: +# eth0: 10.4.4.1/24 +# eth1: 192.168.20.1/24 +# ipip0: No IP address, local 10.4.4.1 remote 10.2.2.1 +# Routes: +# 192.168.10.0/24 dev ipip0 (192.168.10.0/24 is subnet of Client A) +# 10.2.2.1 via 10.4.4.254 (Router A via Wanrouter) +# No iptables rules at all. + +ip link add veth0 netns ${r_b} type veth peer name veth1 netns ${r_w} +ip link add veth1 netns ${r_b} type veth peer name veth0 netns ${c_b} + +l_addr="10.4.4.1" +r_addr="10.2.2.1" + +ip netns exec ${r_b} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip + +for dev in lo veth0 veth1 ipip0; do + ip -net ${r_b} link set $dev up +done + +ip -net ${r_b} addr add 10.4.4.1/24 dev veth0 +ip -net ${r_b} addr add 192.168.20.1/24 dev veth1 + +ip -net ${r_b} route add 192.168.10.0/24 dev ipip0 +ip -net ${r_b} route add 10.2.2.0/24 via 10.4.4.254 +ip netns exec ${r_b} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Client A +ip -net ${c_a} addr add 192.168.10.2/24 dev veth0 +ip -net ${c_a} link set dev lo up +ip -net ${c_a} link set dev veth0 up +ip -net ${c_a} route add default via 192.168.10.1 + +# Client A +ip -net ${c_b} addr add 192.168.20.2/24 dev veth0 +ip -net ${c_b} link set dev veth0 up +ip -net ${c_b} link set dev lo up +ip -net ${c_b} route add default via 192.168.20.1 + +# Wan +ip -net ${r_w} addr add 10.2.2.254/24 dev veth0 +ip -net ${r_w} addr add 10.4.4.254/24 dev veth1 + +ip -net ${r_w} link set dev lo up +ip -net ${r_w} link set dev veth0 up mtu 1400 +ip -net ${r_w} link set dev veth1 up mtu 1400 + +ip -net ${r_a} link set dev veth0 mtu 1400 +ip -net ${r_b} link set dev veth0 mtu 1400 + +ip netns exec ${r_w} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Path MTU discovery +# ------------------ +# Running tracepath from Client A to Client B shows PMTU discovery is working +# as expected: +# +# clienta:~# tracepath 192.168.20.2 +# 1?: [LOCALHOST] pmtu 1500 +# 1: 192.168.10.1 0.867ms +# 1: 192.168.10.1 0.302ms +# 2: 192.168.10.1 0.312ms pmtu 1480 +# 2: no reply +# 3: 192.168.10.1 0.510ms pmtu 1380 +# 3: 192.168.20.2 2.320ms reached +# Resume: pmtu 1380 hops 3 back 3 + +# ip netns exec ${c_a} traceroute --mtu 192.168.20.2 + +# Router A has learned PMTU (1400) to Router B from Wanrouter. +# Client A has learned PMTU (1400 - IPIP overhead = 1380) to Client B +# from Router A. + +#Send large UDP packet +#--------------------- +#Now we send a 1400 bytes UDP packet from Client A to Client B: + +# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | nc -u 192.168.20.2 5000 +test_path "without" + +# The IPv4 stack on Client A already knows the PMTU to Client B, so the +# UDP packet is sent as two fragments (1380 + 20). Router A forwards the +# fragments between eth1 and ipip0. The fragments fit into the tunnel and +# reach their destination. + +#When sending the large UDP packet again, Router A now reassembles the +#fragments before routing the packet over ipip0. The resulting IPIP +#packet is too big (1400) for the tunnel PMTU (1380) to Router B, it is +#dropped on Router A before sending. + +ip netns exec ${r_a} iptables -A FORWARD -m conntrack --ctstate NEW +test_path "with" -- cgit v1.3.1 From e42ac777d661e878c3b9bac56df11e226cab3010 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 18 Dec 2020 15:17:32 +0100 Subject: KVM: selftests: Factor out guest mode code demand_paging_test, dirty_log_test, and dirty_log_perf_test have redundant guest mode code. Factor it out. Also, while adding a new include, remove the ones we don't need. Reviewed-by: Ben Gardon Reviewed-by: Peter Xu Signed-off-by: Andrew Jones Message-Id: <20201218141734.54359-2-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 2 +- tools/testing/selftests/kvm/demand_paging_test.c | 107 +++++------------- tools/testing/selftests/kvm/dirty_log_perf_test.c | 121 ++++++--------------- tools/testing/selftests/kvm/dirty_log_test.c | 125 +++++++--------------- tools/testing/selftests/kvm/include/guest_modes.h | 21 ++++ tools/testing/selftests/kvm/lib/guest_modes.c | 70 ++++++++++++ 6 files changed, 189 insertions(+), 257 deletions(-) create mode 100644 tools/testing/selftests/kvm/include/guest_modes.h create mode 100644 tools/testing/selftests/kvm/lib/guest_modes.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index c7ca4faba272..a7286a08c3ae 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -33,7 +33,7 @@ ifeq ($(ARCH),s390) UNAME_M := s390x endif -LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c +LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 3d96a7bfaff3..946161a9ce2d 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -7,23 +7,20 @@ * Copyright (C) 2019, Google, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name */ +#define _GNU_SOURCE /* for program_invocation_name and pipe2 */ #include #include -#include -#include -#include #include #include #include -#include -#include #include +#include -#include "perf_test_util.h" -#include "processor.h" +#include "kvm_util.h" #include "test_util.h" +#include "perf_test_util.h" +#include "guest_modes.h" #ifdef __NR_userfaultfd @@ -248,9 +245,14 @@ static int setup_demand_paging(struct kvm_vm *vm, return 0; } -static void run_test(enum vm_guest_mode mode, bool use_uffd, - useconds_t uffd_delay) +struct test_params { + bool use_uffd; + useconds_t uffd_delay; +}; + +static void run_test(enum vm_guest_mode mode, void *arg) { + struct test_params *p = arg; pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; struct uffd_handler_args *uffd_args = NULL; @@ -275,7 +277,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); - if (use_uffd) { + if (p->use_uffd) { uffd_handler_threads = malloc(nr_vcpus * sizeof(*uffd_handler_threads)); TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); @@ -308,7 +310,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, r = setup_demand_paging(vm, &uffd_handler_threads[vcpu_id], pipefds[vcpu_id * 2], - uffd_delay, &uffd_args[vcpu_id], + p->uffd_delay, &uffd_args[vcpu_id], vcpu_hva, guest_percpu_mem_size); if (r < 0) exit(-r); @@ -339,7 +341,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pr_info("All vCPU threads joined\n"); - if (use_uffd) { + if (p->use_uffd) { char c; /* Tell the user fault fd handler threads to quit */ @@ -362,38 +364,19 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, free(guest_data_prototype); free(vcpu_threads); - if (use_uffd) { + if (p->use_uffd) { free(uffd_handler_threads); free(uffd_args); free(pipefds); } } -struct guest_mode { - bool supported; - bool enabled; -}; -static struct guest_mode guest_modes[NUM_VM_MODES]; - -#define guest_mode_init(mode, supported, enabled) ({ \ - guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ -}) - static void help(char *name) { - int i; - puts(""); printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n" " [-b memory] [-v vcpus]\n", name); - printf(" -m: specify the guest mode ID to test\n" - " (default: test all supported modes)\n" - " This option may be used multiple times.\n" - " Guest mode IDs:\n"); - for (i = 0; i < NUM_VM_MODES; ++i) { - printf(" %d: %s%s\n", i, vm_guest_mode_string(i), - guest_modes[i].supported ? " (supported)" : ""); - } + guest_modes_help(); printf(" -u: use User Fault FD to handle vCPU page\n" " faults.\n"); printf(" -d: add a delay in usec to the User Fault\n" @@ -410,53 +393,22 @@ static void help(char *name) int main(int argc, char *argv[]) { int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); - bool mode_selected = false; - unsigned int mode; - int opt, i; - bool use_uffd = false; - useconds_t uffd_delay = 0; - -#ifdef __x86_64__ - guest_mode_init(VM_MODE_PXXV48_4K, true, true); -#endif -#ifdef __aarch64__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); - guest_mode_init(VM_MODE_P40V48_64K, true, true); - { - unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - - if (limit >= 52) - guest_mode_init(VM_MODE_P52V48_64K, true, true); - if (limit >= 48) { - guest_mode_init(VM_MODE_P48V48_4K, true, true); - guest_mode_init(VM_MODE_P48V48_64K, true, true); - } - } -#endif -#ifdef __s390x__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); -#endif + struct test_params p = {}; + int opt; + + guest_modes_append_default(); while ((opt = getopt(argc, argv, "hm:ud:b:v:")) != -1) { switch (opt) { case 'm': - if (!mode_selected) { - for (i = 0; i < NUM_VM_MODES; ++i) - guest_modes[i].enabled = false; - mode_selected = true; - } - mode = strtoul(optarg, NULL, 10); - TEST_ASSERT(mode < NUM_VM_MODES, - "Guest mode ID %d too big", mode); - guest_modes[mode].enabled = true; + guest_modes_cmdline(optarg); break; case 'u': - use_uffd = true; + p.use_uffd = true; break; case 'd': - uffd_delay = strtoul(optarg, NULL, 0); - TEST_ASSERT(uffd_delay >= 0, - "A negative UFFD delay is not supported."); + p.uffd_delay = strtoul(optarg, NULL, 0); + TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported."); break; case 'b': guest_percpu_mem_size = parse_size(optarg); @@ -473,14 +425,7 @@ int main(int argc, char *argv[]) } } - for (i = 0; i < NUM_VM_MODES; ++i) { - if (!guest_modes[i].enabled) - continue; - TEST_ASSERT(guest_modes[i].supported, - "Guest mode ID %d (%s) not supported.", - i, vm_guest_mode_string(i)); - run_test(i, use_uffd, uffd_delay); - } + for_each_guest_mode(run_test, &p); return 0; } diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 9c6a7be31e03..506741eb5d7f 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -12,16 +12,14 @@ #include #include -#include #include #include #include -#include #include "kvm_util.h" -#include "perf_test_util.h" -#include "processor.h" #include "test_util.h" +#include "perf_test_util.h" +#include "guest_modes.h" /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ #define TEST_HOST_LOOP_N 2UL @@ -89,9 +87,15 @@ static void *vcpu_worker(void *data) return NULL; } -static void run_test(enum vm_guest_mode mode, unsigned long iterations, - uint64_t phys_offset, int wr_fract) +struct test_params { + unsigned long iterations; + uint64_t phys_offset; + int wr_fract; +}; + +static void run_test(enum vm_guest_mode mode, void *arg) { + struct test_params *p = arg; pthread_t *vcpu_threads; struct kvm_vm *vm; unsigned long *bmap; @@ -108,7 +112,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); - perf_test_args.wr_fract = wr_fract; + perf_test_args.wr_fract = p->wr_fract; guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm); guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); @@ -156,7 +160,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", ts_diff.tv_sec, ts_diff.tv_nsec); - while (iteration < iterations) { + while (iteration < p->iterations) { /* * Incrementing the iteration number will start the vCPUs * dirtying memory again. @@ -210,15 +214,15 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, pr_info("Disabling dirty logging time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); - avg = timespec_div(get_dirty_log_total, iterations); + avg = timespec_div(get_dirty_log_total, p->iterations); pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", - iterations, get_dirty_log_total.tv_sec, + p->iterations, get_dirty_log_total.tv_sec, get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); if (dirty_log_manual_caps) { - avg = timespec_div(clear_dirty_log_total, iterations); + avg = timespec_div(clear_dirty_log_total, p->iterations); pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", - iterations, clear_dirty_log_total.tv_sec, + p->iterations, clear_dirty_log_total.tv_sec, clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); } @@ -228,20 +232,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, kvm_vm_free(vm); } -struct guest_mode { - bool supported; - bool enabled; -}; -static struct guest_mode guest_modes[NUM_VM_MODES]; - -#define guest_mode_init(mode, supported, enabled) ({ \ - guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ -}) - static void help(char *name) { - int i; - puts(""); printf("usage: %s [-h] [-i iterations] [-p offset] " "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name); @@ -250,14 +242,7 @@ static void help(char *name) TEST_HOST_LOOP_N); printf(" -p: specify guest physical test memory offset\n" " Warning: a low offset can conflict with the loaded test code.\n"); - printf(" -m: specify the guest mode ID to test " - "(default: test all supported modes)\n" - " This option may be used multiple times.\n" - " Guest mode IDs:\n"); - for (i = 0; i < NUM_VM_MODES; ++i) { - printf(" %d: %s%s\n", i, vm_guest_mode_string(i), - guest_modes[i].supported ? " (supported)" : ""); - } + guest_modes_help(); printf(" -b: specify the size of the memory region which should be\n" " dirtied by each vCPU. e.g. 10M or 3G.\n" " (default: 1G)\n"); @@ -272,74 +257,43 @@ static void help(char *name) int main(int argc, char *argv[]) { - unsigned long iterations = TEST_HOST_LOOP_N; - bool mode_selected = false; - uint64_t phys_offset = 0; - unsigned int mode; - int opt, i; - int wr_fract = 1; + int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); + struct test_params p = { + .iterations = TEST_HOST_LOOP_N, + .wr_fract = 1, + }; + int opt; dirty_log_manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | KVM_DIRTY_LOG_INITIALLY_SET); -#ifdef __x86_64__ - guest_mode_init(VM_MODE_PXXV48_4K, true, true); -#endif -#ifdef __aarch64__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); - guest_mode_init(VM_MODE_P40V48_64K, true, true); - - { - unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - - if (limit >= 52) - guest_mode_init(VM_MODE_P52V48_64K, true, true); - if (limit >= 48) { - guest_mode_init(VM_MODE_P48V48_4K, true, true); - guest_mode_init(VM_MODE_P48V48_64K, true, true); - } - } -#endif -#ifdef __s390x__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); -#endif + guest_modes_append_default(); while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) { switch (opt) { case 'i': - iterations = strtol(optarg, NULL, 10); + p.iterations = strtol(optarg, NULL, 10); break; case 'p': - phys_offset = strtoull(optarg, NULL, 0); + p.phys_offset = strtoull(optarg, NULL, 0); break; case 'm': - if (!mode_selected) { - for (i = 0; i < NUM_VM_MODES; ++i) - guest_modes[i].enabled = false; - mode_selected = true; - } - mode = strtoul(optarg, NULL, 10); - TEST_ASSERT(mode < NUM_VM_MODES, - "Guest mode ID %d too big", mode); - guest_modes[mode].enabled = true; + guest_modes_cmdline(optarg); break; case 'b': guest_percpu_mem_size = parse_size(optarg); break; case 'f': - wr_fract = atoi(optarg); - TEST_ASSERT(wr_fract >= 1, + p.wr_fract = atoi(optarg); + TEST_ASSERT(p.wr_fract >= 1, "Write fraction cannot be less than one"); break; case 'v': nr_vcpus = atoi(optarg); - TEST_ASSERT(nr_vcpus > 0, - "Must have a positive number of vCPUs"); - TEST_ASSERT(nr_vcpus <= MAX_VCPUS, - "This test does not currently support\n" - "more than %d vCPUs.", MAX_VCPUS); + TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; case 'h': default: @@ -348,18 +302,11 @@ int main(int argc, char *argv[]) } } - TEST_ASSERT(iterations >= 2, "The test should have at least two iterations"); + TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations"); - pr_info("Test iterations: %"PRIu64"\n", iterations); + pr_info("Test iterations: %"PRIu64"\n", p.iterations); - for (i = 0; i < NUM_VM_MODES; ++i) { - if (!guest_modes[i].enabled) - continue; - TEST_ASSERT(guest_modes[i].supported, - "Guest mode ID %d (%s) not supported.", - i, vm_guest_mode_string(i)); - run_test(i, iterations, phys_offset, wr_fract); - } + for_each_guest_mode(run_test, &p); return 0; } diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 471baecb7772..bb2752d78fe3 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -9,8 +9,6 @@ #include #include -#include -#include #include #include #include @@ -20,8 +18,9 @@ #include #include -#include "test_util.h" #include "kvm_util.h" +#include "test_util.h" +#include "guest_modes.h" #include "processor.h" #define VCPU_ID 1 @@ -673,9 +672,15 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, #define DIRTY_MEM_BITS 30 /* 1G */ #define PAGE_SHIFT_4K 12 -static void run_test(enum vm_guest_mode mode, unsigned long iterations, - unsigned long interval, uint64_t phys_offset) +struct test_params { + unsigned long iterations; + unsigned long interval; + uint64_t phys_offset; +}; + +static void run_test(enum vm_guest_mode mode, void *arg) { + struct test_params *p = arg; struct kvm_vm *vm; unsigned long *bmap; @@ -709,12 +714,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, host_page_size = getpagesize(); host_num_pages = vm_num_host_pages(mode, guest_num_pages); - if (!phys_offset) { + if (!p->phys_offset) { guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * guest_page_size; guest_test_phys_mem &= ~(host_page_size - 1); } else { - guest_test_phys_mem = phys_offset; + guest_test_phys_mem = p->phys_offset; } #ifdef __s390x__ @@ -758,9 +763,9 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, pthread_create(&vcpu_thread, NULL, vcpu_worker, vm); - while (iteration < iterations) { + while (iteration < p->iterations) { /* Give the vcpu thread some time to dirty some pages */ - usleep(interval * 1000); + usleep(p->interval * 1000); log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX, bmap, host_num_pages); vm_dirty_log_verify(mode, bmap); @@ -783,20 +788,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, kvm_vm_free(vm); } -struct guest_mode { - bool supported; - bool enabled; -}; -static struct guest_mode guest_modes[NUM_VM_MODES]; - -#define guest_mode_init(mode, supported, enabled) ({ \ - guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ -}) - static void help(char *name) { - int i; - puts(""); printf("usage: %s [-h] [-i iterations] [-I interval] " "[-p offset] [-m mode]\n", name); @@ -813,51 +806,23 @@ static void help(char *name) printf(" -M: specify the host logging mode " "(default: run all log modes). Supported modes: \n\t"); log_modes_dump(); - printf(" -m: specify the guest mode ID to test " - "(default: test all supported modes)\n" - " This option may be used multiple times.\n" - " Guest mode IDs:\n"); - for (i = 0; i < NUM_VM_MODES; ++i) { - printf(" %d: %s%s\n", i, vm_guest_mode_string(i), - guest_modes[i].supported ? " (supported)" : ""); - } + guest_modes_help(); puts(""); exit(0); } int main(int argc, char *argv[]) { - unsigned long iterations = TEST_HOST_LOOP_N; - unsigned long interval = TEST_HOST_LOOP_INTERVAL; - bool mode_selected = false; - uint64_t phys_offset = 0; - unsigned int mode; - int opt, i, j; + struct test_params p = { + .iterations = TEST_HOST_LOOP_N, + .interval = TEST_HOST_LOOP_INTERVAL, + }; + int opt, i; sem_init(&dirty_ring_vcpu_stop, 0, 0); sem_init(&dirty_ring_vcpu_cont, 0, 0); -#ifdef __x86_64__ - guest_mode_init(VM_MODE_PXXV48_4K, true, true); -#endif -#ifdef __aarch64__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); - guest_mode_init(VM_MODE_P40V48_64K, true, true); - - { - unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - - if (limit >= 52) - guest_mode_init(VM_MODE_P52V48_64K, true, true); - if (limit >= 48) { - guest_mode_init(VM_MODE_P48V48_4K, true, true); - guest_mode_init(VM_MODE_P48V48_64K, true, true); - } - } -#endif -#ifdef __s390x__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); -#endif + guest_modes_append_default(); while ((opt = getopt(argc, argv, "c:hi:I:p:m:M:")) != -1) { switch (opt) { @@ -865,24 +830,16 @@ int main(int argc, char *argv[]) test_dirty_ring_count = strtol(optarg, NULL, 10); break; case 'i': - iterations = strtol(optarg, NULL, 10); + p.iterations = strtol(optarg, NULL, 10); break; case 'I': - interval = strtol(optarg, NULL, 10); + p.interval = strtol(optarg, NULL, 10); break; case 'p': - phys_offset = strtoull(optarg, NULL, 0); + p.phys_offset = strtoull(optarg, NULL, 0); break; case 'm': - if (!mode_selected) { - for (i = 0; i < NUM_VM_MODES; ++i) - guest_modes[i].enabled = false; - mode_selected = true; - } - mode = strtoul(optarg, NULL, 10); - TEST_ASSERT(mode < NUM_VM_MODES, - "Guest mode ID %d too big", mode); - guest_modes[mode].enabled = true; + guest_modes_cmdline(optarg); break; case 'M': if (!strcmp(optarg, "all")) { @@ -911,32 +868,24 @@ int main(int argc, char *argv[]) } } - TEST_ASSERT(iterations > 2, "Iterations must be greater than two"); - TEST_ASSERT(interval > 0, "Interval must be greater than zero"); + TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two"); + TEST_ASSERT(p.interval > 0, "Interval must be greater than zero"); pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", - iterations, interval); + p.iterations, p.interval); srandom(time(0)); - for (i = 0; i < NUM_VM_MODES; ++i) { - if (!guest_modes[i].enabled) - continue; - TEST_ASSERT(guest_modes[i].supported, - "Guest mode ID %d (%s) not supported.", - i, vm_guest_mode_string(i)); - if (host_log_mode_option == LOG_MODE_ALL) { - /* Run each log mode */ - for (j = 0; j < LOG_MODE_NUM; j++) { - pr_info("Testing Log Mode '%s'\n", - log_modes[j].name); - host_log_mode = j; - run_test(i, iterations, interval, phys_offset); - } - } else { - host_log_mode = host_log_mode_option; - run_test(i, iterations, interval, phys_offset); + if (host_log_mode_option == LOG_MODE_ALL) { + /* Run each log mode */ + for (i = 0; i < LOG_MODE_NUM; i++) { + pr_info("Testing Log Mode '%s'\n", log_modes[i].name); + host_log_mode = i; + for_each_guest_mode(run_test, &p); } + } else { + host_log_mode = host_log_mode_option; + for_each_guest_mode(run_test, &p); } return 0; diff --git a/tools/testing/selftests/kvm/include/guest_modes.h b/tools/testing/selftests/kvm/include/guest_modes.h new file mode 100644 index 000000000000..b691df33e64e --- /dev/null +++ b/tools/testing/selftests/kvm/include/guest_modes.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020, Red Hat, Inc. + */ +#include "kvm_util.h" + +struct guest_mode { + bool supported; + bool enabled; +}; + +extern struct guest_mode guest_modes[NUM_VM_MODES]; + +#define guest_mode_append(mode, supported, enabled) ({ \ + guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ +}) + +void guest_modes_append_default(void); +void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg); +void guest_modes_help(void); +void guest_modes_cmdline(const char *arg); diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c new file mode 100644 index 000000000000..25bff307c71f --- /dev/null +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Red Hat, Inc. + */ +#include "guest_modes.h" + +struct guest_mode guest_modes[NUM_VM_MODES]; + +void guest_modes_append_default(void) +{ + guest_mode_append(VM_MODE_DEFAULT, true, true); + +#ifdef __aarch64__ + guest_mode_append(VM_MODE_P40V48_64K, true, true); + { + unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); + if (limit >= 52) + guest_mode_append(VM_MODE_P52V48_64K, true, true); + if (limit >= 48) { + guest_mode_append(VM_MODE_P48V48_4K, true, true); + guest_mode_append(VM_MODE_P48V48_64K, true, true); + } + } +#endif +} + +void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg) +{ + int i; + + for (i = 0; i < NUM_VM_MODES; ++i) { + if (!guest_modes[i].enabled) + continue; + TEST_ASSERT(guest_modes[i].supported, + "Guest mode ID %d (%s) not supported.", + i, vm_guest_mode_string(i)); + func(i, arg); + } +} + +void guest_modes_help(void) +{ + int i; + + printf(" -m: specify the guest mode ID to test\n" + " (default: test all supported modes)\n" + " This option may be used multiple times.\n" + " Guest mode IDs:\n"); + for (i = 0; i < NUM_VM_MODES; ++i) { + printf(" %d: %s%s\n", i, vm_guest_mode_string(i), + guest_modes[i].supported ? " (supported)" : ""); + } +} + +void guest_modes_cmdline(const char *arg) +{ + static bool mode_selected; + unsigned int mode; + int i; + + if (!mode_selected) { + for (i = 0; i < NUM_VM_MODES; ++i) + guest_modes[i].enabled = false; + mode_selected = true; + } + + mode = strtoul(optarg, NULL, 10); + TEST_ASSERT(mode < NUM_VM_MODES, "Guest mode ID %d too big", mode); + guest_modes[mode].enabled = true; +} -- cgit v1.3.1 From 1133e17ea7c9929ff7b90e81d8926f9e870748e9 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 18 Dec 2020 15:17:33 +0100 Subject: KVM: selftests: Use vm_create_with_vcpus in create_vm Reviewed-by: Ben Gardon Signed-off-by: Andrew Jones Message-Id: <20201218141734.54359-3-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 2 +- tools/testing/selftests/kvm/dirty_log_perf_test.c | 2 - tools/testing/selftests/kvm/include/kvm_util.h | 8 ++++ .../testing/selftests/kvm/include/perf_test_util.h | 47 +++++----------------- tools/testing/selftests/kvm/lib/kvm_util.c | 9 +---- 5 files changed, 21 insertions(+), 47 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 946161a9ce2d..b0c41de32e9b 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -7,7 +7,7 @@ * Copyright (C) 2019, Google, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name and pipe2 */ +#define _GNU_SOURCE /* for pipe2 */ #include #include diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 506741eb5d7f..36bea75a8d6f 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -8,8 +8,6 @@ * Copyright (C) 2020, Google, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name */ - #include #include #include diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index dfa9d369e8fc..149766ecd68b 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -70,6 +70,14 @@ enum vm_guest_mode { #define vm_guest_mode_string(m) vm_guest_mode_string[m] extern const char * const vm_guest_mode_string[]; +struct vm_guest_mode_params { + unsigned int pa_bits; + unsigned int va_bits; + unsigned int page_size; + unsigned int page_shift; +}; +extern const struct vm_guest_mode_params vm_guest_mode_params[]; + enum vm_mem_backing_src_type { VM_MEM_SRC_ANONYMOUS, VM_MEM_SRC_ANONYMOUS_THP, diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 239421e4f6b8..cd4c258f458d 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -13,9 +13,6 @@ #define MAX_VCPUS 512 -#define PAGE_SHIFT_4K 12 -#define PTES_PER_4K_PT 512 - #define TEST_MEM_SLOT_INDEX 1 /* Default guest test virtual memory offset */ @@ -94,41 +91,26 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, uint64_t vcpu_memory_bytes) { struct kvm_vm *vm; - uint64_t pages = DEFAULT_GUEST_PHY_PAGES; uint64_t guest_num_pages; - /* Account for a few pages per-vCPU for stacks */ - pages += DEFAULT_STACK_PGS * vcpus; - - /* - * Reserve twice the ammount of memory needed to map the test region and - * the page table / stacks region, at 4k, for page tables. Do the - * calculation with 4K page size: the smallest of all archs. (e.g., 64K - * page size guest will need even less memory for page tables). - */ - pages += (2 * pages) / PTES_PER_4K_PT; - pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) / - PTES_PER_4K_PT; - pages = vm_adjust_num_guest_pages(mode, pages); - pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - vm = vm_create(mode, pages, O_RDWR); - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); -#ifdef __x86_64__ - vm_create_irqchip(vm); -#endif - - perf_test_args.vm = vm; - perf_test_args.guest_page_size = vm_get_page_size(vm); perf_test_args.host_page_size = getpagesize(); + perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size; + guest_num_pages = vm_adjust_num_guest_pages(mode, + (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, + "Guest memory size is not host page size aligned."); TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, "Guest memory size is not guest page size aligned."); - guest_num_pages = (vcpus * vcpu_memory_bytes) / - perf_test_args.guest_page_size; - guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); + vm = vm_create_with_vcpus(mode, vcpus, + (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size, + 0, guest_code, NULL); + + perf_test_args.vm = vm; /* * If there should be more memory in the guest test region than there @@ -140,18 +122,13 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, guest_num_pages, vm_get_max_gfn(vm), vcpus, vcpu_memory_bytes); - TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, - "Guest memory size is not host page size aligned."); - guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * perf_test_args.guest_page_size; guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); - #ifdef __s390x__ /* Align to 1M (segment size) */ guest_test_phys_mem &= ~((1 << 20) - 1); #endif - pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); /* Add an extra memory slot for testing */ @@ -177,8 +154,6 @@ static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; - vm_vcpu_add_default(vm, vcpu_id, guest_code); - vcpu_args->vcpu_id = vcpu_id; vcpu_args->gva = guest_test_virt_mem + (vcpu_id * vcpu_memory_bytes); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 88ef7067f1e6..fa5a90e6c6f0 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -153,14 +153,7 @@ const char * const vm_guest_mode_string[] = { _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES, "Missing new mode strings?"); -struct vm_guest_mode_params { - unsigned int pa_bits; - unsigned int va_bits; - unsigned int page_size; - unsigned int page_shift; -}; - -static const struct vm_guest_mode_params vm_guest_mode_params[] = { +const struct vm_guest_mode_params vm_guest_mode_params[] = { { 52, 48, 0x1000, 12 }, { 52, 48, 0x10000, 16 }, { 48, 48, 0x1000, 12 }, -- cgit v1.3.1 From b268b6f0bd36322358accb15c45683a9e1220231 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 18 Dec 2020 15:17:34 +0100 Subject: KVM: selftests: Implement perf_test_util more conventionally It's not conventional C to put non-inline functions in header files. Create a source file for the functions instead. Also reduce the amount of globals and rename the functions to something less generic. Reviewed-by: Ben Gardon Reviewed-by: Peter Xu Signed-off-by: Andrew Jones Message-Id: <20201218141734.54359-4-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 2 +- tools/testing/selftests/kvm/demand_paging_test.c | 11 +- tools/testing/selftests/kvm/dirty_log_perf_test.c | 22 ++-- tools/testing/selftests/kvm/include/kvm_util.h | 1 + .../testing/selftests/kvm/include/perf_test_util.h | 142 ++------------------- tools/testing/selftests/kvm/lib/perf_test_util.c | 134 +++++++++++++++++++ 6 files changed, 166 insertions(+), 146 deletions(-) create mode 100644 tools/testing/selftests/kvm/lib/perf_test_util.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index a7286a08c3ae..fe41c6a0fa67 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -33,7 +33,7 @@ ifeq ($(ARCH),s390) UNAME_M := s390x endif -LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c +LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index b0c41de32e9b..cdad1eca72f7 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -36,12 +36,14 @@ #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) #endif +static int nr_vcpus = 1; +static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static char *guest_data_prototype; static void *vcpu_worker(void *data) { int ret; - struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data; int vcpu_id = vcpu_args->vcpu_id; struct kvm_vm *vm = perf_test_args.vm; struct kvm_run *run; @@ -263,7 +265,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) int vcpu_id; int r; - vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); + vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size); perf_test_args.wr_fract = 1; @@ -275,7 +277,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); + perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size); if (p->use_uffd) { uffd_handler_threads = @@ -359,8 +361,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) perf_test_args.vcpu_args[0].pages * nr_vcpus / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); - ucall_uninit(vm); - kvm_vm_free(vm); + perf_test_destroy_vm(vm); free(guest_data_prototype); free(vcpu_threads); diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 36bea75a8d6f..2283a0ec74a9 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -22,11 +22,14 @@ /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ #define TEST_HOST_LOOP_N 2UL +static int nr_vcpus = 1; +static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; + /* Host variables */ static u64 dirty_log_manual_caps; static bool host_quit; static uint64_t iteration; -static uint64_t vcpu_last_completed_iteration[MAX_VCPUS]; +static uint64_t vcpu_last_completed_iteration[KVM_MAX_VCPUS]; static void *vcpu_worker(void *data) { @@ -38,7 +41,7 @@ static void *vcpu_worker(void *data) struct timespec ts_diff; struct timespec total = (struct timespec){0}; struct timespec avg; - struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data; int vcpu_id = vcpu_args->vcpu_id; vcpu_args_set(vm, vcpu_id, 1, vcpu_id); @@ -108,7 +111,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_enable_cap cap = {}; struct timespec clear_dirty_log_total = (struct timespec){0}; - vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); + vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size); perf_test_args.wr_fract = p->wr_fract; @@ -126,7 +129,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); + perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size); sync_global_to_guest(vm, perf_test_args); @@ -152,7 +155,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Enable dirty logging */ clock_gettime(CLOCK_MONOTONIC, &start); - vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, + vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX, KVM_MEM_LOG_DIRTY_PAGES); ts_diff = timespec_diff_now(start); pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", @@ -179,7 +182,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) iteration, ts_diff.tv_sec, ts_diff.tv_nsec); clock_gettime(CLOCK_MONOTONIC, &start); - kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + kvm_vm_get_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap); ts_diff = timespec_diff_now(start); get_dirty_log_total = timespec_add(get_dirty_log_total, @@ -189,7 +192,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) if (dirty_log_manual_caps) { clock_gettime(CLOCK_MONOTONIC, &start); - kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, + kvm_vm_clear_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap, 0, host_num_pages); ts_diff = timespec_diff_now(start); @@ -207,7 +210,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Disable dirty logging */ clock_gettime(CLOCK_MONOTONIC, &start); - vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0); + vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX, 0); ts_diff = timespec_diff_now(start); pr_info("Disabling dirty logging time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); @@ -226,8 +229,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) free(bmap); free(vcpu_threads); - ucall_uninit(vm); - kvm_vm_free(vm); + perf_test_destroy_vm(vm); } static void help(char *name) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 149766ecd68b..5cbb861525ed 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -16,6 +16,7 @@ #include "sparsebit.h" +#define KVM_MAX_VCPUS 512 /* * Callers of kvm_util only have an incomplete/opaque description of the diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index cd4c258f458d..b1188823c31b 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -9,35 +9,15 @@ #define SELFTEST_KVM_PERF_TEST_UTIL_H #include "kvm_util.h" -#include "processor.h" - -#define MAX_VCPUS 512 - -#define TEST_MEM_SLOT_INDEX 1 /* Default guest test virtual memory offset */ #define DEFAULT_GUEST_TEST_MEM 0xc0000000 #define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */ -/* - * Guest physical memory offset of the testing memory slot. - * This will be set to the topmost valid physical address minus - * the test memory size. - */ -static uint64_t guest_test_phys_mem; - -/* - * Guest virtual memory offset of the testing memory slot. - * Must not conflict with identity mapped test code. - */ -static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; -static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; - -/* Number of VCPUs for the test */ -static int nr_vcpus = 1; +#define PERF_TEST_MEM_SLOT_INDEX 1 -struct vcpu_args { +struct perf_test_vcpu_args { uint64_t gva; uint64_t pages; @@ -51,119 +31,21 @@ struct perf_test_args { uint64_t guest_page_size; int wr_fract; - struct vcpu_args vcpu_args[MAX_VCPUS]; + struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS]; }; -static struct perf_test_args perf_test_args; +extern struct perf_test_args perf_test_args; /* - * Continuously write to the first 8 bytes of each page in the - * specified region. + * Guest physical memory offset of the testing memory slot. + * This will be set to the topmost valid physical address minus + * the test memory size. */ -static void guest_code(uint32_t vcpu_id) -{ - struct vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; - uint64_t gva; - uint64_t pages; - int i; - - /* Make sure vCPU args data structure is not corrupt. */ - GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id); - - gva = vcpu_args->gva; - pages = vcpu_args->pages; - - while (true) { - for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * perf_test_args.guest_page_size); - - if (i % perf_test_args.wr_fract == 0) - *(uint64_t *)addr = 0x0123456789ABCDEF; - else - READ_ONCE(*(uint64_t *)addr); - } - - GUEST_SYNC(1); - } -} - -static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, - uint64_t vcpu_memory_bytes) -{ - struct kvm_vm *vm; - uint64_t guest_num_pages; - - pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - - perf_test_args.host_page_size = getpagesize(); - perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size; - - guest_num_pages = vm_adjust_num_guest_pages(mode, - (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size); - - TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, - "Guest memory size is not host page size aligned."); - TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, - "Guest memory size is not guest page size aligned."); - - vm = vm_create_with_vcpus(mode, vcpus, - (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size, - 0, guest_code, NULL); - - perf_test_args.vm = vm; - - /* - * If there should be more memory in the guest test region than there - * can be pages in the guest, it will definitely cause problems. - */ - TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), - "Requested more guest memory than address space allows.\n" - " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", - guest_num_pages, vm_get_max_gfn(vm), vcpus, - vcpu_memory_bytes); - - guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * - perf_test_args.guest_page_size; - guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); -#ifdef __s390x__ - /* Align to 1M (segment size) */ - guest_test_phys_mem &= ~((1 << 20) - 1); -#endif - pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); - - /* Add an extra memory slot for testing */ - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - guest_test_phys_mem, - TEST_MEM_SLOT_INDEX, - guest_num_pages, 0); - - /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); - - ucall_init(vm, NULL); - - return vm; -} - -static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) -{ - vm_paddr_t vcpu_gpa; - struct vcpu_args *vcpu_args; - int vcpu_id; - - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { - vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; - - vcpu_args->vcpu_id = vcpu_id; - vcpu_args->gva = guest_test_virt_mem + - (vcpu_id * vcpu_memory_bytes); - vcpu_args->pages = vcpu_memory_bytes / - perf_test_args.guest_page_size; +extern uint64_t guest_test_phys_mem; - vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); - pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); - } -} +struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, + uint64_t vcpu_memory_bytes); +void perf_test_destroy_vm(struct kvm_vm *vm); +void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes); #endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c new file mode 100644 index 000000000000..9be1944c2d1c --- /dev/null +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Google LLC. + */ + +#include "kvm_util.h" +#include "perf_test_util.h" +#include "processor.h" + +struct perf_test_args perf_test_args; + +uint64_t guest_test_phys_mem; + +/* + * Guest virtual memory offset of the testing memory slot. + * Must not conflict with identity mapped test code. + */ +static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; + +/* + * Continuously write to the first 8 bytes of each page in the + * specified region. + */ +static void guest_code(uint32_t vcpu_id) +{ + struct perf_test_vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + uint64_t gva; + uint64_t pages; + int i; + + /* Make sure vCPU args data structure is not corrupt. */ + GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id); + + gva = vcpu_args->gva; + pages = vcpu_args->pages; + + while (true) { + for (i = 0; i < pages; i++) { + uint64_t addr = gva + (i * perf_test_args.guest_page_size); + + if (i % perf_test_args.wr_fract == 0) + *(uint64_t *)addr = 0x0123456789ABCDEF; + else + READ_ONCE(*(uint64_t *)addr); + } + + GUEST_SYNC(1); + } +} + +struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, + uint64_t vcpu_memory_bytes) +{ + struct kvm_vm *vm; + uint64_t guest_num_pages; + + pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + + perf_test_args.host_page_size = getpagesize(); + perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size; + + guest_num_pages = vm_adjust_num_guest_pages(mode, + (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, + "Guest memory size is not host page size aligned."); + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, + "Guest memory size is not guest page size aligned."); + + vm = vm_create_with_vcpus(mode, vcpus, + (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size, + 0, guest_code, NULL); + + perf_test_args.vm = vm; + + /* + * If there should be more memory in the guest test region than there + * can be pages in the guest, it will definitely cause problems. + */ + TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), + "Requested more guest memory than address space allows.\n" + " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", + guest_num_pages, vm_get_max_gfn(vm), vcpus, + vcpu_memory_bytes); + + guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * + perf_test_args.guest_page_size; + guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); +#ifdef __s390x__ + /* Align to 1M (segment size) */ + guest_test_phys_mem &= ~((1 << 20) - 1); +#endif + pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); + + /* Add an extra memory slot for testing */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + guest_test_phys_mem, + PERF_TEST_MEM_SLOT_INDEX, + guest_num_pages, 0); + + /* Do mapping for the demand paging memory slot */ + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); + + ucall_init(vm, NULL); + + return vm; +} + +void perf_test_destroy_vm(struct kvm_vm *vm) +{ + ucall_uninit(vm); + kvm_vm_free(vm); +} + +void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) +{ + vm_paddr_t vcpu_gpa; + struct perf_test_vcpu_args *vcpu_args; + int vcpu_id; + + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + + vcpu_args->vcpu_id = vcpu_id; + vcpu_args->gva = guest_test_virt_mem + + (vcpu_id * vcpu_memory_bytes); + vcpu_args->pages = vcpu_memory_bytes / + perf_test_args.guest_page_size; + + vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); + pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); + } +} -- cgit v1.3.1 From a5c9ca76a1c61fb5e4c35de8eb25aa925b03c9e4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 7 Jan 2021 16:48:24 +0200 Subject: selftests: fib_nexthops: Fix wrong mausezahn invocation For IPv6 traffic, mausezahn needs to be invoked with '-6'. Otherwise an error is returned: # ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" Failed to set source IPv4 address. Please check if source is set to a valid IPv4 address. Invalid command line parameters! Fixes: 7c741868ceab ("selftests: Add torture tests to nexthop tests") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_nexthops.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index eb693a3b7b4a..4c7d33618437 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -869,7 +869,7 @@ ipv6_torture() pid3=$! ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 & pid4=$! - ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + ip netns exec me mausezahn -6 veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & pid5=$! sleep 300 -- cgit v1.3.1 From 3502bd9b5762154ff11665f3f18f6d7dcc6f781c Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Sat, 9 Jan 2021 00:37:45 +0300 Subject: selftests/tls: fix selftests after adding ChaCha20-Poly1305 TLS selftests where broken because of wrong variable types used. Fix it by changing u16 -> uint16_t Fixes: 4f336e88a870 ("selftests/tls: add CHACHA20-POLY1305 to tls selftests") Reported-by: kernel test robot Signed-off-by: Vadim Fedorenko Link: https://lore.kernel.org/r/1610141865-7142-1-git-send-email-vfedorenko@novek.ru Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index cb0d1890a860..e0088c2d38a5 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -103,8 +103,8 @@ FIXTURE(tls) FIXTURE_VARIANT(tls) { - u16 tls_version; - u16 cipher_type; + uint16_t tls_version; + uint16_t cipher_type; }; FIXTURE_VARIANT_ADD(tls, 12_gcm) -- cgit v1.3.1 From fab336b42441e0b2eb1d81becedb45fbdf99606e Mon Sep 17 00:00:00 2001 From: Chen Yi Date: Tue, 5 Jan 2021 23:31:20 +0800 Subject: selftests: netfilter: Pass family parameter "-f" to conntrack tool Fix nft_conntrack_helper.sh false fail report: 1) Conntrack tool need "-f ipv6" parameter to show out ipv6 traffic items. 2) Sleep 1 second after background nc send packet, to make sure check is after this statement executed. False report: FAIL: ns1-lkjUemYw did not show attached helper ip set via ruleset PASS: ns1-lkjUemYw connection on port 2121 has ftp helper attached ... After fix: PASS: ns1-2hUniwU2 connection on port 2121 has ftp helper attached PASS: ns2-2hUniwU2 connection on port 2121 has ftp helper attached ... Fixes: 619ae8e0697a6 ("selftests: netfilter: add test case for conntrack helper assignment") Signed-off-by: Chen Yi Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_conntrack_helper.sh | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh index edf0a48da6bf..bf6b9626c7dd 100755 --- a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh +++ b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh @@ -94,7 +94,13 @@ check_for_helper() local message=$2 local port=$3 - ip netns exec ${netns} conntrack -L -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp' + if echo $message |grep -q 'ipv6';then + local family="ipv6" + else + local family="ipv4" + fi + + ip netns exec ${netns} conntrack -L -f $family -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp' if [ $? -ne 0 ] ; then echo "FAIL: ${netns} did not show attached helper $message" 1>&2 ret=1 @@ -111,8 +117,8 @@ test_helper() sleep 3 | ip netns exec ${ns2} nc -w 2 -l -p $port > /dev/null & - sleep 1 sleep 1 | ip netns exec ${ns1} nc -w 2 10.0.1.2 $port > /dev/null & + sleep 1 check_for_helper "$ns1" "ip $msg" $port check_for_helper "$ns2" "ip $msg" $port @@ -128,8 +134,8 @@ test_helper() sleep 3 | ip netns exec ${ns2} nc -w 2 -6 -l -p $port > /dev/null & - sleep 1 sleep 1 | ip netns exec ${ns1} nc -w 2 -6 dead:1::2 $port > /dev/null & + sleep 1 check_for_helper "$ns1" "ipv6 $msg" $port check_for_helper "$ns2" "ipv6 $msg" $port -- cgit v1.3.1 From 2f94ac19184665263b7a285ae88abe19dedf9c1b Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 12 Jan 2021 07:55:23 +0000 Subject: bpf: Update local storage test to check handling of null ptrs It was found in [1] that bpf_inode_storage_get helper did not check the nullness of the passed owner ptr which caused an oops when dereferenced. This change incorporates the example suggested in [1] into the local storage selftest. The test is updated to create a temporary directory instead of just using a tempfile. In order to replicate the issue this copied rm binary is renamed tiggering the inode_rename with a null pointer for the new_inode. The logic to verify the setting and deletion of the inode local storage of the old inode is also moved to this LSM hook. The change also removes the copy_rm function and simply shells out to copy files and recursively delete directories and consolidates the logic of setting the initial inode storage to the bprm_committed_creds hook and removes the file_open hook. [1]: https://lore.kernel.org/bpf/CANaYP3HWkH91SN=wTNO9FL_2ztHfqcXKX38SSE-JJ2voh+vssw@mail.gmail.com Suggested-by: Gilad Reti Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210112075525.256820-2-kpsingh@kernel.org --- .../selftests/bpf/prog_tests/test_local_storage.c | 96 ++++++---------------- tools/testing/selftests/bpf/progs/local_storage.c | 62 ++++++++------ 2 files changed, 61 insertions(+), 97 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c index c0fe73a17ed1..3bfcf00c0a67 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c @@ -34,61 +34,6 @@ struct storage { struct bpf_spin_lock lock; }; -/* Copies an rm binary to a temp file. dest is a mkstemp template */ -static int copy_rm(char *dest) -{ - int fd_in, fd_out = -1, ret = 0; - struct stat stat; - char *buf = NULL; - - fd_in = open("/bin/rm", O_RDONLY); - if (fd_in < 0) - return -errno; - - fd_out = mkstemp(dest); - if (fd_out < 0) { - ret = -errno; - goto out; - } - - ret = fstat(fd_in, &stat); - if (ret == -1) { - ret = -errno; - goto out; - } - - buf = malloc(stat.st_blksize); - if (!buf) { - ret = -errno; - goto out; - } - - while (ret = read(fd_in, buf, stat.st_blksize), ret > 0) { - ret = write(fd_out, buf, ret); - if (ret < 0) { - ret = -errno; - goto out; - - } - } - if (ret < 0) { - ret = -errno; - goto out; - - } - - /* Set executable permission on the copied file */ - ret = chmod(dest, 0100); - if (ret == -1) - ret = -errno; - -out: - free(buf); - close(fd_in); - close(fd_out); - return ret; -} - /* Fork and exec the provided rm binary and return the exit code of the * forked process and its pid. */ @@ -168,9 +113,11 @@ static bool check_syscall_operations(int map_fd, int obj_fd) void test_test_local_storage(void) { - char tmp_exec_path[PATH_MAX] = "/tmp/copy_of_rmXXXXXX"; + char tmp_dir_path[64] = "/tmp/local_storageXXXXXX"; int err, serv_sk = -1, task_fd = -1, rm_fd = -1; struct local_storage *skel = NULL; + char tmp_exec_path[64]; + char cmd[256]; skel = local_storage__open_and_load(); if (CHECK(!skel, "skel_load", "lsm skeleton failed\n")) @@ -189,18 +136,24 @@ void test_test_local_storage(void) task_fd)) goto close_prog; - err = copy_rm(tmp_exec_path); - if (CHECK(err < 0, "copy_rm", "err %d errno %d\n", err, errno)) + if (CHECK(!mkdtemp(tmp_dir_path), "mkdtemp", + "unable to create tmpdir: %d\n", errno)) goto close_prog; + snprintf(tmp_exec_path, sizeof(tmp_exec_path), "%s/copy_of_rm", + tmp_dir_path); + snprintf(cmd, sizeof(cmd), "cp /bin/rm %s", tmp_exec_path); + if (CHECK_FAIL(system(cmd))) + goto close_prog_rmdir; + rm_fd = open(tmp_exec_path, O_RDONLY); if (CHECK(rm_fd < 0, "open", "failed to open %s err:%d, errno:%d", tmp_exec_path, rm_fd, errno)) - goto close_prog; + goto close_prog_rmdir; if (!check_syscall_operations(bpf_map__fd(skel->maps.inode_storage_map), rm_fd)) - goto close_prog; + goto close_prog_rmdir; /* Sets skel->bss->monitored_pid to the pid of the forked child * forks a child process that executes tmp_exec_path and tries to @@ -209,33 +162,36 @@ void test_test_local_storage(void) */ err = run_self_unlink(&skel->bss->monitored_pid, tmp_exec_path); if (CHECK(err != EPERM, "run_self_unlink", "err %d want EPERM\n", err)) - goto close_prog_unlink; + goto close_prog_rmdir; /* Set the process being monitored to be the current process */ skel->bss->monitored_pid = getpid(); - /* Remove the temporary created executable */ - err = unlink(tmp_exec_path); - if (CHECK(err != 0, "unlink", "unable to unlink %s: %d", tmp_exec_path, - errno)) - goto close_prog_unlink; + /* Move copy_of_rm to a new location so that it triggers the + * inode_rename LSM hook with a new_dentry that has a NULL inode ptr. + */ + snprintf(cmd, sizeof(cmd), "mv %s/copy_of_rm %s/check_null_ptr", + tmp_dir_path, tmp_dir_path); + if (CHECK_FAIL(system(cmd))) + goto close_prog_rmdir; CHECK(skel->data->inode_storage_result != 0, "inode_storage_result", "inode_local_storage not set\n"); serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); if (CHECK(serv_sk < 0, "start_server", "failed to start server\n")) - goto close_prog; + goto close_prog_rmdir; CHECK(skel->data->sk_storage_result != 0, "sk_storage_result", "sk_local_storage not set\n"); if (!check_syscall_operations(bpf_map__fd(skel->maps.sk_storage_map), serv_sk)) - goto close_prog; + goto close_prog_rmdir; -close_prog_unlink: - unlink(tmp_exec_path); +close_prog_rmdir: + snprintf(cmd, sizeof(cmd), "rm -rf %s", tmp_dir_path); + system(cmd); close_prog: close(serv_sk); close(rm_fd); diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c index 3e3de130f28f..95868bc7ada9 100644 --- a/tools/testing/selftests/bpf/progs/local_storage.c +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -50,7 +50,6 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) __u32 pid = bpf_get_current_pid_tgid() >> 32; struct local_storage *storage; bool is_self_unlink; - int err; if (pid != monitored_pid) return 0; @@ -66,8 +65,27 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) return -EPERM; } - storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + return 0; +} + +SEC("lsm/inode_rename") +int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct local_storage *storage; + int err; + + /* new_dentry->d_inode can be NULL when the inode is renamed to a file + * that did not exist before. The helper should be able to handle this + * NULL pointer. + */ + bpf_inode_storage_get(&inode_storage_map, new_dentry->d_inode, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + + storage = bpf_inode_storage_get(&inode_storage_map, old_dentry->d_inode, + 0, 0); if (!storage) return 0; @@ -76,7 +94,7 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) inode_storage_result = -1; bpf_spin_unlock(&storage->lock); - err = bpf_inode_storage_delete(&inode_storage_map, victim->d_inode); + err = bpf_inode_storage_delete(&inode_storage_map, old_dentry->d_inode); if (!err) inode_storage_result = err; @@ -133,37 +151,18 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, return 0; } -SEC("lsm/file_open") -int BPF_PROG(file_open, struct file *file) -{ - __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct local_storage *storage; - - if (pid != monitored_pid) - return 0; - - if (!file->f_inode) - return 0; - - storage = bpf_inode_storage_get(&inode_storage_map, file->f_inode, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); - if (!storage) - return 0; - - bpf_spin_lock(&storage->lock); - storage->value = DUMMY_STORAGE_VALUE; - bpf_spin_unlock(&storage->lock); - return 0; -} - /* This uses the local storage to remember the inode of the binary that a * process was originally executing. */ SEC("lsm/bprm_committed_creds") void BPF_PROG(exec, struct linux_binprm *bprm) { + __u32 pid = bpf_get_current_pid_tgid() >> 32; struct local_storage *storage; + if (pid != monitored_pid) + return; + storage = bpf_task_storage_get(&task_storage_map, bpf_get_current_task_btf(), 0, BPF_LOCAL_STORAGE_GET_F_CREATE); @@ -172,4 +171,13 @@ void BPF_PROG(exec, struct linux_binprm *bprm) storage->exec_inode = bprm->file->f_inode; bpf_spin_unlock(&storage->lock); } + + storage = bpf_inode_storage_get(&inode_storage_map, bprm->file->f_inode, + 0, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!storage) + return; + + bpf_spin_lock(&storage->lock); + storage->value = DUMMY_STORAGE_VALUE; + bpf_spin_unlock(&storage->lock); } -- cgit v1.3.1 From b8d52264df85ec12f370c0a8b28d0ac59a05877a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 9 Jan 2021 23:03:41 -0800 Subject: libbpf: Allow loading empty BTFs Empty BTFs do come up (e.g., simple kernel modules with no new types and strings, compared to the vmlinux BTF) and there is nothing technically wrong with them. So remove unnecessary check preventing loading empty BTFs. Fixes: d8123624506c ("libbpf: Fix BTF data layout checks and allow empty BTF") Reported-by: Christopher William Snowhill Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210110070341.1380086-2-andrii@kernel.org --- tools/lib/bpf/btf.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 3c3f2bc6c652..9970a288dda5 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -240,11 +240,6 @@ static int btf_parse_hdr(struct btf *btf) } meta_left = btf->raw_size - sizeof(*hdr); - if (!meta_left) { - pr_debug("BTF has no data\n"); - return -EINVAL; - } - if (meta_left < hdr->str_off + hdr->str_len) { pr_debug("Invalid BTF total size:%u\n", btf->raw_size); return -EINVAL; -- cgit v1.3.1 From a2e38dffcd93541914aba52b30c6a52acca35201 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 5 Jan 2021 18:04:20 -0600 Subject: objtool: Don't add empty symbols to the rbtree Building with the Clang assembler shows the following warning: arch/x86/kernel/ftrace_64.o: warning: objtool: missing symbol for insn at offset 0x16 The Clang assembler strips section symbols. That ends up giving objtool's find_func_containing() much more test coverage than normal. Turns out, find_func_containing() doesn't work so well for overlapping symbols: 2: 000000000000000e 0 NOTYPE LOCAL DEFAULT 2 fgraph_trace 3: 000000000000000f 0 NOTYPE LOCAL DEFAULT 2 trace 4: 0000000000000000 165 FUNC GLOBAL DEFAULT 2 __fentry__ 5: 000000000000000e 0 NOTYPE GLOBAL DEFAULT 2 ftrace_stub The zero-length NOTYPE symbols are inside __fentry__(), confusing the rbtree search for any __fentry__() offset coming after a NOTYPE. Try to avoid this problem by not adding zero-length symbols to the rbtree. They're rare and aren't needed in the rbtree anyway. One caveat, this actually might not end up being the right fix. Non-empty overlapping symbols, if they exist, could have the same problem. But that would need bigger changes, let's see if we can get away with the easy fix for now. Reported-by: Arnd Bergmann Acked-by: Peter Zijlstra (Intel) Signed-off-by: Josh Poimboeuf --- tools/objtool/elf.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index be89c741ba9a..f9682db33cca 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -448,6 +448,13 @@ static int read_symbols(struct elf *elf) list_add(&sym->list, entry); elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx); elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name)); + + /* + * Don't store empty STT_NOTYPE symbols in the rbtree. They + * can exist within a function, confusing the sorting. + */ + if (!sym->len) + rb_erase(&sym->node, &sym->sec->symbol_tree); } if (stats) -- cgit v1.3.1 From 4237e9f4a96228ccc8a7abe5e4b30834323cd353 Mon Sep 17 00:00:00 2001 From: Gilad Reti Date: Wed, 13 Jan 2021 07:38:08 +0200 Subject: selftests/bpf: Add verifier test for PTR_TO_MEM spill Add a test to check that the verifier is able to recognize spilling of PTR_TO_MEM registers, by reserving a ringbuf buffer, forcing the spill of a pointer holding the buffer address to the stack, filling it back in from the stack and writing to the memory area pointed by it. The patch was partially contributed by CyberArk Software, Inc. Signed-off-by: Gilad Reti Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20210113053810.13518-2-gilad.reti@gmail.com --- tools/testing/selftests/bpf/test_verifier.c | 12 ++++++++- tools/testing/selftests/bpf/verifier/spill_fill.c | 30 +++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 777a81404fdb..f8569f04064b 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -50,7 +50,7 @@ #define MAX_INSNS BPF_MAXINSNS #define MAX_TEST_INSNS 1000000 #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 20 +#define MAX_NR_MAPS 21 #define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 @@ -87,6 +87,7 @@ struct bpf_test { int fixup_sk_storage_map[MAX_FIXUPS]; int fixup_map_event_output[MAX_FIXUPS]; int fixup_map_reuseport_array[MAX_FIXUPS]; + int fixup_map_ringbuf[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; uint32_t insn_processed; @@ -640,6 +641,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_sk_storage_map = test->fixup_sk_storage_map; int *fixup_map_event_output = test->fixup_map_event_output; int *fixup_map_reuseport_array = test->fixup_map_reuseport_array; + int *fixup_map_ringbuf = test->fixup_map_ringbuf; if (test->fill_helper) { test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn)); @@ -817,6 +819,14 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_map_reuseport_array++; } while (*fixup_map_reuseport_array); } + if (*fixup_map_ringbuf) { + map_fds[20] = create_map(BPF_MAP_TYPE_RINGBUF, 0, + 0, 4096); + do { + prog[*fixup_map_ringbuf].imm = map_fds[20]; + fixup_map_ringbuf++; + } while (*fixup_map_ringbuf); + } } struct libcap { diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index 45d43bf82f26..0b943897aaf6 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -28,6 +28,36 @@ .result = ACCEPT, .result_unpriv = ACCEPT, }, +{ + "check valid spill/fill, ptr to mem", + .insns = { + /* reserve 8 byte ringbuf memory */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), + /* store a pointer to the reserved memory in R6 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* check whether the reservation was successful */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + /* spill R6(mem) into the stack */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + /* fill it back in R7 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8), + /* should be able to access *(R7) = 0 */ + BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0), + /* submit the reserved ringbuf memory */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_ringbuf = { 1 }, + .result = ACCEPT, + .result_unpriv = ACCEPT, +}, { "check corrupted spill/fill", .insns = { -- cgit v1.3.1 From 55ed4560774d81d7343223b8fd2784c530a9c6c1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 9 Dec 2020 14:27:44 +0900 Subject: tools/bootconfig: Add tracing_on support to helper scripts Add ftrace.instance.INSTANCE.tracing_on support to ftrace2bconf.sh and bconf2ftrace.sh. commit 8490db06f914 ("tracing/boot: Add per-instance tracing_on option support") added the per-instance tracing_on option, but forgot to update the helper scripts. Link: https://lkml.kernel.org/r/160749166410.3497930.14204335886811029800.stgit@devnote2 Cc: stable@vger.kernel.org Fixes: 8490db06f914 ("tracing/boot: Add per-instance tracing_on option support") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/scripts/bconf2ftrace.sh | 1 + tools/bootconfig/scripts/ftrace2bconf.sh | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/bootconfig/scripts/bconf2ftrace.sh b/tools/bootconfig/scripts/bconf2ftrace.sh index 595e164dc352..feb30c2c7881 100755 --- a/tools/bootconfig/scripts/bconf2ftrace.sh +++ b/tools/bootconfig/scripts/bconf2ftrace.sh @@ -152,6 +152,7 @@ setup_instance() { # [instance] set_array_of ${instance}.options ${instancedir}/trace_options set_value_of ${instance}.trace_clock ${instancedir}/trace_clock set_value_of ${instance}.cpumask ${instancedir}/tracing_cpumask + set_value_of ${instance}.tracing_on ${instancedir}/tracing_on set_value_of ${instance}.tracer ${instancedir}/current_tracer set_array_of ${instance}.ftrace.filters \ ${instancedir}/set_ftrace_filter diff --git a/tools/bootconfig/scripts/ftrace2bconf.sh b/tools/bootconfig/scripts/ftrace2bconf.sh index 6c0d4b61e0c2..a0c3bcc6da4f 100755 --- a/tools/bootconfig/scripts/ftrace2bconf.sh +++ b/tools/bootconfig/scripts/ftrace2bconf.sh @@ -221,6 +221,10 @@ instance_options() { # [instance-name] if [ `echo $val | sed -e s/f//g`x != x ]; then emit_kv $PREFIX.cpumask = $val fi + val=`cat $INSTANCE/tracing_on` + if [ `echo $val | sed -e s/f//g`x != x ]; then + emit_kv $PREFIX.tracing_on = $val + fi val= for i in `cat $INSTANCE/set_event`; do -- cgit v1.3.1 From 3a57a643a851dbb1c4a1819394ca009e3bfa4813 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 8 Jan 2021 18:31:44 +0000 Subject: arm64: selftests: Fix spelling of 'Mismatch' The SVE and FPSIMD stress tests have a spelling mistake in the output, fix it. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210108183144.673-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/fpsimd-test.S | 2 +- tools/testing/selftests/arm64/fp/sve-test.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S index 1c5556bdd11d..0dbd594c2747 100644 --- a/tools/testing/selftests/arm64/fp/fpsimd-test.S +++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S @@ -457,7 +457,7 @@ function barf mov x11, x1 // actual data mov x12, x2 // data size - puts "Mistatch: PID=" + puts "Mismatch: PID=" mov x0, x20 bl putdec puts ", iteration=" diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index f95074c9b48b..9210691aa998 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -625,7 +625,7 @@ function barf mov x11, x1 // actual data mov x12, x2 // data size - puts "Mistatch: PID=" + puts "Mismatch: PID=" mov x0, x20 bl putdec puts ", iteration=" -- cgit v1.3.1 From 301f0203e04293c13372c032198665bd75adf81b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 29 Dec 2020 15:41:19 -0300 Subject: perf bpf examples: Fix bpf.h header include directive in 5sec.c example It was looking at bpf/bpf.h, which caused this problem: # perf trace -e tools/perf/examples/bpf/5sec.c /home/acme/git/perf/tools/perf/examples/bpf/5sec.c:42:10: fatal error: 'bpf/bpf.h' file not found #include ^~~~~~~~~~~ 1 error generated. ERROR: unable to compile tools/perf/examples/bpf/5sec.c Hint: Check error message shown above. Hint: You can also pre-compile it into .o using: clang -target bpf -O2 -c tools/perf/examples/bpf/5sec.c with proper -I and -D options. event syntax error: 'tools/perf/examples/bpf/5sec.c' \___ Failed to load tools/perf/examples/bpf/5sec.c from source: Error when compiling BPF scriptlet # Change that to plain bpf.h, to make it work again: # perf trace -e tools/perf/examples/bpf/5sec.c sleep 5s 0.000 perf_bpf_probe:hrtimer_nanosleep(__probe_ip: -1776891872, rqtp: 5000000000) # perf trace -e tools/perf/examples/bpf/5sec.c/max-stack=16/ sleep 5s 0.000 perf_bpf_probe:hrtimer_nanosleep(__probe_ip: -1776891872, rqtp: 5000000000) hrtimer_nanosleep ([kernel.kallsyms]) common_nsleep ([kernel.kallsyms]) __x64_sys_clock_nanosleep ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __clock_nanosleep_2 (/usr/lib64/libc-2.32.so) # perf trace -e tools/perf/examples/bpf/5sec.c sleep 4s # Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/5sec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c index 65c4ff6892d9..e6b6181c6dc6 100644 --- a/tools/perf/examples/bpf/5sec.c +++ b/tools/perf/examples/bpf/5sec.c @@ -39,7 +39,7 @@ Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo */ -#include +#include #define NSEC_PER_SEC 1000000000L -- cgit v1.3.1 From 38c53947a7dcb6d295769830c9085b0409921ec9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Jan 2021 12:26:08 -0300 Subject: tools headers UAPI: Sync kvm.h headers with the kernel sources To pick the changes in: 647daca25d24fb6e ("KVM: SVM: Add support for booting APs in an SEV-ES guest") That don't cause any tooling change, just silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Tom Lendacky Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 886802b8ffba..374c67875cdb 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -251,6 +251,7 @@ struct kvm_hyperv_exit { #define KVM_EXIT_X86_RDMSR 29 #define KVM_EXIT_X86_WRMSR 30 #define KVM_EXIT_DIRTY_RING_FULL 31 +#define KVM_EXIT_AP_RESET_HOLD 32 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -573,6 +574,7 @@ struct kvm_vapic_addr { #define KVM_MP_STATE_CHECK_STOP 6 #define KVM_MP_STATE_OPERATING 7 #define KVM_MP_STATE_LOAD 8 +#define KVM_MP_STATE_AP_RESET_HOLD 9 struct kvm_mp_state { __u32 mp_state; -- cgit v1.3.1 From addbdff24293ef772a1b8e5d127b570e70f08cdc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Jan 2021 12:35:27 -0300 Subject: tools headers: Syncronize linux/build_bug.h with the kernel sources To pick up the changes in: 3a176b94609a18f5 ("Revert "kbuild: avoid static_assert for genksyms"") And silence this perf build warning: Warning: Kernel ABI header at 'tools/include/linux/build_bug.h' differs from latest version at 'include/linux/build_bug.h' diff -u tools/include/linux/build_bug.h include/linux/build_bug.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Masahiro Yamada Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/build_bug.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'tools') diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h index ce365d212768..cc7070c7439b 100644 --- a/tools/include/linux/build_bug.h +++ b/tools/include/linux/build_bug.h @@ -79,9 +79,4 @@ #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) #endif // static_assert -#ifdef __GENKSYMS__ -/* genksyms gets confused by _Static_assert */ -#define _Static_assert(expr, ...) -#endif - #endif /* _LINUX_BUILD_BUG_H */ -- cgit v1.3.1 From a042a82ddbb3434f523c0671f5301d1fe796b4eb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 14 Jan 2021 14:06:09 +0900 Subject: perf test: Fix shadow stat test for non-bash shells It was using some bash-specific features and failed to parse when running with a different shell like below: root@kbl-ppc:~/kbl-ws/perf-dev/lck-9077/acme.tmp/tools/perf# ./perf test 83 -vv 83: perf stat metrics (shadow stat) test : --- start --- test child forked, pid 3922 ./tests/shell/stat+shadow_stat.sh: 19: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 24: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 30: ./tests/shell/stat+shadow_stat.sh: [[: not found (standard_in) 2: syntax error ./tests/shell/stat+shadow_stat.sh: 36: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 19: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 24: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 30: ./tests/shell/stat+shadow_stat.sh: [[: not found (standard_in) 2: syntax error ./tests/shell/stat+shadow_stat.sh: 36: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 45: ./tests/shell/stat+shadow_stat.sh: declare: not found test child finished with -1 ---- end ---- perf stat metrics (shadow stat) test: FAILED! Reported-by: Jin Yao Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Laight Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210114050609.1258820-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat+shadow_stat.sh | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/stat+shadow_stat.sh b/tools/perf/tests/shell/stat+shadow_stat.sh index 249dfe48cf6a..ebebd3596cf9 100755 --- a/tools/perf/tests/shell/stat+shadow_stat.sh +++ b/tools/perf/tests/shell/stat+shadow_stat.sh @@ -9,31 +9,29 @@ perf stat -a true > /dev/null 2>&1 || exit 2 test_global_aggr() { - local cyc - perf stat -a --no-big-num -e cycles,instructions sleep 1 2>&1 | \ grep -e cycles -e instructions | \ while read num evt hash ipc rest do # skip not counted events - if [[ $num == "&1 | \ grep ^CPU | \ while read cpu num evt hash ipc rest do # skip not counted events - if [[ $num == " Date: Thu, 14 Jan 2021 13:23:04 -0800 Subject: libperf tests: Avoid uninitialized variable warning The variable 'bf' is read (for a write call) without being initialized triggering a memory sanitizer warning. Use 'bf' in the read and switch the write to reading from a string. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210114212304.4018119-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/tests/test-evlist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index 6d8ebe0c2504..1b225fe34a72 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -208,7 +208,6 @@ static int test_mmap_thread(void) char path[PATH_MAX]; int id, err, pid, go_pipe[2]; union perf_event *event; - char bf; int count = 0; snprintf(path, PATH_MAX, "%s/kernel/debug/tracing/events/syscalls/sys_enter_prctl/id", @@ -229,6 +228,7 @@ static int test_mmap_thread(void) pid = fork(); if (!pid) { int i; + char bf; read(go_pipe[0], &bf, 1); @@ -266,7 +266,7 @@ static int test_mmap_thread(void) perf_evlist__enable(evlist); /* kick the child and wait for it to finish */ - write(go_pipe[1], &bf, 1); + write(go_pipe[1], "A", 1); waitpid(pid, NULL, 0); /* -- cgit v1.3.1 From bba2ea17ef553aea0df80cb64399fe2f70f225dd Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 14 Jan 2021 10:02:49 -0800 Subject: libperf tests: If a test fails return non-zero If a test fails return -1 rather than 0. This is consistent with the return value in test-cpumap.c Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210114180250.3853825-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/tests/test-cpumap.c | 2 +- tools/lib/perf/tests/test-evlist.c | 2 +- tools/lib/perf/tests/test-evsel.c | 2 +- tools/lib/perf/tests/test-threadmap.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c index c8d45091e7c2..c70e9e03af3e 100644 --- a/tools/lib/perf/tests/test-cpumap.c +++ b/tools/lib/perf/tests/test-cpumap.c @@ -27,5 +27,5 @@ int main(int argc, char **argv) perf_cpu_map__put(cpus); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index 1b225fe34a72..220a95be47c3 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -409,5 +409,5 @@ int main(int argc, char **argv) test_mmap_cpus(); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index 135722ac965b..0ad82d7a2a51 100644 --- a/tools/lib/perf/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c @@ -131,5 +131,5 @@ int main(int argc, char **argv) test_stat_thread_enable(); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/tests/test-threadmap.c b/tools/lib/perf/tests/test-threadmap.c index 7dc4d6fbedde..384471441b48 100644 --- a/tools/lib/perf/tests/test-threadmap.c +++ b/tools/lib/perf/tests/test-threadmap.c @@ -27,5 +27,5 @@ int main(int argc, char **argv) perf_thread_map__put(threads); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } -- cgit v1.3.1 From 66dd86b2a2bee129c70f7ff054d3a6a2e5f8eb20 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 14 Jan 2021 10:02:50 -0800 Subject: libperf tests: Fail when failing to get a tracepoint id Permissions are necessary to get a tracepoint id. Fail the test when the read fails. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210114180250.3853825-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/tests/test-evlist.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index 220a95be47c3..e2ac0b7f432e 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -214,6 +214,7 @@ static int test_mmap_thread(void) sysfs__mountpoint()); if (filename__read_int(path, &id)) { + tests_failed++; fprintf(stderr, "error: failed to get tracepoint id: %s\n", path); return -1; } -- cgit v1.3.1 From 3ff1e7180abc7f6db413933c110df69157216715 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 15 Jan 2021 16:11:38 +0900 Subject: perf stat: Introduce struct runtime_stat_data To pass more info to the saved_value in the runtime_stat, add a new struct runtime_stat_data. Currently it only has 'ctx' field but later patch will add more. Note that we intentionally pass 0 as ctx to clock-related events for compatibility. It was already there in a few places. So move the code into the saved_value_lookup() explicitly and add a comment. Suggested-by: Andi Kleen Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jin Yao Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210115071139.257042-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 346 +++++++++++++++++++++--------------------- 1 file changed, 173 insertions(+), 173 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 901265127e36..a1565b6e38f2 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -114,6 +114,10 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel, rblist = &st->value_list; + /* don't use context info for clock events */ + if (type == STAT_NSECS) + dm.ctx = 0; + nd = rblist__find(rblist, &dm); if (nd) return container_of(nd, struct saved_value, rb_node); @@ -191,12 +195,17 @@ void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) reset_stat(st); } +struct runtime_stat_data { + int ctx; +}; + static void update_runtime_stat(struct runtime_stat *st, enum stat_type type, - int ctx, int cpu, u64 count) + int cpu, u64 count, + struct runtime_stat_data *rsd) { - struct saved_value *v = saved_value_lookup(NULL, cpu, true, - type, ctx, st); + struct saved_value *v = saved_value_lookup(NULL, cpu, true, type, + rsd->ctx, st); if (v) update_stats(&v->stats, count); @@ -210,73 +219,75 @@ static void update_runtime_stat(struct runtime_stat *st, void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, int cpu, struct runtime_stat *st) { - int ctx = evsel_context(counter); u64 count_ns = count; struct saved_value *v; + struct runtime_stat_data rsd = { + .ctx = evsel_context(counter), + }; count *= counter->scale; if (evsel__is_clock(counter)) - update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns); + update_runtime_stat(st, STAT_NSECS, cpu, count_ns, &rsd); else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); + update_runtime_stat(st, STAT_CYCLES, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) - update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count); + update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TRANSACTION_START)) - update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count); + update_runtime_stat(st, STAT_TRANSACTION, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, ELISION_START)) - update_runtime_stat(st, STAT_ELISION, ctx, cpu, count); + update_runtime_stat(st, STAT_ELISION, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING)) update_runtime_stat(st, STAT_TOPDOWN_RETIRING, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC)) update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND, - ctx, cpu, count); + cpu, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, - ctx, cpu, count); + cpu, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, - ctx, cpu, count); + cpu, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count); + update_runtime_stat(st, STAT_BRANCHES, cpu, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count); + update_runtime_stat(st, STAT_CACHEREFS, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_L1_DCACHE, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_L1_ICACHE, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_LL_CACHE, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_DTLB_CACHE, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_ITLB_CACHE, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, SMI_NUM)) - update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count); + update_runtime_stat(st, STAT_SMI_NUM, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, APERF)) - update_runtime_stat(st, STAT_APERF, ctx, cpu, count); + update_runtime_stat(st, STAT_APERF, cpu, count, &rsd); if (counter->collect_stat) { v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st); @@ -422,11 +433,12 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) } static double runtime_stat_avg(struct runtime_stat *st, - enum stat_type type, int ctx, int cpu) + enum stat_type type, int cpu, + struct runtime_stat_data *rsd) { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st); if (!v) return 0.0; @@ -434,11 +446,12 @@ static double runtime_stat_avg(struct runtime_stat *st, } static double runtime_stat_n(struct runtime_stat *st, - enum stat_type type, int ctx, int cpu) + enum stat_type type, int cpu, + struct runtime_stat_data *rsd) { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st); if (!v) return 0.0; @@ -446,16 +459,15 @@ static double runtime_stat_n(struct runtime_stat *st, } static void print_stalled_cycles_frontend(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -470,16 +482,15 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config, } static void print_stalled_cycles_backend(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -490,17 +501,15 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config, } static void print_branch_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu); + total = runtime_stat_avg(st, STAT_BRANCHES, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -511,18 +520,15 @@ static void print_branch_misses(struct perf_stat_config *config, } static void print_l1_dcache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) - + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -533,18 +539,15 @@ static void print_l1_dcache_misses(struct perf_stat_config *config, } static void print_l1_icache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) - + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -554,17 +557,15 @@ static void print_l1_icache_misses(struct perf_stat_config *config, } static void print_dtlb_cache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -574,17 +575,15 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config, } static void print_itlb_cache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -594,17 +593,15 @@ static void print_itlb_cache_misses(struct perf_stat_config *config, } static void print_ll_cache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_LL_CACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -662,56 +659,61 @@ static double sanitize_val(double x) return x; } -static double td_total_slots(int ctx, int cpu, struct runtime_stat *st) +static double td_total_slots(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { - return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu); + return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu, rsd); } -static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st) +static double td_bad_spec(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { double bad_spec = 0; double total_slots; double total; - total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) - - runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) + - runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu, rsd) - + runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu, rsd) + + runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu, rsd); - total_slots = td_total_slots(ctx, cpu, st); + total_slots = td_total_slots(cpu, st, rsd); if (total_slots) bad_spec = total / total_slots; return sanitize_val(bad_spec); } -static double td_retiring(int ctx, int cpu, struct runtime_stat *st) +static double td_retiring(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { double retiring = 0; - double total_slots = td_total_slots(ctx, cpu, st); + double total_slots = td_total_slots(cpu, st, rsd); double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, - ctx, cpu); + cpu, rsd); if (total_slots) retiring = ret_slots / total_slots; return retiring; } -static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st) +static double td_fe_bound(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { double fe_bound = 0; - double total_slots = td_total_slots(ctx, cpu, st); + double total_slots = td_total_slots(cpu, st, rsd); double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES, - ctx, cpu); + cpu, rsd); if (total_slots) fe_bound = fetch_bub / total_slots; return fe_bound; } -static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) +static double td_be_bound(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { - double sum = (td_fe_bound(ctx, cpu, st) + - td_bad_spec(ctx, cpu, st) + - td_retiring(ctx, cpu, st)); + double sum = (td_fe_bound(cpu, st, rsd) + + td_bad_spec(cpu, st, rsd) + + td_retiring(cpu, st, rsd)); if (sum == 0) return 0; return sanitize_val(1.0 - sum); @@ -722,15 +724,15 @@ static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) * the ratios we need to recreate the sum. */ -static double td_metric_ratio(int ctx, int cpu, - enum stat_type type, - struct runtime_stat *stat) +static double td_metric_ratio(int cpu, enum stat_type type, + struct runtime_stat *stat, + struct runtime_stat_data *rsd) { - double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) + - runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) + - runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) + - runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu); - double d = runtime_stat_avg(stat, type, ctx, cpu); + double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd); + double d = runtime_stat_avg(stat, type, cpu, rsd); if (sum) return d / sum; @@ -742,34 +744,33 @@ static double td_metric_ratio(int ctx, int cpu, * We allow two missing. */ -static bool full_td(int ctx, int cpu, - struct runtime_stat *stat) +static bool full_td(int cpu, struct runtime_stat *stat, + struct runtime_stat_data *rsd) { int c = 0; - if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd) > 0) c++; return c >= 2; } -static void print_smi_cost(struct perf_stat_config *config, - int cpu, struct evsel *evsel, +static void print_smi_cost(struct perf_stat_config *config, int cpu, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double smi_num, aperf, cycles, cost = 0.0; - int ctx = evsel_context(evsel); const char *color = NULL; - smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu); - aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu); - cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu, rsd); + aperf = runtime_stat_avg(st, STAT_APERF, cpu, rsd); + cycles = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); if ((cycles == 0) || (aperf == 0)) return; @@ -930,12 +931,14 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric_t print_metric = out->print_metric; double total, ratio = 0.0, total2; const char *color = NULL; - int ctx = evsel_context(evsel); + struct runtime_stat_data rsd = { + .ctx = evsel_context(evsel), + }; struct metric_event *me; int num = 1; if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); if (total) { ratio = avg / total; @@ -945,12 +948,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); } - total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, - ctx, cpu); + total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu, &rsd); total = max(total, runtime_stat_avg(st, STAT_STALLED_CYCLES_BACK, - ctx, cpu)); + cpu, &rsd)); if (total && avg) { out->new_line(config, ctxp); @@ -960,8 +962,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ratio); } } else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { - if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) - print_branch_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_BRANCHES, cpu, &rsd) != 0) + print_branch_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all branches", 0); } else if ( @@ -970,8 +972,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) - print_l1_dcache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_L1_DCACHE, cpu, &rsd) != 0) + print_l1_dcache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0); } else if ( @@ -980,8 +982,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) - print_l1_icache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_L1_ICACHE, cpu, &rsd) != 0) + print_l1_icache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0); } else if ( @@ -990,8 +992,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) - print_dtlb_cache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu, &rsd) != 0) + print_dtlb_cache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0); } else if ( @@ -1000,8 +1002,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) - print_itlb_cache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu, &rsd) != 0) + print_itlb_cache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0); } else if ( @@ -1010,27 +1012,27 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0) - print_ll_cache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_LL_CACHE, cpu, &rsd) != 0) + print_ll_cache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0); } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { - total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); + total = runtime_stat_avg(st, STAT_CACHEREFS, cpu, &rsd); if (total) ratio = avg * 100 / total; - if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0) + if (runtime_stat_n(st, STAT_CACHEREFS, cpu, &rsd) != 0) print_metric(config, ctxp, NULL, "%8.3f %%", "of all cache refs", ratio); else print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0); } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st); + print_stalled_cycles_frontend(config, cpu, avg, out, st, &rsd); } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(config, cpu, evsel, avg, out, st); + print_stalled_cycles_backend(config, cpu, avg, out, st, &rsd); } else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { - total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); + total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd); if (total) { ratio = avg / total; @@ -1039,7 +1041,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); if (total) print_metric(config, ctxp, NULL, @@ -1049,8 +1051,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "transactional cycles", 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); - total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); + total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); if (total2 < avg) total2 = avg; @@ -1060,21 +1062,19 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, else print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { - total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, - ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); if (avg) ratio = total / avg; - if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0) + if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu, &rsd) != 0) print_metric(config, ctxp, NULL, "%8.0f", "cycles / transaction", ratio); else print_metric(config, ctxp, NULL, NULL, "cycles / transaction", 0); } else if (perf_stat_evsel__is(evsel, ELISION_START)) { - total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, - ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); if (avg) ratio = total / avg; @@ -1087,28 +1087,28 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, else print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { - double fe_bound = td_fe_bound(ctx, cpu, st); + double fe_bound = td_fe_bound(cpu, st, &rsd); if (fe_bound > 0.2) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { - double retiring = td_retiring(ctx, cpu, st); + double retiring = td_retiring(cpu, st, &rsd); if (retiring > 0.7) color = PERF_COLOR_GREEN; print_metric(config, ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { - double bad_spec = td_bad_spec(ctx, cpu, st); + double bad_spec = td_bad_spec(cpu, st, &rsd); if (bad_spec > 0.1) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { - double be_bound = td_be_bound(ctx, cpu, st); + double be_bound = td_be_bound(cpu, st, &rsd); const char *name = "backend bound"; static int have_recovery_bubbles = -1; @@ -1121,43 +1121,43 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (be_bound > 0.2) color = PERF_COLOR_RED; - if (td_total_slots(ctx, cpu, st) > 0) + if (td_total_slots(cpu, st, &rsd) > 0) print_metric(config, ctxp, color, "%8.1f%%", name, be_bound * 100.); else print_metric(config, ctxp, NULL, NULL, name, 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) && - full_td(ctx, cpu, st)) { - double retiring = td_metric_ratio(ctx, cpu, - STAT_TOPDOWN_RETIRING, st); - + full_td(cpu, st, &rsd)) { + double retiring = td_metric_ratio(cpu, + STAT_TOPDOWN_RETIRING, st, + &rsd); if (retiring > 0.7) color = PERF_COLOR_GREEN; print_metric(config, ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) && - full_td(ctx, cpu, st)) { - double fe_bound = td_metric_ratio(ctx, cpu, - STAT_TOPDOWN_FE_BOUND, st); - + full_td(cpu, st, &rsd)) { + double fe_bound = td_metric_ratio(cpu, + STAT_TOPDOWN_FE_BOUND, st, + &rsd); if (fe_bound > 0.2) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) && - full_td(ctx, cpu, st)) { - double be_bound = td_metric_ratio(ctx, cpu, - STAT_TOPDOWN_BE_BOUND, st); - + full_td(cpu, st, &rsd)) { + double be_bound = td_metric_ratio(cpu, + STAT_TOPDOWN_BE_BOUND, st, + &rsd); if (be_bound > 0.2) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "backend bound", be_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) && - full_td(ctx, cpu, st)) { - double bad_spec = td_metric_ratio(ctx, cpu, - STAT_TOPDOWN_BAD_SPEC, st); - + full_td(cpu, st, &rsd)) { + double bad_spec = td_metric_ratio(cpu, + STAT_TOPDOWN_BAD_SPEC, st, + &rsd); if (bad_spec > 0.1) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", @@ -1165,11 +1165,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, } else if (evsel->metric_expr) { generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, evsel->name, evsel->metric_name, NULL, 1, cpu, out, st); - } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { + } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) { char unit = 'M'; char unit_buf[10]; - total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); + total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd); if (total) ratio = 1000.0 * avg / total; @@ -1180,7 +1180,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { - print_smi_cost(config, cpu, evsel, out, st); + print_smi_cost(config, cpu, out, st, &rsd); } else { num = 0; } -- cgit v1.3.1 From a1bf23052bdfe30ec3c693cf32feb2d79114ac16 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 15 Jan 2021 16:11:39 +0900 Subject: perf stat: Take cgroups into account for shadow stats As of now it doesn't consider cgroups when collecting shadow stats and metrics so counter values from different cgroups will be saved in a same slot. This resulted in incorrect numbers when those cgroups have different workloads. For example, let's look at the scenario below: cgroups A and C runs same workload which burns a cpu while cgroup B runs a light workload. $ perf stat -a -e cycles,instructions --for-each-cgroup A,B,C sleep 1 Performance counter stats for 'system wide': 3,958,116,522 cycles A 6,722,650,929 instructions A # 2.53 insn per cycle 1,132,741 cycles B 571,743 instructions B # 0.00 insn per cycle 4,007,799,935 cycles C 6,793,181,523 instructions C # 2.56 insn per cycle 1.001050869 seconds time elapsed When I run 'perf stat' with single workload, it usually shows IPC around 1.7. We can verify it (6,722,650,929.0 / 3,958,116,522 = 1.698) for cgroup A. But in this case, since cgroups are ignored, cycles are averaged so it used the lower value for IPC calculation and resulted in around 2.5. avg cycle: (3958116522 + 1132741 + 4007799935) / 3 = 2655683066 IPC (A) : 6722650929 / 2655683066 = 2.531 IPC (B) : 571743 / 2655683066 = 0.0002 IPC (C) : 6793181523 / 2655683066 = 2.557 We can simply compare cgroup pointers in the evsel and it'll be NULL when cgroups are not specified. With this patch, I can see correct numbers like below: $ perf stat -a -e cycles,instructions --for-each-cgroup A,B,C sleep 1 Performance counter stats for 'system wide': 4,171,051,687 cycles A 7,219,793,922 instructions A # 1.73 insn per cycle 1,051,189 cycles B 583,102 instructions B # 0.55 insn per cycle 4,171,124,710 cycles C 7,192,944,580 instructions C # 1.72 insn per cycle 1.007909814 seconds time elapsed Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jin Yao Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210115071139.257042-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index a1565b6e38f2..12eafd12a693 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -8,6 +8,7 @@ #include "evlist.h" #include "expr.h" #include "metricgroup.h" +#include "cgroup.h" #include /* @@ -28,6 +29,7 @@ struct saved_value { enum stat_type type; int ctx; int cpu; + struct cgroup *cgrp; struct runtime_stat *stat; struct stats stats; u64 metric_total; @@ -57,6 +59,9 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry) if (a->ctx != b->ctx) return a->ctx - b->ctx; + if (a->cgrp != b->cgrp) + return (char *)a->cgrp < (char *)b->cgrp ? -1 : +1; + if (a->evsel == NULL && b->evsel == NULL) { if (a->stat == b->stat) return 0; @@ -100,7 +105,8 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel, bool create, enum stat_type type, int ctx, - struct runtime_stat *st) + struct runtime_stat *st, + struct cgroup *cgrp) { struct rblist *rblist; struct rb_node *nd; @@ -110,6 +116,7 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel, .type = type, .ctx = ctx, .stat = st, + .cgrp = cgrp, }; rblist = &st->value_list; @@ -197,6 +204,7 @@ void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) struct runtime_stat_data { int ctx; + struct cgroup *cgrp; }; static void update_runtime_stat(struct runtime_stat *st, @@ -205,7 +213,7 @@ static void update_runtime_stat(struct runtime_stat *st, struct runtime_stat_data *rsd) { struct saved_value *v = saved_value_lookup(NULL, cpu, true, type, - rsd->ctx, st); + rsd->ctx, st, rsd->cgrp); if (v) update_stats(&v->stats, count); @@ -223,6 +231,7 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, struct saved_value *v; struct runtime_stat_data rsd = { .ctx = evsel_context(counter), + .cgrp = counter->cgrp, }; count *= counter->scale; @@ -290,13 +299,14 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, update_runtime_stat(st, STAT_APERF, cpu, count, &rsd); if (counter->collect_stat) { - v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st); + v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st, + rsd.cgrp); update_stats(&v->stats, count); if (counter->metric_leader) v->metric_total += count; } else if (counter->metric_leader) { v = saved_value_lookup(counter->metric_leader, - cpu, true, STAT_NONE, 0, st); + cpu, true, STAT_NONE, 0, st, rsd.cgrp); v->metric_total += count; v->metric_other++; } @@ -438,7 +448,7 @@ static double runtime_stat_avg(struct runtime_stat *st, { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st); + v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp); if (!v) return 0.0; @@ -451,7 +461,7 @@ static double runtime_stat_n(struct runtime_stat *st, { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st); + v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp); if (!v) return 0.0; @@ -805,7 +815,8 @@ static int prepare_metric(struct evsel **metric_events, scale = 1e-9; } else { v = saved_value_lookup(metric_events[i], cpu, false, - STAT_NONE, 0, st); + STAT_NONE, 0, st, + metric_events[i]->cgrp); if (!v) break; stats = &v->stats; @@ -933,6 +944,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, const char *color = NULL; struct runtime_stat_data rsd = { .ctx = evsel_context(evsel), + .cgrp = evsel->cgrp, }; struct metric_event *me; int num = 1; -- cgit v1.3.1 From 5501e9229a80d95a1ea68609f44c447a75d23ed5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 7 Jan 2021 19:41:59 +0200 Subject: perf intel-pt: Fix 'CPU too large' error In some cases, the number of cpus (nr_cpus_online) is confused with the maximum cpu number (nr_cpus_avail), which results in the error in the example below: Example on system with 8 cpus: Before: # echo 0 > /sys/devices/system/cpu/cpu2/online # ./perf record --kcore -e intel_pt// taskset --cpu-list 7 uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.147 MB perf.data ] # ./perf script --itrace=e Requested CPU 7 too large. Consider raising MAX_NR_CPUS 0x25908 [0x8]: failed to process type: 68 [Invalid argument] After: # ./perf script --itrace=e # Fixes: 8c7274691f0d ("perf machine: Replace MAX_NR_CPUS with perf_env::nr_cpus_online") Fixes: 7df4e36a4785 ("perf session: Replace MAX_NR_CPUS with perf_env::nr_cpus_online") Signed-off-by: Adrian Hunter Tested-by: Kan Liang Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20210107174159.24897-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 4 ++-- tools/perf/util/session.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index f841f3503cae..1e9d3f982b47 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2980,7 +2980,7 @@ int machines__for_each_thread(struct machines *machines, pid_t machine__get_current_tid(struct machine *machine, int cpu) { - int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS); + int nr_cpus = min(machine->env->nr_cpus_avail, MAX_NR_CPUS); if (cpu < 0 || cpu >= nr_cpus || !machine->current_tid) return -1; @@ -2992,7 +2992,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, pid_t tid) { struct thread *thread; - int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS); + int nr_cpus = min(machine->env->nr_cpus_avail, MAX_NR_CPUS); if (cpu < 0) return -EINVAL; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 50ff9795a4f1..25adbcce0281 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2404,7 +2404,7 @@ int perf_session__cpu_bitmap(struct perf_session *session, { int i, err = -1; struct perf_cpu_map *map; - int nr_cpus = min(session->header.env.nr_cpus_online, MAX_NR_CPUS); + int nr_cpus = min(session->header.env.nr_cpus_avail, MAX_NR_CPUS); for (i = 0; i < PERF_TYPE_MAX; ++i) { struct evsel *evsel; -- cgit v1.3.1 From 648b054a4647cd62e13ba79f398b8b97a7c82b19 Mon Sep 17 00:00:00 2001 From: Al Grant Date: Tue, 24 Nov 2020 19:58:17 +0000 Subject: perf inject: Correct event attribute sizes When 'perf inject' reads a perf.data file from an older version of perf, it writes event attributes into the output with the original size field, but lays them out as if they had the size currently used. Readers see a corrupt file. Update the size field to match the layout. Signed-off-by: Al Grant Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20201124195818.30603-1-al.grant@arm.com Signed-off-by: Denis Nikitin Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 062383e225a3..c4ed3dc2c8f4 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3323,6 +3323,14 @@ int perf_session__write_header(struct perf_session *session, attr_offset = lseek(ff.fd, 0, SEEK_CUR); evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.size < sizeof(evsel->core.attr)) { + /* + * We are likely in "perf inject" and have read + * from an older file. Update attr size so that + * reader gets the right offset to the ids. + */ + evsel->core.attr.size = sizeof(evsel->core.attr); + } f_attr = (struct perf_file_attr){ .attr = evsel->core.attr, .ids = { -- cgit v1.3.1 From ebfd44883ab5dd9a201af2d936e1dfb93962be0b Mon Sep 17 00:00:00 2001 From: David Gow Date: Fri, 11 Dec 2020 14:32:32 -0800 Subject: kunit: tool: Fix spelling of "diagnostic" in kunit_parser Various helper functions were misspelling "diagnostic" in their names. It finally got annoying, so fix it. Signed-off-by: David Gow Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_parser.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 6614ec4d0898..1a1e1d13f1d3 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -97,11 +97,11 @@ def print_log(log): TAP_ENTRIES = re.compile(r'^(TAP|[\s]*ok|[\s]*not ok|[\s]*[0-9]+\.\.[0-9]+|[\s]*#).*$') -def consume_non_diagnositic(lines: List[str]) -> None: +def consume_non_diagnostic(lines: List[str]) -> None: while lines and not TAP_ENTRIES.match(lines[0]): lines.pop(0) -def save_non_diagnositic(lines: List[str], test_case: TestCase) -> None: +def save_non_diagnostic(lines: List[str], test_case: TestCase) -> None: while lines and not TAP_ENTRIES.match(lines[0]): test_case.log.append(lines[0]) lines.pop(0) @@ -113,7 +113,7 @@ OK_NOT_OK_SUBTEST = re.compile(r'^[\s]+(ok|not ok) [0-9]+ - (.*)$') OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) ([0-9]+) - (.*)$') def parse_ok_not_ok_test_case(lines: List[str], test_case: TestCase) -> bool: - save_non_diagnositic(lines, test_case) + save_non_diagnostic(lines, test_case) if not lines: test_case.status = TestStatus.TEST_CRASHED return True @@ -139,7 +139,7 @@ SUBTEST_DIAGNOSTIC = re.compile(r'^[\s]+# (.*)$') DIAGNOSTIC_CRASH_MESSAGE = re.compile(r'^[\s]+# .*?: kunit test case crashed!$') def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool: - save_non_diagnositic(lines, test_case) + save_non_diagnostic(lines, test_case) if not lines: return False line = lines[0] @@ -155,7 +155,7 @@ def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool: def parse_test_case(lines: List[str]) -> Optional[TestCase]: test_case = TestCase() - save_non_diagnositic(lines, test_case) + save_non_diagnostic(lines, test_case) while parse_diagnostic(lines, test_case): pass if parse_ok_not_ok_test_case(lines, test_case): @@ -166,7 +166,7 @@ def parse_test_case(lines: List[str]) -> Optional[TestCase]: SUBTEST_HEADER = re.compile(r'^[\s]+# Subtest: (.*)$') def parse_subtest_header(lines: List[str]) -> Optional[str]: - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) if not lines: return None match = SUBTEST_HEADER.match(lines[0]) @@ -179,7 +179,7 @@ def parse_subtest_header(lines: List[str]) -> Optional[str]: SUBTEST_PLAN = re.compile(r'[\s]+[0-9]+\.\.([0-9]+)') def parse_subtest_plan(lines: List[str]) -> Optional[int]: - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) match = SUBTEST_PLAN.match(lines[0]) if match: lines.pop(0) @@ -202,7 +202,7 @@ def max_status(left: TestStatus, right: TestStatus) -> TestStatus: def parse_ok_not_ok_test_suite(lines: List[str], test_suite: TestSuite, expected_suite_index: int) -> bool: - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) if not lines: test_suite.status = TestStatus.TEST_CRASHED return False @@ -235,7 +235,7 @@ def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus: def parse_test_suite(lines: List[str], expected_suite_index: int) -> Optional[TestSuite]: if not lines: return None - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) test_suite = TestSuite() test_suite.status = TestStatus.SUCCESS name = parse_subtest_header(lines) @@ -264,7 +264,7 @@ def parse_test_suite(lines: List[str], expected_suite_index: int) -> Optional[Te TAP_HEADER = re.compile(r'^TAP version 14$') def parse_tap_header(lines: List[str]) -> bool: - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) if TAP_HEADER.match(lines[0]): lines.pop(0) return True @@ -274,7 +274,7 @@ def parse_tap_header(lines: List[str]) -> bool: TEST_PLAN = re.compile(r'[0-9]+\.\.([0-9]+)') def parse_test_plan(lines: List[str]) -> Optional[int]: - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) match = TEST_PLAN.match(lines[0]) if match: lines.pop(0) @@ -286,7 +286,7 @@ def bubble_up_suite_errors(test_suite_list: List[TestSuite]) -> TestStatus: return bubble_up_errors(lambda x: x.status, test_suite_list) def parse_test_result(lines: List[str]) -> TestResult: - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) if not lines or not parse_tap_header(lines): return TestResult(TestStatus.NO_TESTS, [], lines) expected_test_suite_num = parse_test_plan(lines) -- cgit v1.3.1 From 09641f7c7d8f1309fe9ad9ce4e6a1697016d73ba Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Thu, 14 Jan 2021 16:39:11 -0800 Subject: kunit: tool: surface and address more typing issues The authors of this tool were more familiar with a different type-checker, https://github.com/google/pytype. That's open source, but mypy seems more prevalent (and runs faster). And unlike pytype, mypy doesn't try to infer types so it doesn't check unanotated functions. So annotate ~all functions in kunit tool to increase type-checking coverage. Note: per https://www.python.org/dev/peps/pep-0484/, `__init__()` should be annotated as `-> None`. Doing so makes mypy discover a number of new violations. Exclude main() since we reuse `request` for the different types of requests, which mypy isn't happy about. This commit fixes all but one error, where `TestSuite.status` might be None. Signed-off-by: Daniel Latypov Reviewed-by: David Gow Tested-by: Brendan Higgins Acked-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit.py | 14 +++++------ tools/testing/kunit/kunit_config.py | 7 +++--- tools/testing/kunit/kunit_json.py | 2 +- tools/testing/kunit/kunit_kernel.py | 37 ++++++++++++++--------------- tools/testing/kunit/kunit_parser.py | 46 ++++++++++++++++++------------------- 5 files changed, 54 insertions(+), 52 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 21516e293d17..5521e0a8201e 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -43,9 +43,9 @@ class KunitStatus(Enum): BUILD_FAILURE = auto() TEST_FAILURE = auto() -def get_kernel_root_path(): - parts = sys.argv[0] if not __file__ else __file__ - parts = os.path.realpath(parts).split('tools/testing/kunit') +def get_kernel_root_path() -> str: + path = sys.argv[0] if not __file__ else __file__ + parts = os.path.realpath(path).split('tools/testing/kunit') if len(parts) != 2: sys.exit(1) return parts[0] @@ -171,7 +171,7 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree, exec_result.elapsed_time)) return parse_result -def add_common_opts(parser): +def add_common_opts(parser) -> None: parser.add_argument('--build_dir', help='As in the make command, it specifies the build ' 'directory.', @@ -183,13 +183,13 @@ def add_common_opts(parser): help='Run all KUnit tests through allyesconfig', action='store_true') -def add_build_opts(parser): +def add_build_opts(parser) -> None: parser.add_argument('--jobs', help='As in the make command, "Specifies the number of ' 'jobs (commands) to run simultaneously."', type=int, default=8, metavar='jobs') -def add_exec_opts(parser): +def add_exec_opts(parser) -> None: parser.add_argument('--timeout', help='maximum number of seconds to allow for all tests ' 'to run. This does not include time taken to build the ' @@ -198,7 +198,7 @@ def add_exec_opts(parser): default=300, metavar='timeout') -def add_parse_opts(parser): +def add_parse_opts(parser) -> None: parser.add_argument('--raw_output', help='don\'t format output from kernel', action='store_true') parser.add_argument('--json', diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py index 02ffc3a3e5dc..bdd60230764b 100644 --- a/tools/testing/kunit/kunit_config.py +++ b/tools/testing/kunit/kunit_config.py @@ -8,6 +8,7 @@ import collections import re +from typing import List, Set CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_(\w+) is not set$' CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+|".*")$' @@ -30,10 +31,10 @@ class KconfigParseError(Exception): class Kconfig(object): """Represents defconfig or .config specified using the Kconfig language.""" - def __init__(self): - self._entries = [] + def __init__(self) -> None: + self._entries = [] # type: List[KconfigEntry] - def entries(self): + def entries(self) -> Set[KconfigEntry]: return set(self._entries) def add_entry(self, entry: KconfigEntry) -> None: diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py index 624b31b2dbd6..f5cca5c38cac 100644 --- a/tools/testing/kunit/kunit_json.py +++ b/tools/testing/kunit/kunit_json.py @@ -13,7 +13,7 @@ import kunit_parser from kunit_parser import TestStatus -def get_json_result(test_result, def_config, build_dir, json_path): +def get_json_result(test_result, def_config, build_dir, json_path) -> str: sub_groups = [] # Each test suite is mapped to a KernelCI sub_group diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 698358c9c0d6..e77ee06aa407 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -11,6 +11,7 @@ import subprocess import os import shutil import signal +from typing import Iterator from contextlib import ExitStack @@ -39,7 +40,7 @@ class BuildError(Exception): class LinuxSourceTreeOperations(object): """An abstraction over command line operations performed on a source tree.""" - def make_mrproper(self): + def make_mrproper(self) -> None: try: subprocess.check_output(['make', 'mrproper'], stderr=subprocess.STDOUT) except OSError as e: @@ -47,7 +48,7 @@ class LinuxSourceTreeOperations(object): except subprocess.CalledProcessError as e: raise ConfigError(e.output.decode()) - def make_olddefconfig(self, build_dir, make_options): + def make_olddefconfig(self, build_dir, make_options) -> None: command = ['make', 'ARCH=um', 'olddefconfig'] if make_options: command.extend(make_options) @@ -60,7 +61,7 @@ class LinuxSourceTreeOperations(object): except subprocess.CalledProcessError as e: raise ConfigError(e.output.decode()) - def make_allyesconfig(self, build_dir, make_options): + def make_allyesconfig(self, build_dir, make_options) -> None: kunit_parser.print_with_timestamp( 'Enabling all CONFIGs for UML...') command = ['make', 'ARCH=um', 'allyesconfig'] @@ -82,7 +83,7 @@ class LinuxSourceTreeOperations(object): kunit_parser.print_with_timestamp( 'Starting Kernel with all configs takes a few minutes...') - def make(self, jobs, build_dir, make_options): + def make(self, jobs, build_dir, make_options) -> None: command = ['make', 'ARCH=um', '--jobs=' + str(jobs)] if make_options: command.extend(make_options) @@ -100,7 +101,7 @@ class LinuxSourceTreeOperations(object): if stderr: # likely only due to build warnings print(stderr.decode()) - def linux_bin(self, params, timeout, build_dir): + def linux_bin(self, params, timeout, build_dir) -> None: """Runs the Linux UML binary. Must be named 'linux'.""" linux_bin = get_file_path(build_dir, 'linux') outfile = get_outfile_path(build_dir) @@ -110,23 +111,23 @@ class LinuxSourceTreeOperations(object): stderr=subprocess.STDOUT) process.wait(timeout) -def get_kconfig_path(build_dir): +def get_kconfig_path(build_dir) -> str: return get_file_path(build_dir, KCONFIG_PATH) -def get_kunitconfig_path(build_dir): +def get_kunitconfig_path(build_dir) -> str: return get_file_path(build_dir, KUNITCONFIG_PATH) -def get_outfile_path(build_dir): +def get_outfile_path(build_dir) -> str: return get_file_path(build_dir, OUTFILE_PATH) class LinuxSourceTree(object): """Represents a Linux kernel source tree with KUnit tests.""" - def __init__(self): + def __init__(self) -> None: self._ops = LinuxSourceTreeOperations() signal.signal(signal.SIGINT, self.signal_handler) - def clean(self): + def clean(self) -> bool: try: self._ops.make_mrproper() except ConfigError as e: @@ -134,17 +135,17 @@ class LinuxSourceTree(object): return False return True - def create_kunitconfig(self, build_dir, defconfig=DEFAULT_KUNITCONFIG_PATH): + def create_kunitconfig(self, build_dir, defconfig=DEFAULT_KUNITCONFIG_PATH) -> None: kunitconfig_path = get_kunitconfig_path(build_dir) if not os.path.exists(kunitconfig_path): shutil.copyfile(defconfig, kunitconfig_path) - def read_kunitconfig(self, build_dir): + def read_kunitconfig(self, build_dir) -> None: kunitconfig_path = get_kunitconfig_path(build_dir) self._kconfig = kunit_config.Kconfig() self._kconfig.read_from_file(kunitconfig_path) - def validate_config(self, build_dir): + def validate_config(self, build_dir) -> bool: kconfig_path = get_kconfig_path(build_dir) validated_kconfig = kunit_config.Kconfig() validated_kconfig.read_from_file(kconfig_path) @@ -158,7 +159,7 @@ class LinuxSourceTree(object): return False return True - def build_config(self, build_dir, make_options): + def build_config(self, build_dir, make_options) -> bool: kconfig_path = get_kconfig_path(build_dir) if build_dir and not os.path.exists(build_dir): os.mkdir(build_dir) @@ -170,7 +171,7 @@ class LinuxSourceTree(object): return False return self.validate_config(build_dir) - def build_reconfig(self, build_dir, make_options): + def build_reconfig(self, build_dir, make_options) -> bool: """Creates a new .config if it is not a subset of the .kunitconfig.""" kconfig_path = get_kconfig_path(build_dir) if os.path.exists(kconfig_path): @@ -186,7 +187,7 @@ class LinuxSourceTree(object): print('Generating .config ...') return self.build_config(build_dir, make_options) - def build_um_kernel(self, alltests, jobs, build_dir, make_options): + def build_um_kernel(self, alltests, jobs, build_dir, make_options) -> bool: try: if alltests: self._ops.make_allyesconfig(build_dir, make_options) @@ -197,7 +198,7 @@ class LinuxSourceTree(object): return False return self.validate_config(build_dir) - def run_kernel(self, args=[], build_dir='', timeout=None): + def run_kernel(self, args=[], build_dir='', timeout=None) -> Iterator[str]: args.extend(['mem=1G', 'console=tty']) self._ops.linux_bin(args, timeout, build_dir) outfile = get_outfile_path(build_dir) @@ -206,6 +207,6 @@ class LinuxSourceTree(object): for line in file: yield line - def signal_handler(self, sig, frame): + def signal_handler(self, sig, frame) -> None: logging.error('Build interruption occurred. Cleaning console.') subprocess.call(['stty', 'sane']) diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 1a1e1d13f1d3..bd90d7b86b76 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -12,32 +12,32 @@ from collections import namedtuple from datetime import datetime from enum import Enum, auto from functools import reduce -from typing import List, Optional, Tuple +from typing import Iterator, List, Optional, Tuple TestResult = namedtuple('TestResult', ['status','suites','log']) class TestSuite(object): - def __init__(self): - self.status = None - self.name = None - self.cases = [] + def __init__(self) -> None: + self.status = None # type: Optional[TestStatus] + self.name = '' + self.cases = [] # type: List[TestCase] - def __str__(self): - return 'TestSuite(' + self.status + ',' + self.name + ',' + str(self.cases) + ')' + def __str__(self) -> str: + return 'TestSuite(' + str(self.status) + ',' + self.name + ',' + str(self.cases) + ')' - def __repr__(self): + def __repr__(self) -> str: return str(self) class TestCase(object): - def __init__(self): - self.status = None + def __init__(self) -> None: + self.status = None # type: Optional[TestStatus] self.name = '' - self.log = [] + self.log = [] # type: List[str] - def __str__(self): - return 'TestCase(' + self.status + ',' + self.name + ',' + str(self.log) + ')' + def __str__(self) -> str: + return 'TestCase(' + str(self.status) + ',' + self.name + ',' + str(self.log) + ')' - def __repr__(self): + def __repr__(self) -> str: return str(self) class TestStatus(Enum): @@ -51,7 +51,7 @@ kunit_start_re = re.compile(r'TAP version [0-9]+$') kunit_end_re = re.compile('(List of all partitions:|' 'Kernel panic - not syncing: VFS:)') -def isolate_kunit_output(kernel_output): +def isolate_kunit_output(kernel_output) -> Iterator[str]: started = False for line in kernel_output: line = line.rstrip() # line always has a trailing \n @@ -64,7 +64,7 @@ def isolate_kunit_output(kernel_output): elif started: yield line[prefix_len:] if prefix_len > 0 else line -def raw_output(kernel_output): +def raw_output(kernel_output) -> None: for line in kernel_output: print(line.rstrip()) @@ -72,26 +72,26 @@ DIVIDER = '=' * 60 RESET = '\033[0;0m' -def red(text): +def red(text) -> str: return '\033[1;31m' + text + RESET -def yellow(text): +def yellow(text) -> str: return '\033[1;33m' + text + RESET -def green(text): +def green(text) -> str: return '\033[1;32m' + text + RESET -def print_with_timestamp(message): +def print_with_timestamp(message) -> None: print('[%s] %s' % (datetime.now().strftime('%H:%M:%S'), message)) -def format_suite_divider(message): +def format_suite_divider(message) -> str: return '======== ' + message + ' ========' -def print_suite_divider(message): +def print_suite_divider(message) -> None: print_with_timestamp(DIVIDER) print_with_timestamp(format_suite_divider(message)) -def print_log(log): +def print_log(log) -> None: for m in log: print_with_timestamp(m) -- cgit v1.3.1 From 81c60306dc588e2e6b21391c1f6dd509403e6eec Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Thu, 14 Jan 2021 16:39:12 -0800 Subject: kunit: tool: fix minor typing issue with None status The code to handle aggregating statuses didn't check that the status actually got set to some non-None value. Default the value to SUCCESS instead of adding a bunch of `is None` checks. This sorta follows the precedent in commit 3fc48259d525 ("kunit: Don't fail test suites if one of them is empty"). Also slightly simplify the code and add type annotations. Signed-off-by: Daniel Latypov Reviewed-by: David Gow Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_parser.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index bd90d7b86b76..e8bcc139702e 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -12,13 +12,13 @@ from collections import namedtuple from datetime import datetime from enum import Enum, auto from functools import reduce -from typing import Iterator, List, Optional, Tuple +from typing import Iterable, Iterator, List, Optional, Tuple TestResult = namedtuple('TestResult', ['status','suites','log']) class TestSuite(object): def __init__(self) -> None: - self.status = None # type: Optional[TestStatus] + self.status = TestStatus.SUCCESS self.name = '' self.cases = [] # type: List[TestCase] @@ -30,7 +30,7 @@ class TestSuite(object): class TestCase(object): def __init__(self) -> None: - self.status = None # type: Optional[TestStatus] + self.status = TestStatus.SUCCESS self.name = '' self.log = [] # type: List[str] @@ -224,12 +224,11 @@ def parse_ok_not_ok_test_suite(lines: List[str], else: return False -def bubble_up_errors(to_status, status_container_list) -> TestStatus: - status_list = map(to_status, status_container_list) - return reduce(max_status, status_list, TestStatus.SUCCESS) +def bubble_up_errors(statuses: Iterable[TestStatus]) -> TestStatus: + return reduce(max_status, statuses, TestStatus.SUCCESS) def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus: - max_test_case_status = bubble_up_errors(lambda x: x.status, test_suite.cases) + max_test_case_status = bubble_up_errors(x.status for x in test_suite.cases) return max_status(max_test_case_status, test_suite.status) def parse_test_suite(lines: List[str], expected_suite_index: int) -> Optional[TestSuite]: @@ -282,8 +281,8 @@ def parse_test_plan(lines: List[str]) -> Optional[int]: else: return None -def bubble_up_suite_errors(test_suite_list: List[TestSuite]) -> TestStatus: - return bubble_up_errors(lambda x: x.status, test_suite_list) +def bubble_up_suite_errors(test_suites: Iterable[TestSuite]) -> TestStatus: + return bubble_up_errors(x.status for x in test_suites) def parse_test_result(lines: List[str]) -> TestResult: consume_non_diagnostic(lines) -- cgit v1.3.1 From 2b8fdbbf1c616300312f71fe5b21fe8f03129950 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Thu, 14 Jan 2021 16:39:13 -0800 Subject: kunit: tool: move kunitconfig parsing into __init__, make it optional LinuxSourceTree will unceremoniously crash if the user doesn't call read_kunitconfig() first in a number of functions. And currently every place we create an instance, the caller also calls create_kunitconfig() and read_kunitconfig(). Move these instead into __init__() so they can't be forgotten and to reduce copy-paste. The https://github.com/google/pytype type-checker complained that _config wasn't initialized. With this, kunit_tool now type checks under both pytype and mypy. Add an optional boolean that can be used to disable this for use cases in the future where we might not need/want to load the config. Signed-off-by: Daniel Latypov Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit.py | 20 ++++---------------- tools/testing/kunit/kunit_kernel.py | 25 +++++++++++++------------ 2 files changed, 17 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 5521e0a8201e..e808a47c839b 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -256,10 +256,7 @@ def main(argv, linux=None): os.mkdir(cli_args.build_dir) if not linux: - linux = kunit_kernel.LinuxSourceTree() - - linux.create_kunitconfig(cli_args.build_dir) - linux.read_kunitconfig(cli_args.build_dir) + linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir) request = KunitRequest(cli_args.raw_output, cli_args.timeout, @@ -277,10 +274,7 @@ def main(argv, linux=None): os.mkdir(cli_args.build_dir) if not linux: - linux = kunit_kernel.LinuxSourceTree() - - linux.create_kunitconfig(cli_args.build_dir) - linux.read_kunitconfig(cli_args.build_dir) + linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir) request = KunitConfigRequest(cli_args.build_dir, cli_args.make_options) @@ -292,10 +286,7 @@ def main(argv, linux=None): sys.exit(1) elif cli_args.subcommand == 'build': if not linux: - linux = kunit_kernel.LinuxSourceTree() - - linux.create_kunitconfig(cli_args.build_dir) - linux.read_kunitconfig(cli_args.build_dir) + linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir) request = KunitBuildRequest(cli_args.jobs, cli_args.build_dir, @@ -309,10 +300,7 @@ def main(argv, linux=None): sys.exit(1) elif cli_args.subcommand == 'exec': if not linux: - linux = kunit_kernel.LinuxSourceTree() - - linux.create_kunitconfig(cli_args.build_dir) - linux.read_kunitconfig(cli_args.build_dir) + linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir) exec_request = KunitExecRequest(cli_args.timeout, cli_args.build_dir, diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index e77ee06aa407..2076a5a2d060 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -123,28 +123,29 @@ def get_outfile_path(build_dir) -> str: class LinuxSourceTree(object): """Represents a Linux kernel source tree with KUnit tests.""" - def __init__(self) -> None: - self._ops = LinuxSourceTreeOperations() + def __init__(self, build_dir: str, load_config=True, defconfig=DEFAULT_KUNITCONFIG_PATH) -> None: signal.signal(signal.SIGINT, self.signal_handler) - def clean(self) -> bool: - try: - self._ops.make_mrproper() - except ConfigError as e: - logging.error(e) - return False - return True + self._ops = LinuxSourceTreeOperations() + + if not load_config: + return - def create_kunitconfig(self, build_dir, defconfig=DEFAULT_KUNITCONFIG_PATH) -> None: kunitconfig_path = get_kunitconfig_path(build_dir) if not os.path.exists(kunitconfig_path): shutil.copyfile(defconfig, kunitconfig_path) - def read_kunitconfig(self, build_dir) -> None: - kunitconfig_path = get_kunitconfig_path(build_dir) self._kconfig = kunit_config.Kconfig() self._kconfig.read_from_file(kunitconfig_path) + def clean(self) -> bool: + try: + self._ops.make_mrproper() + except ConfigError as e: + logging.error(e) + return False + return True + def validate_config(self, build_dir) -> bool: kconfig_path = get_kconfig_path(build_dir) validated_kconfig = kunit_config.Kconfig() -- cgit v1.3.1 From 2fe7c2f99440d52613e1cf845c96e8e463c28111 Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Thu, 7 Jan 2021 12:00:19 +0800 Subject: tools: gpio: fix %llu warning in gpio-event-mon.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some platforms, such as mips64, don't map __u64 to long long unsigned int so using %llu produces a warning: gpio-event-mon.c:110:37: warning: format ‘%llu’ expects argument of type ‘long long unsigned int’, but argument 3 has type ‘__u64’ {aka ‘long unsigned int’} [-Wformat=] 110 | fprintf(stdout, "GPIO EVENT at %llu on line %d (%d|%d) ", | ~~~^ | | | long long unsigned int | %lu 111 | event.timestamp_ns, event.offset, event.line_seqno, | ~~~~~~~~~~~~~~~~~~ | | | __u64 {aka long unsigned int} Replace the %llu with PRIu64 and cast the argument to uint64_t. Fixes: 03fd11b03362 ("tools/gpio/gpio-event-mon: fix warning") Signed-off-by: Kent Gibson Signed-off-by: Bartosz Golaszewski --- tools/gpio/gpio-event-mon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index cacd66ad7926..a2b233fdb572 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -107,8 +107,8 @@ int monitor_device(const char *device_name, ret = -EIO; break; } - fprintf(stdout, "GPIO EVENT at %llu on line %d (%d|%d) ", - event.timestamp_ns, event.offset, event.line_seqno, + fprintf(stdout, "GPIO EVENT at %" PRIu64 " on line %d (%d|%d) ", + (uint64_t)event.timestamp_ns, event.offset, event.line_seqno, event.seqno); switch (event.id) { case GPIO_V2_LINE_EVENT_RISING_EDGE: -- cgit v1.3.1 From 1fc7c1ef37f86f207b4db40aba57084bb2f6a69a Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Thu, 7 Jan 2021 12:00:20 +0800 Subject: tools: gpio: fix %llu warning in gpio-watch.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some platforms, such as mips64, don't map __u64 to long long unsigned int so using %llu produces a warning: gpio-watch.c: In function ‘main’: gpio-watch.c:89:30: warning: format ‘%llu’ expects argument of type ‘long long unsigned int’, but argument 4 has type ‘__u64’ {aka ‘long unsigned int’} [-Wformat=] 89 | printf("line %u: %s at %llu\n", | ~~~^ | | | long long unsigned int | %lu 90 | chg.info.offset, event, chg.timestamp_ns); | ~~~~~~~~~~~~~~~~ | | | __u64 {aka long unsigned int} Replace the %llu with PRIu64 and cast the argument to uint64_t. Fixes: 33f0c47b8fb4 ("tools: gpio: implement gpio-watch") Signed-off-by: Kent Gibson Signed-off-by: Bartosz Golaszewski --- tools/gpio/gpio-watch.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/gpio/gpio-watch.c b/tools/gpio/gpio-watch.c index f229ec62301b..41e76d244192 100644 --- a/tools/gpio/gpio-watch.c +++ b/tools/gpio/gpio-watch.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -86,8 +87,8 @@ int main(int argc, char **argv) return EXIT_FAILURE; } - printf("line %u: %s at %llu\n", - chg.info.offset, event, chg.timestamp_ns); + printf("line %u: %s at %" PRIu64 "\n", + chg.info.offset, event, (uint64_t)chg.timestamp_ns); } } -- cgit v1.3.1 From 92a5e1fdb286851d5bd0eb966b8d075be27cf5ee Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 18 Jan 2021 15:01:45 +0530 Subject: selftests/powerpc: Fix exit status of pkey tests Since main() does not return a value explicitly, the return values from FAIL_IF() conditions are ignored and the tests can still pass irrespective of failures. This makes sure that we always explicitly return the correct test exit status. Fixes: 1addb6444791 ("selftests/powerpc: Add test for execute-disabled pkeys") Fixes: c27f2fd1705a ("selftests/powerpc: Add test for pkey siginfo verification") Reported-by: Eirik Fuller Signed-off-by: Sandipan Das Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210118093145.10134-1-sandipan@linux.ibm.com --- tools/testing/selftests/powerpc/mm/pkey_exec_prot.c | 2 +- tools/testing/selftests/powerpc/mm/pkey_siginfo.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c index 9e5c7f3f498a..0af4f02669a1 100644 --- a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c +++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c @@ -290,5 +290,5 @@ static int test(void) int main(void) { - test_harness(test, "pkey_exec_prot"); + return test_harness(test, "pkey_exec_prot"); } diff --git a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c index 4f815d7c1214..2db76e56d4cb 100644 --- a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c +++ b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c @@ -329,5 +329,5 @@ static int test(void) int main(void) { - test_harness(test, "pkey_siginfo"); + return test_harness(test, "pkey_siginfo"); } -- cgit v1.3.1 From dd3a44c06f7b4f14e90065bf05d62c255b20005f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 19 Jan 2021 15:18:00 +1100 Subject: selftests/powerpc: Only test lwm/stmw on big endian Newer binutils (>= 2.36) refuse to assemble lmw/stmw when building in little endian mode. That breaks compilation of our alignment handler test: /tmp/cco4l14N.s: Assembler messages: /tmp/cco4l14N.s:1440: Error: `lmw' invalid when little-endian /tmp/cco4l14N.s:1814: Error: `stmw' invalid when little-endian make[2]: *** [../../lib.mk:139: /output/kselftest/powerpc/alignment/alignment_handler] Error 1 These tests do pass on little endian machines, as the kernel will still emulate those instructions even when running little endian (which is arguably a kernel bug). But we don't really need to test that case, so ifdef those instructions out to get the alignment test building again. Reported-by: Libor Pechacek Signed-off-by: Michael Ellerman Tested-by: Libor Pechacek Link: https://lore.kernel.org/r/20210119041800.3093047-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/alignment/alignment_handler.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index cb53a8b777e6..c25cf7cd45e9 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -443,7 +443,6 @@ int test_alignment_handler_integer(void) LOAD_DFORM_TEST(ldu); LOAD_XFORM_TEST(ldx); LOAD_XFORM_TEST(ldux); - LOAD_DFORM_TEST(lmw); STORE_DFORM_TEST(stb); STORE_XFORM_TEST(stbx); STORE_DFORM_TEST(stbu); @@ -462,7 +461,11 @@ int test_alignment_handler_integer(void) STORE_XFORM_TEST(stdx); STORE_DFORM_TEST(stdu); STORE_XFORM_TEST(stdux); + +#ifdef __BIG_ENDIAN__ + LOAD_DFORM_TEST(lmw); STORE_DFORM_TEST(stmw); +#endif return rc; } -- cgit v1.3.1 From fd23d2dc180fccfad4b27a8e52ba1bc415d18509 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 19 Jan 2021 10:59:30 +0800 Subject: selftests: net: fib_tests: remove duplicate log test The previous test added an address with a specified metric and check if correspond route was created. I somehow added two logs for the same test. Remove the duplicated one. Reported-by: Antoine Tenart Fixes: 0d29169a708b ("selftests/net/fib_tests: update addr_metric_test for peer route testing") Signed-off-by: Hangbin Liu Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20210119025930.2810532-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_tests.sh | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 84205c3a55eb..2b5707738609 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1055,7 +1055,6 @@ ipv6_addr_metric_test() check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 260" log_test $? 0 "Set metric with peer route on local side" - log_test $? 0 "User specified metric on local address" check_route6 "2001:db8:104::2 dev dummy2 proto kernel metric 260" log_test $? 0 "Set metric with peer route on peer side" -- cgit v1.3.1 From fc705fecf3a0c9128933cc6db59159c050aaca33 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 21 Jan 2021 14:54:46 +0200 Subject: perf evlist: Fix id index for heterogeneous systems perf_evlist__set_sid_idx() updates perf_sample_id with the evlist map index, CPU number and TID. It is passed indexes to the evsel's cpu and thread maps, but references the evlist's maps instead. That results in using incorrect CPU numbers on heterogeneous systems. Fix it by using evsel maps. The id index (PERF_RECORD_ID_INDEX) is used by AUX area tracing when in sampling mode. Having an incorrect CPU number causes the trace data to be attributed to the wrong CPU, and can result in decoder errors because the trace data is then associated with the wrong process. Committer notes: Keep the class prefix convention in the function name, switching from perf_evlist__set_sid_idx() to perf_evsel__set_sid_idx(). Fixes: 3c659eedada2fbf9 ("perf tools: Add id index") Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20210121125446.11287-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index cfcdbd7be066..17465d454a0e 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -367,21 +367,13 @@ static struct perf_mmap* perf_evlist__alloc_mmap(struct perf_evlist *evlist, boo return map; } -static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, - struct perf_evsel *evsel, int idx, int cpu, - int thread) +static void perf_evsel__set_sid_idx(struct perf_evsel *evsel, int idx, int cpu, int thread) { struct perf_sample_id *sid = SID(evsel, cpu, thread); sid->idx = idx; - if (evlist->cpus && cpu >= 0) - sid->cpu = evlist->cpus->map[cpu]; - else - sid->cpu = -1; - if (!evsel->system_wide && evlist->threads && thread >= 0) - sid->tid = perf_thread_map__pid(evlist->threads, thread); - else - sid->tid = -1; + sid->cpu = perf_cpu_map__cpu(evsel->cpus, cpu); + sid->tid = perf_thread_map__pid(evsel->threads, thread); } static struct perf_mmap* @@ -500,8 +492,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0) return -1; - perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, - thread); + perf_evsel__set_sid_idx(evsel, idx, cpu, thread); } } -- cgit v1.3.1 From 9c880c24cb0db49d6e62e6d882df1470b0be8038 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 21 Jan 2021 00:18:38 +0800 Subject: perf metricgroup: Fix for metrics containing duration_time Metrics containing duration_time cause a segfault: $ perf stat -v -M L1D_Cache_Fill_BW sleep 1 Using CPUID GenuineIntel-6-3D-4 metric expr 64 * l1d.replacement / 1000000000 / duration_time for L1D_Cache_Fill_BW found event duration_time found event l1d.replacement adding {l1d.replacement}:W,duration_time l1d.replacement -> cpu/umask=0x1,(null)=0x1e8483,event=0x51/ Segmentation fault $ In commit c2337d67199a1ea1 ("perf metricgroup: Fix metrics using aliases covering multiple PMUs"), the logic in find_evsel_group() when iter'ing events was changed to not only select events in same group, but also for aliased PMUs. Checking whether events were for aliased PMUs was done by comparing the event PMU name. This was not safe for duration_time event, which has no associated PMU (and no PMU name), so fix by checking if the event PMU name is set also. Committer testing: Reproduced the bug, then, on a: $ grep -m1 ^'model name' /proc/cpuinfo model name : Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz $ We now get: $ perf stat -M L1D_Cache_Fill_BW sleep 1 Performance counter stats for 'sleep 1': 4,141 l1d.replacement:u 1,001,285,107 ns duration_time:u 1.001285107 seconds time elapsed 0.000000000 seconds user 0.001119000 seconds sys $ Detais from -v: Using CPUID GenuineIntel-6-8E-A metric expr 64 * l1d.replacement / 1000000000 / duration_time for L1D_Cache_Fill_BW found event duration_time found event l1d.replacement adding {l1d.replacement}:W,duration_time l1d.replacement -> cpu/(null)=0x1e8483,umask=0x1,event=0x51/ Control descriptor is not initialized Warning: kernel.perf_event_paranoid=2, trying to fall back to excluding kernel and hypervisor samples Warning: kernel.perf_event_paranoid=2, trying to fall back to excluding kernel and hypervisor samples l1d.replacement:u: 4592 612201 612201 duration_time:u: 1001478621 1001478621 1001478621 Fixes: c2337d67199a1ea1 ("perf metricgroup: Fix metrics using aliases covering multiple PMUs") Reported-by: Joakim Zhang Signed-off-by: John Garry Tested-by: Arnaldo Carvalho de Melo Tested-by: Jiri Olsa Acked-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linuxarm@openeuler.org Link: https://lore.kernel.org/r/1611159518-226883-1-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index ee94d3e8dd65..8cb164ad06c1 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -162,6 +162,14 @@ static bool contains_event(struct evsel **metric_events, int num_events, return false; } +static bool evsel_same_pmu(struct evsel *ev1, struct evsel *ev2) +{ + if (!ev1->pmu_name || !ev2->pmu_name) + return false; + + return !strcmp(ev1->pmu_name, ev2->pmu_name); +} + /** * Find a group of events in perf_evlist that correspond to those from a parsed * metric expression. Note, as find_evsel_group is called in the same order as @@ -280,8 +288,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, */ if (!has_constraint && ev->leader != metric_events[i]->leader && - !strcmp(ev->leader->pmu_name, - metric_events[i]->leader->pmu_name)) + evsel_same_pmu(ev->leader, metric_events[i]->leader)) break; if (!strcmp(metric_events[i]->name, ev->name)) { set_bit(ev->idx, evlist_used); -- cgit v1.3.1 From 3d6e79ee9e8f8c6604312382c2be1d1bd1cffc9e Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 19 Jan 2021 18:04:15 +0800 Subject: perf metricgroup: Fix system PMU metrics Joakim reports that getting "perf stat" for multiple system PMU metrics segfaults: $ perf stat -a -I 1000 -M imx8mm_ddr_write.all,imx8mm_ddr_write.all Segmentation fault $ While the same works without issue for a single metric. The logic in metricgroup__add_metric_sys_event_iter() is broken, in that add_metric() @m argument should be NULL for each new metric. Fix by not passing a holder for that, and rather make local in metricgroup__add_metric_sys_event_iter(). Fixes: be335ec28efa ("perf metricgroup: Support adding metrics for system PMUs") Reported-by: Joakim Zhang Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linuxarm@openeuler.org Link: https://lore.kernel.org/r/1611050655-44020-1-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 8cb164ad06c1..e6d3452031e5 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -773,7 +773,6 @@ int __weak arch_get_runtimeparam(struct pmu_event *pe __maybe_unused) struct metricgroup_add_iter_data { struct list_head *metric_list; const char *metric; - struct metric **m; struct expr_ids *ids; int *ret; bool *has_match; @@ -1065,12 +1064,13 @@ static int metricgroup__add_metric_sys_event_iter(struct pmu_event *pe, void *data) { struct metricgroup_add_iter_data *d = data; + struct metric *m = NULL; int ret; if (!match_pe_metric(pe, d->metric)) return 0; - ret = add_metric(d->metric_list, pe, d->metric_no_group, d->m, NULL, d->ids); + ret = add_metric(d->metric_list, pe, d->metric_no_group, &m, NULL, d->ids); if (ret) return ret; @@ -1121,7 +1121,6 @@ static int metricgroup__add_metric(const char *metric, bool metric_no_group, .metric_list = &list, .metric = metric, .metric_no_group = metric_no_group, - .m = &m, .ids = &ids, .has_match = &has_match, .ret = &ret, -- cgit v1.3.1 From 8adc0a06d68a2e433b960377e515e7a6b19b429f Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 9 Dec 2020 08:58:28 +0800 Subject: perf script: Fix overrun issue for dynamically-allocated PMU type number When unpacking the event which is from dynamic PMU, the array output[OUTPUT_TYPE_MAX] may be overrun. For example, type number of SKL uncore_imc is 10, but OUTPUT_TYPE_MAX is 7 now (OUTPUT_TYPE_MAX = PERF_TYPE_MAX + 1). /* In builtin-script.c */ process_event() { unsigned int type = output_type(attr->type); if (output[type].fields == 0) return; } output[10] is overrun. Create a type OUTPUT_TYPE_OTHER for dynamic PMU events, then output_type(attr->type) will return OUTPUT_TYPE_OTHER here. Note that if PERF_TYPE_MAX ever changed, then there would be a conflict between old perf.data files that had a dynamicaliy allocated PMU number that would then be the same as a fixed PERF_TYPE. Example: # perf record --switch-events -C 0 -e "{cpu-clock,uncore_imc/data_reads/,uncore_imc/data_writes/}:SD" -a -- sleep 1 # perf script Before: swapper 0 [000] 1479253.987551: 277766 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1479253.987797: 246709 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1479253.988127: 329883 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1479253.988273: 146393 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1479253.988523: 249977 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1479253.988877: 354090 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1479253.989023: 145940 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1479253.989383: 359856 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1479253.989523: 140082 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) After: swapper 0 [000] 1397040.402011: 272384 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1397040.402011: 5396 uncore_imc/data_reads/: swapper 0 [000] 1397040.402011: 967 uncore_imc/data_writes/: swapper 0 [000] 1397040.402259: 249153 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1397040.402259: 7231 uncore_imc/data_reads/: swapper 0 [000] 1397040.402259: 1297 uncore_imc/data_writes/: swapper 0 [000] 1397040.402508: 249108 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1397040.402508: 5333 uncore_imc/data_reads/: swapper 0 [000] 1397040.402508: 1008 uncore_imc/data_writes/: Signed-off-by: Jin Yao Acked-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Peter Zijlstra Cc: Kan Liang Cc: Andi Kleen Cc: Alexander Shishkin Cc: Ingo Molnar Link: https://lore.kernel.org/r/20201209005828.21302-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index edacfa98d073..42dad4a0f8cf 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -186,6 +186,7 @@ struct output_option { enum { OUTPUT_TYPE_SYNTH = PERF_TYPE_MAX, + OUTPUT_TYPE_OTHER, OUTPUT_TYPE_MAX }; @@ -283,6 +284,18 @@ static struct { .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, + + [OUTPUT_TYPE_OTHER] = { + .user_set = false, + + .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | + PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | + PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | + PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET | + PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD, + + .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, + }, }; struct evsel_script { @@ -343,8 +356,11 @@ static inline int output_type(unsigned int type) case PERF_TYPE_SYNTH: return OUTPUT_TYPE_SYNTH; default: - return type; + if (type < PERF_TYPE_MAX) + return type; } + + return OUTPUT_TYPE_OTHER; } static inline unsigned int attr_type(unsigned int type) -- cgit v1.3.1 From 655cf86548a3938538642a6df27dd359e13c86bd Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 14 Jan 2021 16:32:42 -0600 Subject: objtool: Don't fail the kernel build on fatal errors This is basically a revert of commit 644592d32837 ("objtool: Fail the kernel build on fatal errors"). That change turned out to be more trouble than it's worth. Failing the build is an extreme measure which sometimes gets too much attention and blocks CI build testing. These fatal-type warnings aren't yet as rare as we'd hope, due to the ever-increasing matrix of supported toolchains/plugins and their fast-changing nature as of late. Also, there are more people (and bots) looking for objtool warnings than ever before, so even non-fatal warnings aren't likely to be ignored for long. Suggested-by: Nick Desaulniers Reviewed-by: Miroslav Benes Reviewed-by: Nick Desaulniers Reviewed-by: Kamalesh Babulal Signed-off-by: Josh Poimboeuf --- tools/objtool/check.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5f8d3eed78a1..4bd30315eb62 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2928,14 +2928,10 @@ int check(struct objtool_file *file) warnings += ret; out: - if (ret < 0) { - /* - * Fatal error. The binary is corrupt or otherwise broken in - * some way, or objtool itself is broken. Fail the kernel - * build. - */ - return ret; - } - + /* + * For now, don't fail the kernel build on fatal warnings. These + * errors are still fairly common due to the growing matrix of + * supported toolchains and their recent pace of change. + */ return 0; } -- cgit v1.3.1 From 1d489151e9f9d1647110277ff77282fe4d96d09b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 14 Jan 2021 16:14:01 -0600 Subject: objtool: Don't fail on missing symbol table Thanks to a recent binutils change which doesn't generate unused symbols, it's now possible for thunk_64.o be completely empty without CONFIG_PREEMPTION: no text, no data, no symbols. We could edit the Makefile to only build that file when CONFIG_PREEMPTION is enabled, but that will likely create confusion if/when the thunks end up getting used by some other code again. Just ignore it and move on. Reported-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Reviewed-by: Miroslav Benes Tested-by: Nathan Chancellor Link: https://github.com/ClangBuiltLinux/linux/issues/1254 Signed-off-by: Josh Poimboeuf --- tools/objtool/elf.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index f9682db33cca..d8421e1d06be 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -380,8 +380,11 @@ static int read_symbols(struct elf *elf) symtab = find_section_by_name(elf, ".symtab"); if (!symtab) { - WARN("missing symbol table"); - return -1; + /* + * A missing symbol table is actually possible if it's an empty + * .o file. This can happen for thunk_64.o. + */ + return 0; } symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); -- cgit v1.3.1 From 9a27e109a391c9021147553b97c3fe4356e2261c Mon Sep 17 00:00:00 2001 From: Santosh Sivaraj Date: Tue, 22 Dec 2020 09:52:34 +0530 Subject: testing/nvdimm: Add test module for non-nfit platforms The current test module cannot be used for testing platforms (make check) that do not have support for NFIT. In order to get the ndctl tests working, we need a module which can emulate NVDIMM devices without relying on ACPI/NFIT. The aim of this proposed module is to implement a similar functionality to the existing module but without the ACPI dependencies. This RFC series is split into reviewable and compilable chunks. This patch adds a new driver and registers two nvdimm bus needed for ndctl make check. Signed-off-by: Santosh Sivaraj Link: https://lore.kernel.org/r/20201222042240.2983755-2-santosh@fossix.org Signed-off-by: Dan Williams --- tools/testing/nvdimm/config_check.c | 3 +- tools/testing/nvdimm/test/Kbuild | 6 +- tools/testing/nvdimm/test/ndtest.c | 206 ++++++++++++++++++++++++++++++++++++ tools/testing/nvdimm/test/ndtest.h | 16 +++ 4 files changed, 229 insertions(+), 2 deletions(-) create mode 100644 tools/testing/nvdimm/test/ndtest.c create mode 100644 tools/testing/nvdimm/test/ndtest.h (limited to 'tools') diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c index cac891028cd1..3e3a5f518864 100644 --- a/tools/testing/nvdimm/config_check.c +++ b/tools/testing/nvdimm/config_check.c @@ -12,7 +12,8 @@ void check(void) BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT)); BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_PFN)); BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK)); - BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT)); + if (IS_ENABLED(CONFIG_ACPI_NFIT)) + BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT)); BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX)); BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX_PMEM)); } diff --git a/tools/testing/nvdimm/test/Kbuild b/tools/testing/nvdimm/test/Kbuild index 75baebf8f4ba..197bcb2b7f35 100644 --- a/tools/testing/nvdimm/test/Kbuild +++ b/tools/testing/nvdimm/test/Kbuild @@ -5,5 +5,9 @@ ccflags-y += -I$(srctree)/drivers/acpi/nfit/ obj-m += nfit_test.o obj-m += nfit_test_iomap.o -nfit_test-y := nfit.o +ifeq ($(CONFIG_ACPI_NFIT),m) + nfit_test-y := nfit.o +else + nfit_test-y := ndtest.o +endif nfit_test_iomap-y := iomap.o diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c new file mode 100644 index 000000000000..f89d74fdcdee --- /dev/null +++ b/tools/testing/nvdimm/test/ndtest.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../watermark.h" +#include "nfit_test.h" +#include "ndtest.h" + +enum { + DIMM_SIZE = SZ_32M, + LABEL_SIZE = SZ_128K, + NUM_INSTANCES = 2, + NUM_DCR = 4, +}; + +static struct ndtest_priv *instances[NUM_INSTANCES]; +static struct class *ndtest_dimm_class; + +static inline struct ndtest_priv *to_ndtest_priv(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + + return container_of(pdev, struct ndtest_priv, pdev); +} + +static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len, int *cmd_rc) +{ + struct ndtest_dimm *dimm; + int _cmd_rc; + + if (!cmd_rc) + cmd_rc = &_cmd_rc; + + *cmd_rc = 0; + + if (!nvdimm) + return -EINVAL; + + dimm = nvdimm_provider_data(nvdimm); + if (!dimm) + return -EINVAL; + + switch (cmd) { + case ND_CMD_GET_CONFIG_SIZE: + case ND_CMD_GET_CONFIG_DATA: + case ND_CMD_SET_CONFIG_DATA: + default: + return -EINVAL; + } + + return 0; +} + +static int ndtest_bus_register(struct ndtest_priv *p) +{ + p->bus_desc.ndctl = ndtest_ctl; + p->bus_desc.module = THIS_MODULE; + p->bus_desc.provider_name = NULL; + + p->bus = nvdimm_bus_register(&p->pdev.dev, &p->bus_desc); + if (!p->bus) { + dev_err(&p->pdev.dev, "Error creating nvdimm bus %pOF\n", p->dn); + return -ENOMEM; + } + + return 0; +} + +static int ndtest_remove(struct platform_device *pdev) +{ + struct ndtest_priv *p = to_ndtest_priv(&pdev->dev); + + nvdimm_bus_unregister(p->bus); + return 0; +} + +static int ndtest_probe(struct platform_device *pdev) +{ + struct ndtest_priv *p; + + p = to_ndtest_priv(&pdev->dev); + if (ndtest_bus_register(p)) + return -ENOMEM; + + platform_set_drvdata(pdev, p); + + return 0; +} + +static const struct platform_device_id ndtest_id[] = { + { KBUILD_MODNAME }, + { }, +}; + +static struct platform_driver ndtest_driver = { + .probe = ndtest_probe, + .remove = ndtest_remove, + .driver = { + .name = KBUILD_MODNAME, + }, + .id_table = ndtest_id, +}; + +static void ndtest_release(struct device *dev) +{ + struct ndtest_priv *p = to_ndtest_priv(dev); + + kfree(p); +} + +static void cleanup_devices(void) +{ + int i; + + for (i = 0; i < NUM_INSTANCES; i++) + if (instances[i]) + platform_device_unregister(&instances[i]->pdev); + + nfit_test_teardown(); + + if (ndtest_dimm_class) + class_destroy(ndtest_dimm_class); +} + +static __init int ndtest_init(void) +{ + int rc, i; + + pmem_test(); + libnvdimm_test(); + device_dax_test(); + dax_pmem_test(); + dax_pmem_core_test(); +#ifdef CONFIG_DEV_DAX_PMEM_COMPAT + dax_pmem_compat_test(); +#endif + + ndtest_dimm_class = class_create(THIS_MODULE, "nfit_test_dimm"); + if (IS_ERR(ndtest_dimm_class)) { + rc = PTR_ERR(ndtest_dimm_class); + goto err_register; + } + + /* Each instance can be taken as a bus, which can have multiple dimms */ + for (i = 0; i < NUM_INSTANCES; i++) { + struct ndtest_priv *priv; + struct platform_device *pdev; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + rc = -ENOMEM; + goto err_register; + } + + INIT_LIST_HEAD(&priv->resources); + pdev = &priv->pdev; + pdev->name = KBUILD_MODNAME; + pdev->id = i; + pdev->dev.release = ndtest_release; + rc = platform_device_register(pdev); + if (rc) { + put_device(&pdev->dev); + goto err_register; + } + get_device(&pdev->dev); + + instances[i] = priv; + } + + rc = platform_driver_register(&ndtest_driver); + if (rc) + goto err_register; + + return 0; + +err_register: + pr_err("Error registering platform device\n"); + cleanup_devices(); + + return rc; +} + +static __exit void ndtest_exit(void) +{ + cleanup_devices(); + platform_driver_unregister(&ndtest_driver); +} + +module_init(ndtest_init); +module_exit(ndtest_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("IBM Corporation"); diff --git a/tools/testing/nvdimm/test/ndtest.h b/tools/testing/nvdimm/test/ndtest.h new file mode 100644 index 000000000000..831ac5c65f50 --- /dev/null +++ b/tools/testing/nvdimm/test/ndtest.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef NDTEST_H +#define NDTEST_H + +#include +#include + +struct ndtest_priv { + struct platform_device pdev; + struct device_node *dn; + struct list_head resources; + struct nvdimm_bus_descriptor bus_desc; + struct nvdimm_bus *bus; +}; + +#endif /* NDTEST_H */ -- cgit v1.3.1 From 107b04e970cae754100efb99a5312c321208ca03 Mon Sep 17 00:00:00 2001 From: Santosh Sivaraj Date: Tue, 22 Dec 2020 09:52:35 +0530 Subject: ndtest: Add compatability string to treat it as PAPR family Since this module is written to be platform agnostic, the module is made part of the PAPR_FAMILY. ndctl identifies the family using the compatible string inside of_node dir-entry. Signed-off-by: Santosh Sivaraj Link: https://lore.kernel.org/r/20201222042240.2983755-3-santosh@fossix.org Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/ndtest.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'tools') diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c index f89d74fdcdee..001527b37a23 100644 --- a/tools/testing/nvdimm/test/ndtest.c +++ b/tools/testing/nvdimm/test/ndtest.c @@ -65,11 +65,34 @@ static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc, return 0; } +static ssize_t compatible_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "nvdimm_test"); +} +static DEVICE_ATTR_RO(compatible); + +static struct attribute *of_node_attributes[] = { + &dev_attr_compatible.attr, + NULL +}; + +static const struct attribute_group of_node_attribute_group = { + .name = "of_node", + .attrs = of_node_attributes, +}; + +static const struct attribute_group *ndtest_attribute_groups[] = { + &of_node_attribute_group, + NULL, +}; + static int ndtest_bus_register(struct ndtest_priv *p) { p->bus_desc.ndctl = ndtest_ctl; p->bus_desc.module = THIS_MODULE; p->bus_desc.provider_name = NULL; + p->bus_desc.attr_groups = ndtest_attribute_groups; p->bus = nvdimm_bus_register(&p->pdev.dev, &p->bus_desc); if (!p->bus) { -- cgit v1.3.1 From 9399ab61ad82154911563dd8635c585e3f24b16a Mon Sep 17 00:00:00 2001 From: Santosh Sivaraj Date: Tue, 22 Dec 2020 09:52:36 +0530 Subject: ndtest: Add dimms to the two buses A config array is used to hold the dimms for each bus. These dimms are registered with nvdimm, and new nvdimms are created on the buses. Signed-off-by: Santosh Sivaraj Link: https://lore.kernel.org/r/20201222042240.2983755-4-santosh@fossix.org Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/ndtest.c | 258 +++++++++++++++++++++++++++++++++++++ tools/testing/nvdimm/test/ndtest.h | 36 ++++++ 2 files changed, 294 insertions(+) (limited to 'tools') diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c index 001527b37a23..a82790013f8a 100644 --- a/tools/testing/nvdimm/test/ndtest.c +++ b/tools/testing/nvdimm/test/ndtest.c @@ -25,8 +25,83 @@ enum { NUM_DCR = 4, }; +#define NDTEST_SCM_DIMM_CMD_MASK \ + ((1ul << ND_CMD_GET_CONFIG_SIZE) | \ + (1ul << ND_CMD_GET_CONFIG_DATA) | \ + (1ul << ND_CMD_SET_CONFIG_DATA) | \ + (1ul << ND_CMD_CALL)) + +#define NFIT_DIMM_HANDLE(node, socket, imc, chan, dimm) \ + (((node & 0xfff) << 16) | ((socket & 0xf) << 12) \ + | ((imc & 0xf) << 8) | ((chan & 0xf) << 4) | (dimm & 0xf)) + +static DEFINE_SPINLOCK(ndtest_lock); static struct ndtest_priv *instances[NUM_INSTANCES]; static struct class *ndtest_dimm_class; +static struct gen_pool *ndtest_pool; + +static struct ndtest_dimm dimm_group1[] = { + { + .size = DIMM_SIZE, + .handle = NFIT_DIMM_HANDLE(0, 0, 0, 0, 0), + .uuid_str = "1e5c75d2-b618-11ea-9aa3-507b9ddc0f72", + .physical_id = 0, + .num_formats = 2, + }, + { + .size = DIMM_SIZE, + .handle = NFIT_DIMM_HANDLE(0, 0, 0, 0, 1), + .uuid_str = "1c4d43ac-b618-11ea-be80-507b9ddc0f72", + .physical_id = 1, + .num_formats = 2, + }, + { + .size = DIMM_SIZE, + .handle = NFIT_DIMM_HANDLE(0, 0, 1, 0, 0), + .uuid_str = "a9f17ffc-b618-11ea-b36d-507b9ddc0f72", + .physical_id = 2, + .num_formats = 2, + }, + { + .size = DIMM_SIZE, + .handle = NFIT_DIMM_HANDLE(0, 0, 1, 0, 1), + .uuid_str = "b6b83b22-b618-11ea-8aae-507b9ddc0f72", + .physical_id = 3, + .num_formats = 2, + }, + { + .size = DIMM_SIZE, + .handle = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0), + .uuid_str = "bf9baaee-b618-11ea-b181-507b9ddc0f72", + .physical_id = 4, + .num_formats = 2, + }, +}; + +static struct ndtest_dimm dimm_group2[] = { + { + .size = DIMM_SIZE, + .handle = NFIT_DIMM_HANDLE(1, 0, 0, 0, 0), + .uuid_str = "ca0817e2-b618-11ea-9db3-507b9ddc0f72", + .physical_id = 0, + .num_formats = 1, + }, +}; + +static struct ndtest_config bus_configs[NUM_INSTANCES] = { + /* bus 1 */ + { + .dimm_start = 0, + .dimm_count = ARRAY_SIZE(dimm_group1), + .dimms = dimm_group1, + }, + /* bus 2 */ + { + .dimm_start = ARRAY_SIZE(dimm_group1), + .dimm_count = ARRAY_SIZE(dimm_group2), + .dimms = dimm_group2, + }, +}; static inline struct ndtest_priv *to_ndtest_priv(struct device *dev) { @@ -65,6 +140,152 @@ static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc, return 0; } +static void ndtest_release_resource(void *data) +{ + struct nfit_test_resource *res = data; + + spin_lock(&ndtest_lock); + list_del(&res->list); + spin_unlock(&ndtest_lock); + + if (resource_size(&res->res) >= DIMM_SIZE) + gen_pool_free(ndtest_pool, res->res.start, + resource_size(&res->res)); + vfree(res->buf); + kfree(res); +} + +static void *ndtest_alloc_resource(struct ndtest_priv *p, size_t size, + dma_addr_t *dma) +{ + dma_addr_t __dma; + void *buf; + struct nfit_test_resource *res; + struct genpool_data_align data = { + .align = SZ_128M, + }; + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return NULL; + + buf = vmalloc(size); + if (size >= DIMM_SIZE) + __dma = gen_pool_alloc_algo(ndtest_pool, size, + gen_pool_first_fit_align, &data); + else + __dma = (unsigned long) buf; + + if (!__dma) + goto buf_err; + + INIT_LIST_HEAD(&res->list); + res->dev = &p->pdev.dev; + res->buf = buf; + res->res.start = __dma; + res->res.end = __dma + size - 1; + res->res.name = "NFIT"; + spin_lock_init(&res->lock); + INIT_LIST_HEAD(&res->requests); + spin_lock(&ndtest_lock); + list_add(&res->list, &p->resources); + spin_unlock(&ndtest_lock); + + if (dma) + *dma = __dma; + + if (!devm_add_action(&p->pdev.dev, ndtest_release_resource, res)) + return res->buf; + +buf_err: + if (__dma && size >= DIMM_SIZE) + gen_pool_free(ndtest_pool, __dma, size); + if (buf) + vfree(buf); + kfree(res); + + return NULL; +} + +static void put_dimms(void *data) +{ + struct ndtest_priv *p = data; + int i; + + for (i = 0; i < p->config->dimm_count; i++) + if (p->config->dimms[i].dev) { + device_unregister(p->config->dimms[i].dev); + p->config->dimms[i].dev = NULL; + } +} + +static int ndtest_dimm_register(struct ndtest_priv *priv, + struct ndtest_dimm *dimm, int id) +{ + struct device *dev = &priv->pdev.dev; + unsigned long dimm_flags = dimm->flags; + + if (dimm->num_formats > 1) { + set_bit(NDD_ALIASING, &dimm_flags); + set_bit(NDD_LABELING, &dimm_flags); + } + + dimm->nvdimm = nvdimm_create(priv->bus, dimm, NULL, dimm_flags, + NDTEST_SCM_DIMM_CMD_MASK, 0, NULL); + if (!dimm->nvdimm) { + dev_err(dev, "Error creating DIMM object for %pOF\n", priv->dn); + return -ENXIO; + } + + dimm->dev = device_create_with_groups(ndtest_dimm_class, + &priv->pdev.dev, + 0, dimm, NULL, + "test_dimm%d", id); + if (!dimm->dev) { + pr_err("Could not create dimm device attributes\n"); + return -ENOMEM; + } + + return 0; +} + +static int ndtest_nvdimm_init(struct ndtest_priv *p) +{ + struct ndtest_dimm *d; + void *res; + int i, id; + + for (i = 0; i < p->config->dimm_count; i++) { + d = &p->config->dimms[i]; + d->id = id = p->config->dimm_start + i; + res = ndtest_alloc_resource(p, LABEL_SIZE, NULL); + if (!res) + return -ENOMEM; + + d->label_area = res; + sprintf(d->label_area, "label%d", id); + d->config_size = LABEL_SIZE; + + if (!ndtest_alloc_resource(p, d->size, + &p->dimm_dma[id])) + return -ENOMEM; + + if (!ndtest_alloc_resource(p, LABEL_SIZE, + &p->label_dma[id])) + return -ENOMEM; + + if (!ndtest_alloc_resource(p, LABEL_SIZE, + &p->dcr_dma[id])) + return -ENOMEM; + + d->address = p->dimm_dma[id]; + + ndtest_dimm_register(p, d, id); + } + + return 0; +} + static ssize_t compatible_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -89,6 +310,8 @@ static const struct attribute_group *ndtest_attribute_groups[] = { static int ndtest_bus_register(struct ndtest_priv *p) { + p->config = &bus_configs[p->pdev.id]; + p->bus_desc.ndctl = ndtest_ctl; p->bus_desc.module = THIS_MODULE; p->bus_desc.provider_name = NULL; @@ -114,14 +337,34 @@ static int ndtest_remove(struct platform_device *pdev) static int ndtest_probe(struct platform_device *pdev) { struct ndtest_priv *p; + int rc; p = to_ndtest_priv(&pdev->dev); if (ndtest_bus_register(p)) return -ENOMEM; + p->dcr_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR, + sizeof(dma_addr_t), GFP_KERNEL); + p->label_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR, + sizeof(dma_addr_t), GFP_KERNEL); + p->dimm_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR, + sizeof(dma_addr_t), GFP_KERNEL); + + rc = ndtest_nvdimm_init(p); + if (rc) + goto err; + + rc = devm_add_action_or_reset(&pdev->dev, put_dimms, p); + if (rc) + goto err; + platform_set_drvdata(pdev, p); return 0; + +err: + pr_err("%s:%d Failed nvdimm init\n", __func__, __LINE__); + return rc; } static const struct platform_device_id ndtest_id[] = { @@ -155,6 +398,10 @@ static void cleanup_devices(void) nfit_test_teardown(); + if (ndtest_pool) + gen_pool_destroy(ndtest_pool); + + if (ndtest_dimm_class) class_destroy(ndtest_dimm_class); } @@ -178,6 +425,17 @@ static __init int ndtest_init(void) goto err_register; } + ndtest_pool = gen_pool_create(ilog2(SZ_4M), NUMA_NO_NODE); + if (!ndtest_pool) { + rc = -ENOMEM; + goto err_register; + } + + if (gen_pool_add(ndtest_pool, SZ_4G, SZ_4G, NUMA_NO_NODE)) { + rc = -ENOMEM; + goto err_register; + } + /* Each instance can be taken as a bus, which can have multiple dimms */ for (i = 0; i < NUM_INSTANCES; i++) { struct ndtest_priv *priv; diff --git a/tools/testing/nvdimm/test/ndtest.h b/tools/testing/nvdimm/test/ndtest.h index 831ac5c65f50..e607d72ffed1 100644 --- a/tools/testing/nvdimm/test/ndtest.h +++ b/tools/testing/nvdimm/test/ndtest.h @@ -5,12 +5,48 @@ #include #include +struct ndtest_config; + struct ndtest_priv { struct platform_device pdev; struct device_node *dn; struct list_head resources; struct nvdimm_bus_descriptor bus_desc; struct nvdimm_bus *bus; + struct ndtest_config *config; + + dma_addr_t *dcr_dma; + dma_addr_t *label_dma; + dma_addr_t *dimm_dma; +}; + +struct ndtest_dimm { + struct device *dev; + struct nvdimm *nvdimm; + struct ndtest_blk_mmio *mmio; + struct nd_region *blk_region; + + dma_addr_t address; + unsigned long long flags; + unsigned long config_size; + void *label_area; + char *uuid_str; + + unsigned int size; + unsigned int handle; + unsigned int fail_cmd; + unsigned int physical_id; + unsigned int num_formats; + int id; + int fail_cmd_code; + u8 no_alias; +}; + +struct ndtest_config { + struct ndtest_dimm *dimms; + unsigned int dimm_count; + unsigned int dimm_start; + u8 num_regions; }; #endif /* NDTEST_H */ -- cgit v1.3.1 From 5e41396f723004a4e5710a0bb03259a443be1971 Mon Sep 17 00:00:00 2001 From: Santosh Sivaraj Date: Tue, 22 Dec 2020 09:52:37 +0530 Subject: ndtest: Add dimm attributes This patch adds sysfs attributes for nvdimm and the dimm device. Signed-off-by: Santosh Sivaraj Link: https://lore.kernel.org/r/20201222042240.2983755-5-santosh@fossix.org Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/ndtest.c | 202 ++++++++++++++++++++++++++++++++++++- 1 file changed, 200 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c index a82790013f8a..f7682e1d3efe 100644 --- a/tools/testing/nvdimm/test/ndtest.c +++ b/tools/testing/nvdimm/test/ndtest.c @@ -219,6 +219,203 @@ static void put_dimms(void *data) } } +static ssize_t handle_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ndtest_dimm *dimm = dev_get_drvdata(dev); + + return sprintf(buf, "%#x\n", dimm->handle); +} +static DEVICE_ATTR_RO(handle); + +static ssize_t fail_cmd_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ndtest_dimm *dimm = dev_get_drvdata(dev); + + return sprintf(buf, "%#x\n", dimm->fail_cmd); +} + +static ssize_t fail_cmd_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + struct ndtest_dimm *dimm = dev_get_drvdata(dev); + unsigned long val; + ssize_t rc; + + rc = kstrtol(buf, 0, &val); + if (rc) + return rc; + + dimm->fail_cmd = val; + + return size; +} +static DEVICE_ATTR_RW(fail_cmd); + +static ssize_t fail_cmd_code_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ndtest_dimm *dimm = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", dimm->fail_cmd_code); +} + +static ssize_t fail_cmd_code_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + struct ndtest_dimm *dimm = dev_get_drvdata(dev); + unsigned long val; + ssize_t rc; + + rc = kstrtol(buf, 0, &val); + if (rc) + return rc; + + dimm->fail_cmd_code = val; + return size; +} +static DEVICE_ATTR_RW(fail_cmd_code); + +static struct attribute *dimm_attributes[] = { + &dev_attr_handle.attr, + &dev_attr_fail_cmd.attr, + &dev_attr_fail_cmd_code.attr, + NULL, +}; + +static struct attribute_group dimm_attribute_group = { + .attrs = dimm_attributes, +}; + +static const struct attribute_group *dimm_attribute_groups[] = { + &dimm_attribute_group, + NULL, +}; + +static ssize_t phys_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + + return sprintf(buf, "%#x\n", dimm->physical_id); +} +static DEVICE_ATTR_RO(phys_id); + +static ssize_t vendor_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "0x1234567\n"); +} +static DEVICE_ATTR_RO(vendor); + +static ssize_t id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + + return sprintf(buf, "%04x-%02x-%04x-%08x", 0xabcd, + 0xa, 2016, ~(dimm->handle)); +} +static DEVICE_ATTR_RO(id); + +static ssize_t nvdimm_handle_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + + return sprintf(buf, "%#x\n", dimm->handle); +} + +static struct device_attribute dev_attr_nvdimm_show_handle = { + .attr = { .name = "handle", .mode = 0444 }, + .show = nvdimm_handle_show, +}; + +static ssize_t subsystem_vendor_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "0x%04x\n", 0); +} +static DEVICE_ATTR_RO(subsystem_vendor); + +static ssize_t dirty_shutdown_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", 42); +} +static DEVICE_ATTR_RO(dirty_shutdown); + +static ssize_t formats_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + + return sprintf(buf, "%d\n", dimm->num_formats); +} +static DEVICE_ATTR_RO(formats); + +static ssize_t format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + + if (dimm->num_formats > 1) + return sprintf(buf, "0x201\n"); + + return sprintf(buf, "0x101\n"); +} +static DEVICE_ATTR_RO(format); + +static ssize_t format1_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "0x301\n"); +} +static DEVICE_ATTR_RO(format1); + +static umode_t ndtest_nvdimm_attr_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + + if (a == &dev_attr_format1.attr && dimm->num_formats <= 1) + return 0; + + return a->mode; +} + +static struct attribute *ndtest_nvdimm_attributes[] = { + &dev_attr_nvdimm_show_handle.attr, + &dev_attr_vendor.attr, + &dev_attr_id.attr, + &dev_attr_phys_id.attr, + &dev_attr_subsystem_vendor.attr, + &dev_attr_dirty_shutdown.attr, + &dev_attr_formats.attr, + &dev_attr_format.attr, + &dev_attr_format1.attr, + NULL, +}; + +static const struct attribute_group ndtest_nvdimm_attribute_group = { + .name = "papr", + .attrs = ndtest_nvdimm_attributes, + .is_visible = ndtest_nvdimm_attr_visible, +}; + +static const struct attribute_group *ndtest_nvdimm_attribute_groups[] = { + &ndtest_nvdimm_attribute_group, + NULL, +}; + static int ndtest_dimm_register(struct ndtest_priv *priv, struct ndtest_dimm *dimm, int id) { @@ -230,7 +427,8 @@ static int ndtest_dimm_register(struct ndtest_priv *priv, set_bit(NDD_LABELING, &dimm_flags); } - dimm->nvdimm = nvdimm_create(priv->bus, dimm, NULL, dimm_flags, + dimm->nvdimm = nvdimm_create(priv->bus, dimm, + ndtest_nvdimm_attribute_groups, dimm_flags, NDTEST_SCM_DIMM_CMD_MASK, 0, NULL); if (!dimm->nvdimm) { dev_err(dev, "Error creating DIMM object for %pOF\n", priv->dn); @@ -239,7 +437,7 @@ static int ndtest_dimm_register(struct ndtest_priv *priv, dimm->dev = device_create_with_groups(ndtest_dimm_class, &priv->pdev.dev, - 0, dimm, NULL, + 0, dimm, dimm_attribute_groups, "test_dimm%d", id); if (!dimm->dev) { pr_err("Could not create dimm device attributes\n"); -- cgit v1.3.1 From 6fde2d4c8b25cec9589a4a58fd524b9d4e40c4b6 Mon Sep 17 00:00:00 2001 From: Santosh Sivaraj Date: Tue, 22 Dec 2020 09:52:38 +0530 Subject: ndtest: Add regions and mappings to the test buses The bus config array is used to hold the regions and the respective mappings. This config based interface enables to change the dimm/region/namespace layouts easily. Signed-off-by: Santosh Sivaraj Link: https://lore.kernel.org/r/20201222042240.2983755-6-santosh@fossix.org Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/ndtest.c | 352 +++++++++++++++++++++++++++++++++++++ tools/testing/nvdimm/test/ndtest.h | 26 +++ 2 files changed, 378 insertions(+) (limited to 'tools') diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c index f7682e1d3efe..821296b59bdc 100644 --- a/tools/testing/nvdimm/test/ndtest.c +++ b/tools/testing/nvdimm/test/ndtest.c @@ -23,6 +23,7 @@ enum { LABEL_SIZE = SZ_128K, NUM_INSTANCES = 2, NUM_DCR = 4, + NDTEST_MAX_MAPPING = 6, }; #define NDTEST_SCM_DIMM_CMD_MASK \ @@ -88,18 +89,161 @@ static struct ndtest_dimm dimm_group2[] = { }, }; +static struct ndtest_mapping region0_mapping[] = { + { + .dimm = 0, + .position = 0, + .start = 0, + .size = SZ_16M, + }, + { + .dimm = 1, + .position = 1, + .start = 0, + .size = SZ_16M, + } +}; + +static struct ndtest_mapping region1_mapping[] = { + { + .dimm = 0, + .position = 0, + .start = SZ_16M, + .size = SZ_16M, + }, + { + .dimm = 1, + .position = 1, + .start = SZ_16M, + .size = SZ_16M, + }, + { + .dimm = 2, + .position = 2, + .start = SZ_16M, + .size = SZ_16M, + }, + { + .dimm = 3, + .position = 3, + .start = SZ_16M, + .size = SZ_16M, + }, +}; + +static struct ndtest_mapping region2_mapping[] = { + { + .dimm = 0, + .position = 0, + .start = 0, + .size = DIMM_SIZE, + }, +}; + +static struct ndtest_mapping region3_mapping[] = { + { + .dimm = 1, + .start = 0, + .size = DIMM_SIZE, + } +}; + +static struct ndtest_mapping region4_mapping[] = { + { + .dimm = 2, + .start = 0, + .size = DIMM_SIZE, + } +}; + +static struct ndtest_mapping region5_mapping[] = { + { + .dimm = 3, + .start = 0, + .size = DIMM_SIZE, + } +}; + +static struct ndtest_region bus0_regions[] = { + { + .type = ND_DEVICE_NAMESPACE_PMEM, + .num_mappings = ARRAY_SIZE(region0_mapping), + .mapping = region0_mapping, + .size = DIMM_SIZE, + .range_index = 1, + }, + { + .type = ND_DEVICE_NAMESPACE_PMEM, + .num_mappings = ARRAY_SIZE(region1_mapping), + .mapping = region1_mapping, + .size = DIMM_SIZE * 2, + .range_index = 2, + }, + { + .type = ND_DEVICE_NAMESPACE_BLK, + .num_mappings = ARRAY_SIZE(region2_mapping), + .mapping = region2_mapping, + .size = DIMM_SIZE, + .range_index = 3, + }, + { + .type = ND_DEVICE_NAMESPACE_BLK, + .num_mappings = ARRAY_SIZE(region3_mapping), + .mapping = region3_mapping, + .size = DIMM_SIZE, + .range_index = 4, + }, + { + .type = ND_DEVICE_NAMESPACE_BLK, + .num_mappings = ARRAY_SIZE(region4_mapping), + .mapping = region4_mapping, + .size = DIMM_SIZE, + .range_index = 5, + }, + { + .type = ND_DEVICE_NAMESPACE_BLK, + .num_mappings = ARRAY_SIZE(region5_mapping), + .mapping = region5_mapping, + .size = DIMM_SIZE, + .range_index = 6, + }, +}; + +static struct ndtest_mapping region6_mapping[] = { + { + .dimm = 0, + .position = 0, + .start = 0, + .size = DIMM_SIZE, + }, +}; + +static struct ndtest_region bus1_regions[] = { + { + .type = ND_DEVICE_NAMESPACE_IO, + .num_mappings = ARRAY_SIZE(region6_mapping), + .mapping = region6_mapping, + .size = DIMM_SIZE, + .range_index = 1, + }, +}; + static struct ndtest_config bus_configs[NUM_INSTANCES] = { /* bus 1 */ { .dimm_start = 0, .dimm_count = ARRAY_SIZE(dimm_group1), .dimms = dimm_group1, + .regions = bus0_regions, + .num_regions = ARRAY_SIZE(bus0_regions), }, /* bus 2 */ { .dimm_start = ARRAY_SIZE(dimm_group1), .dimm_count = ARRAY_SIZE(dimm_group2), .dimms = dimm_group2, + .regions = bus1_regions, + .num_regions = ARRAY_SIZE(bus1_regions), }, }; @@ -140,6 +284,95 @@ static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc, return 0; } +static int ndtest_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, + void *iobuf, u64 len, int rw) +{ + struct ndtest_dimm *dimm = ndbr->blk_provider_data; + struct ndtest_blk_mmio *mmio = dimm->mmio; + struct nd_region *nd_region = &ndbr->nd_region; + unsigned int lane; + + if (!mmio) + return -ENOMEM; + + lane = nd_region_acquire_lane(nd_region); + if (rw) + memcpy(mmio->base + dpa, iobuf, len); + else { + memcpy(iobuf, mmio->base + dpa, len); + arch_invalidate_pmem(mmio->base + dpa, len); + } + + nd_region_release_lane(nd_region, lane); + + return 0; +} + +static int ndtest_blk_region_enable(struct nvdimm_bus *nvdimm_bus, + struct device *dev) +{ + struct nd_blk_region *ndbr = to_nd_blk_region(dev); + struct nvdimm *nvdimm; + struct ndtest_dimm *dimm; + struct ndtest_blk_mmio *mmio; + + nvdimm = nd_blk_region_to_dimm(ndbr); + dimm = nvdimm_provider_data(nvdimm); + + nd_blk_region_set_provider_data(ndbr, dimm); + dimm->blk_region = to_nd_region(dev); + + mmio = devm_kzalloc(dev, sizeof(struct ndtest_blk_mmio), GFP_KERNEL); + if (!mmio) + return -ENOMEM; + + mmio->base = (void __iomem *) devm_nvdimm_memremap( + dev, dimm->address, 12, nd_blk_memremap_flags(ndbr)); + if (!mmio->base) { + dev_err(dev, "%s failed to map blk dimm\n", nvdimm_name(nvdimm)); + return -ENOMEM; + } + mmio->size = dimm->size; + mmio->base_offset = 0; + + dimm->mmio = mmio; + + return 0; +} + +static struct nfit_test_resource *ndtest_resource_lookup(resource_size_t addr) +{ + int i; + + for (i = 0; i < NUM_INSTANCES; i++) { + struct nfit_test_resource *n, *nfit_res = NULL; + struct ndtest_priv *t = instances[i]; + + if (!t) + continue; + spin_lock(&ndtest_lock); + list_for_each_entry(n, &t->resources, list) { + if (addr >= n->res.start && (addr < n->res.start + + resource_size(&n->res))) { + nfit_res = n; + break; + } else if (addr >= (unsigned long) n->buf + && (addr < (unsigned long) n->buf + + resource_size(&n->res))) { + nfit_res = n; + break; + } + } + spin_unlock(&ndtest_lock); + if (nfit_res) + return nfit_res; + } + + pr_warn("Failed to get resource\n"); + + return NULL; +} + static void ndtest_release_resource(void *data) { struct nfit_test_resource *res = data; @@ -207,6 +440,119 @@ buf_err: return NULL; } +static ssize_t range_index_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + struct ndtest_region *region = nd_region_provider_data(nd_region); + + return sprintf(buf, "%d\n", region->range_index); +} +static DEVICE_ATTR_RO(range_index); + +static struct attribute *ndtest_region_attributes[] = { + &dev_attr_range_index.attr, + NULL, +}; + +static const struct attribute_group ndtest_region_attribute_group = { + .name = "papr", + .attrs = ndtest_region_attributes, +}; + +static const struct attribute_group *ndtest_region_attribute_groups[] = { + &ndtest_region_attribute_group, + NULL, +}; + +static int ndtest_create_region(struct ndtest_priv *p, + struct ndtest_region *region) +{ + struct nd_mapping_desc mappings[NDTEST_MAX_MAPPING]; + struct nd_blk_region_desc ndbr_desc; + struct nd_interleave_set *nd_set; + struct nd_region_desc *ndr_desc; + struct resource res; + int i, ndimm = region->mapping[0].dimm; + u64 uuid[2]; + + memset(&res, 0, sizeof(res)); + memset(&mappings, 0, sizeof(mappings)); + memset(&ndbr_desc, 0, sizeof(ndbr_desc)); + ndr_desc = &ndbr_desc.ndr_desc; + + if (!ndtest_alloc_resource(p, region->size, &res.start)) + return -ENOMEM; + + res.end = res.start + region->size - 1; + ndr_desc->mapping = mappings; + ndr_desc->res = &res; + ndr_desc->provider_data = region; + ndr_desc->attr_groups = ndtest_region_attribute_groups; + + if (uuid_parse(p->config->dimms[ndimm].uuid_str, (uuid_t *)uuid)) { + pr_err("failed to parse UUID\n"); + return -ENXIO; + } + + nd_set = devm_kzalloc(&p->pdev.dev, sizeof(*nd_set), GFP_KERNEL); + if (!nd_set) + return -ENOMEM; + + nd_set->cookie1 = cpu_to_le64(uuid[0]); + nd_set->cookie2 = cpu_to_le64(uuid[1]); + nd_set->altcookie = nd_set->cookie1; + ndr_desc->nd_set = nd_set; + + if (region->type == ND_DEVICE_NAMESPACE_BLK) { + mappings[0].start = 0; + mappings[0].size = DIMM_SIZE; + mappings[0].nvdimm = p->config->dimms[ndimm].nvdimm; + + ndr_desc->mapping = &mappings[0]; + ndr_desc->num_mappings = 1; + ndr_desc->num_lanes = 1; + ndbr_desc.enable = ndtest_blk_region_enable; + ndbr_desc.do_io = ndtest_blk_do_io; + region->region = nvdimm_blk_region_create(p->bus, ndr_desc); + + goto done; + } + + for (i = 0; i < region->num_mappings; i++) { + ndimm = region->mapping[i].dimm; + mappings[i].start = region->mapping[i].start; + mappings[i].size = region->mapping[i].size; + mappings[i].position = region->mapping[i].position; + mappings[i].nvdimm = p->config->dimms[ndimm].nvdimm; + } + + ndr_desc->num_mappings = region->num_mappings; + region->region = nvdimm_pmem_region_create(p->bus, ndr_desc); + +done: + if (!region->region) { + dev_err(&p->pdev.dev, "Error registering region %pR\n", + ndr_desc->res); + return -ENXIO; + } + + return 0; +} + +static int ndtest_init_regions(struct ndtest_priv *p) +{ + int i, ret = 0; + + for (i = 0; i < p->config->num_regions; i++) { + ret = ndtest_create_region(p, &p->config->regions[i]); + if (ret) + return ret; + } + + return 0; +} + static void put_dimms(void *data) { struct ndtest_priv *p = data; @@ -552,6 +898,10 @@ static int ndtest_probe(struct platform_device *pdev) if (rc) goto err; + rc = ndtest_init_regions(p); + if (rc) + goto err; + rc = devm_add_action_or_reset(&pdev->dev, put_dimms, p); if (rc) goto err; @@ -617,6 +967,8 @@ static __init int ndtest_init(void) dax_pmem_compat_test(); #endif + nfit_test_setup(ndtest_resource_lookup, NULL); + ndtest_dimm_class = class_create(THIS_MODULE, "nfit_test_dimm"); if (IS_ERR(ndtest_dimm_class)) { rc = PTR_ERR(ndtest_dimm_class); diff --git a/tools/testing/nvdimm/test/ndtest.h b/tools/testing/nvdimm/test/ndtest.h index e607d72ffed1..8f27ad6f7319 100644 --- a/tools/testing/nvdimm/test/ndtest.h +++ b/tools/testing/nvdimm/test/ndtest.h @@ -20,6 +20,15 @@ struct ndtest_priv { dma_addr_t *dimm_dma; }; +struct ndtest_blk_mmio { + void __iomem *base; + u64 size; + u64 base_offset; + u32 line_size; + u32 num_lines; + u32 table_size; +}; + struct ndtest_dimm { struct device *dev; struct nvdimm *nvdimm; @@ -42,8 +51,25 @@ struct ndtest_dimm { u8 no_alias; }; +struct ndtest_mapping { + u64 start; + u64 size; + u8 position; + u8 dimm; +}; + +struct ndtest_region { + struct nd_region *region; + struct ndtest_mapping *mapping; + u64 size; + u8 type; + u8 num_mappings; + u8 range_index; +}; + struct ndtest_config { struct ndtest_dimm *dimms; + struct ndtest_region *regions; unsigned int dimm_count; unsigned int dimm_start; u8 num_regions; -- cgit v1.3.1 From 14ccef10e53e4c303570d2ee2d49e45be1118e99 Mon Sep 17 00:00:00 2001 From: Santosh Sivaraj Date: Tue, 22 Dec 2020 09:52:39 +0530 Subject: ndtest: Add nvdimm control functions Add functions to support ND_CMD_GET_CONFIG_SIZE, ND_CMD_SET_CONFIG_DATA and ND_CMD_GET_CONFIG_DATA. Signed-off-by: Santosh Sivaraj Link: https://lore.kernel.org/r/20201222042240.2983755-7-santosh@fossix.org Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/ndtest.c | 51 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'tools') diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c index 821296b59bdc..dc1e3636616a 100644 --- a/tools/testing/nvdimm/test/ndtest.c +++ b/tools/testing/nvdimm/test/ndtest.c @@ -254,6 +254,45 @@ static inline struct ndtest_priv *to_ndtest_priv(struct device *dev) return container_of(pdev, struct ndtest_priv, pdev); } +static int ndtest_config_get(struct ndtest_dimm *p, unsigned int buf_len, + struct nd_cmd_get_config_data_hdr *hdr) +{ + unsigned int len; + + if ((hdr->in_offset + hdr->in_length) > LABEL_SIZE) + return -EINVAL; + + hdr->status = 0; + len = min(hdr->in_length, LABEL_SIZE - hdr->in_offset); + memcpy(hdr->out_buf, p->label_area + hdr->in_offset, len); + + return buf_len - len; +} + +static int ndtest_config_set(struct ndtest_dimm *p, unsigned int buf_len, + struct nd_cmd_set_config_hdr *hdr) +{ + unsigned int len; + + if ((hdr->in_offset + hdr->in_length) > LABEL_SIZE) + return -EINVAL; + + len = min(hdr->in_length, LABEL_SIZE - hdr->in_offset); + memcpy(p->label_area + hdr->in_offset, hdr->in_buf, len); + + return buf_len - len; +} + +static int ndtest_get_config_size(struct ndtest_dimm *dimm, unsigned int buf_len, + struct nd_cmd_get_config_size *size) +{ + size->status = 0; + size->max_xfer = 8; + size->config_size = dimm->config_size; + + return 0; +} + static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) @@ -275,12 +314,24 @@ static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc, switch (cmd) { case ND_CMD_GET_CONFIG_SIZE: + *cmd_rc = ndtest_get_config_size(dimm, buf_len, buf); + break; case ND_CMD_GET_CONFIG_DATA: + *cmd_rc = ndtest_config_get(dimm, buf_len, buf); + break; case ND_CMD_SET_CONFIG_DATA: + *cmd_rc = ndtest_config_set(dimm, buf_len, buf); + break; default: return -EINVAL; } + /* Failures for a DIMM can be injected using fail_cmd and + * fail_cmd_code, see the device attributes below + */ + if ((1 << cmd) & dimm->fail_cmd) + return dimm->fail_cmd_code ? dimm->fail_cmd_code : -EIO; + return 0; } -- cgit v1.3.1 From 50f558a5fe16b385cf1427b2a96149f4f68952d9 Mon Sep 17 00:00:00 2001 From: Santosh Sivaraj Date: Tue, 22 Dec 2020 09:52:40 +0530 Subject: ndtest: Add papr health related flags sysfs attibutes to show health related flags are added. Signed-off-by: Santosh Sivaraj Link: https://lore.kernel.org/r/20201222042240.2983755-8-santosh@fossix.org Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/ndtest.c | 41 ++++++++++++++++++++++++++++++++++++++ tools/testing/nvdimm/test/ndtest.h | 31 ++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) (limited to 'tools') diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c index dc1e3636616a..6862915f1fb0 100644 --- a/tools/testing/nvdimm/test/ndtest.c +++ b/tools/testing/nvdimm/test/ndtest.c @@ -86,6 +86,9 @@ static struct ndtest_dimm dimm_group2[] = { .uuid_str = "ca0817e2-b618-11ea-9db3-507b9ddc0f72", .physical_id = 0, .num_formats = 1, + .flags = PAPR_PMEM_UNARMED | PAPR_PMEM_EMPTY | + PAPR_PMEM_SAVE_FAILED | PAPR_PMEM_SHUTDOWN_DIRTY | + PAPR_PMEM_HEALTH_FATAL, }, }; @@ -789,6 +792,40 @@ static umode_t ndtest_nvdimm_attr_visible(struct kobject *kobj, return a->mode; } +static ssize_t flags_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + struct seq_buf s; + u64 flags; + + flags = dimm->flags; + + seq_buf_init(&s, buf, PAGE_SIZE); + if (flags & PAPR_PMEM_UNARMED_MASK) + seq_buf_printf(&s, "not_armed "); + + if (flags & PAPR_PMEM_BAD_SHUTDOWN_MASK) + seq_buf_printf(&s, "flush_fail "); + + if (flags & PAPR_PMEM_BAD_RESTORE_MASK) + seq_buf_printf(&s, "restore_fail "); + + if (flags & PAPR_PMEM_SAVE_MASK) + seq_buf_printf(&s, "save_fail "); + + if (flags & PAPR_PMEM_SMART_EVENT_MASK) + seq_buf_printf(&s, "smart_notify "); + + + if (seq_buf_used(&s)) + seq_buf_printf(&s, "\n"); + + return seq_buf_used(&s); +} +static DEVICE_ATTR_RO(flags); + static struct attribute *ndtest_nvdimm_attributes[] = { &dev_attr_nvdimm_show_handle.attr, &dev_attr_vendor.attr, @@ -799,6 +836,7 @@ static struct attribute *ndtest_nvdimm_attributes[] = { &dev_attr_formats.attr, &dev_attr_format.attr, &dev_attr_format1.attr, + &dev_attr_flags.attr, NULL, }; @@ -824,6 +862,9 @@ static int ndtest_dimm_register(struct ndtest_priv *priv, set_bit(NDD_LABELING, &dimm_flags); } + if (dimm->flags & PAPR_PMEM_UNARMED_MASK) + set_bit(NDD_UNARMED, &dimm_flags); + dimm->nvdimm = nvdimm_create(priv->bus, dimm, ndtest_nvdimm_attribute_groups, dimm_flags, NDTEST_SCM_DIMM_CMD_MASK, 0, NULL); diff --git a/tools/testing/nvdimm/test/ndtest.h b/tools/testing/nvdimm/test/ndtest.h index 8f27ad6f7319..2c54c9cbb90c 100644 --- a/tools/testing/nvdimm/test/ndtest.h +++ b/tools/testing/nvdimm/test/ndtest.h @@ -5,6 +5,37 @@ #include #include +/* SCM device is unable to persist memory contents */ +#define PAPR_PMEM_UNARMED (1ULL << (63 - 0)) +/* SCM device failed to persist memory contents */ +#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1)) +/* SCM device contents are not persisted from previous IPL */ +#define PAPR_PMEM_EMPTY (1ULL << (63 - 3)) +#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4)) +/* SCM device will be garded off next IPL due to failure */ +#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5)) +/* SCM contents cannot persist due to current platform health status */ +#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6)) + +/* Bits status indicators for health bitmap indicating unarmed dimm */ +#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \ + PAPR_PMEM_HEALTH_UNHEALTHY) + +#define PAPR_PMEM_SAVE_FAILED (1ULL << (63 - 10)) + +/* Bits status indicators for health bitmap indicating unflushed dimm */ +#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY) + +/* Bits status indicators for health bitmap indicating unrestored dimm */ +#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY) + +/* Bit status indicators for smart event notification */ +#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \ + PAPR_PMEM_HEALTH_FATAL | \ + PAPR_PMEM_HEALTH_UNHEALTHY) + +#define PAPR_PMEM_SAVE_MASK (PAPR_PMEM_SAVE_FAILED) + struct ndtest_config; struct ndtest_priv { -- cgit v1.3.1 From 11df27f7fdf02cc2bb354358ad482e1fdd690589 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 28 Jan 2021 16:48:19 +0200 Subject: selftests: forwarding: Specify interface when invoking mausezahn Specify the interface through which packets should be transmitted so that the test will pass regardless of the libnet version against which mausezahn is linked. Fixes: cab14d1087d9 ("selftests: Add version of router_multipath.sh using nexthop objects") Fixes: 3d578d879517 ("selftests: forwarding: Test IPv4 weighted nexthops") Signed-off-by: Danielle Ratson Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/router_mpath_nh.sh | 2 +- tools/testing/selftests/net/forwarding/router_multipath.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh index 388e4492b81b..76efb1f8375e 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh @@ -203,7 +203,7 @@ multipath4_test() t0_rp12=$(link_stats_tx_packets_get $rp12) t0_rp13=$(link_stats_tx_packets_get $rp13) - ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ + ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ -d 1msec -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh index 79a209927962..464821c587a5 100755 --- a/tools/testing/selftests/net/forwarding/router_multipath.sh +++ b/tools/testing/selftests/net/forwarding/router_multipath.sh @@ -178,7 +178,7 @@ multipath4_test() t0_rp12=$(link_stats_tx_packets_get $rp12) t0_rp13=$(link_stats_tx_packets_get $rp13) - ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ + ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ -d 1msec -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) -- cgit v1.3.1 From 7f1b11ba3564a391169420d98162987a12d0795d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 28 Jan 2021 20:28:56 +0100 Subject: tools/power/turbostat: Fallback to an MSR read for EPB Commit 6d6501d912a9 ("tools/power/turbostat: Read energy_perf_bias from sysfs") converted turbostat to read the energy_perf_bias value from sysfs. However, older kernels which do not have that file yet, would fail. For those, fall back to the MSR reading. Fixes: 6d6501d912a9 ("tools/power/turbostat: Read energy_perf_bias from sysfs") Reported-by: Artem Bityutskiy Signed-off-by: Borislav Petkov Tested-by: Artem Bityutskiy Link: https://lkml.kernel.org/r/20210127132444.981120-1-dedekind1@gmail.com --- tools/power/x86/turbostat/turbostat.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 389ea5209a83..a7c4f0772e53 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1834,12 +1834,15 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp) int get_epb(int cpu) { char path[128 + PATH_BYTES]; + unsigned long long msr; int ret, epb = -1; FILE *fp; sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu); - fp = fopen_or_die(path, "r"); + fp = fopen(path, "r"); + if (!fp) + goto msr_fallback; ret = fscanf(fp, "%d", &epb); if (ret != 1) @@ -1848,6 +1851,11 @@ int get_epb(int cpu) fclose(fp); return epb; + +msr_fallback: + get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr); + + return msr & 0xf; } void get_apic_id(struct thread_data *t) -- cgit v1.3.1 From a3005b0f83f217c888393c6bf9cd36e3d1616bca Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sat, 30 Jan 2021 11:14:25 +0100 Subject: selftests: netfilter: fix current year use date %Y instead of %G to read current year Problem appeared when running lkp-tests on 01/01/2021 Fixes: 48d072c4e8cd ("selftests: netfilter: add time counter check") Reported-by: kernel test robot Signed-off-by: Fabian Frederick Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_meta.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh index 087f0e6e71ce..f33154c04d34 100755 --- a/tools/testing/selftests/netfilter/nft_meta.sh +++ b/tools/testing/selftests/netfilter/nft_meta.sh @@ -23,7 +23,7 @@ ip -net "$ns0" addr add 127.0.0.1 dev lo trap cleanup EXIT -currentyear=$(date +%G) +currentyear=$(date +%Y) lastyear=$((currentyear-1)) ip netns exec "$ns0" nft -f /dev/stdin < Date: Wed, 3 Feb 2021 23:37:14 +0300 Subject: selftests/tls: fix selftest with CHACHA20-POLY1305 TLS selftests were broken also because of use of structure that was not exported to UAPI. Fix by defining the union in tests. Fixes: 4f336e88a870 (selftests/tls: add CHACHA20-POLY1305 to tls selftests) Reported-by: Rong Chen Signed-off-by: Vadim Fedorenko Link: https://lore.kernel.org/r/1612384634-5377-1-git-send-email-vfedorenko@novek.ru Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index e0088c2d38a5..426d07875a48 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -133,7 +133,10 @@ FIXTURE_VARIANT_ADD(tls, 13_chacha) FIXTURE_SETUP(tls) { - union tls_crypto_context tls12; + union { + struct tls12_crypto_info_aes_gcm_128 aes128; + struct tls12_crypto_info_chacha20_poly1305 chacha20; + } tls12; struct sockaddr_in addr; socklen_t len; int sfd, ret; @@ -143,14 +146,16 @@ FIXTURE_SETUP(tls) len = sizeof(addr); memset(&tls12, 0, sizeof(tls12)); - tls12.info.version = variant->tls_version; - tls12.info.cipher_type = variant->cipher_type; switch (variant->cipher_type) { case TLS_CIPHER_CHACHA20_POLY1305: - tls12_sz = sizeof(tls12_crypto_info_chacha20_poly1305); + tls12_sz = sizeof(struct tls12_crypto_info_chacha20_poly1305); + tls12.chacha20.info.version = variant->tls_version; + tls12.chacha20.info.cipher_type = variant->cipher_type; break; case TLS_CIPHER_AES_GCM_128: - tls12_sz = sizeof(tls12_crypto_info_aes_gcm_128); + tls12_sz = sizeof(struct tls12_crypto_info_aes_gcm_128); + tls12.aes128.info.version = variant->tls_version; + tls12.aes128.info.cipher_type = variant->cipher_type; break; default: tls12_sz = 0; -- cgit v1.3.1 From 647b8dd5184665432cc8a2b5bca46a201f690c37 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 4 Feb 2021 20:50:34 +0300 Subject: selftests: txtimestamp: fix compilation issue PACKET_TX_TIMESTAMP is defined in if_packet.h but it is not included in test. Include it instead of otherwise the error of redefinition arrives. Also fix the compiler warning about ambiguous control flow by adding explicit braces. Fixes: 8fe2f761cae9 ("net-timestamp: expand documentation") Suggested-by: Willem de Bruijn Signed-off-by: Vadim Fedorenko Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/1612461034-24524-1-git-send-email-vfedorenko@novek.ru Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/txtimestamp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index 490a8cca708a..fabb1d555ee5 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -34,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -495,12 +495,12 @@ static void do_test(int family, unsigned int report_opt) total_len = cfg_payload_len; if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) { total_len += sizeof(struct udphdr); - if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) + if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) { if (family == PF_INET) total_len += sizeof(struct iphdr); else total_len += sizeof(struct ipv6hdr); - + } /* special case, only rawv6_sendmsg: * pass proto in sin6_port if not connected * also see ANK comment in net/ipv4/raw.c -- cgit v1.3.1 From 9f5f8ec50165630cfc49897410b30997d4d677b5 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Sat, 6 Feb 2021 00:33:24 +1300 Subject: dma-mapping: benchmark: use u8 for reserved field in uAPI structure The original code put five u32 before a u64 expansion[10] array. Five is odd, this will cause trouble in the extension of the structure by adding new features. This patch moves to use u8 for reserved field to avoid future alignment risk. Meanwhile, it also clears the memory of struct map_benchmark in tools, otherwise, if users use old version to run on newer kernel, the random expansion value will cause side effect on newer kernel. Signed-off-by: Barry Song Signed-off-by: Christoph Hellwig --- kernel/dma/map_benchmark.c | 2 +- tools/testing/selftests/dma/dma_map_benchmark.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c index 1b1b8ff875cb..da95df381483 100644 --- a/kernel/dma/map_benchmark.c +++ b/kernel/dma/map_benchmark.c @@ -36,7 +36,7 @@ struct map_benchmark { __s32 node; /* which numa node this benchmark will run on */ __u32 dma_bits; /* DMA addressing capability */ __u32 dma_dir; /* DMA data direction */ - __u64 expansion[10]; /* For future use */ + __u8 expansion[84]; /* For future use */ }; struct map_benchmark_data { diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c index 7065163a8388..537d65968c48 100644 --- a/tools/testing/selftests/dma/dma_map_benchmark.c +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -35,7 +36,7 @@ struct map_benchmark { __s32 node; /* which numa node this benchmark will run on */ __u32 dma_bits; /* DMA addressing capability */ __u32 dma_dir; /* DMA data direction */ - __u64 expansion[10]; /* For future use */ + __u8 expansion[84]; /* For future use */ }; int main(int argc, char **argv) @@ -102,6 +103,7 @@ int main(int argc, char **argv) exit(1); } + memset(&map, 0, sizeof(map)); map.seconds = seconds; map.threads = threads; map.node = node; -- cgit v1.3.1 From 36a6c843fd0d8e02506681577e96dabd203dd8e8 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Fri, 5 Feb 2021 13:43:21 -0500 Subject: entry: Use different define for selector variable in SUD Michael Kerrisk suggested that, from an API perspective, it is a bad idea to share the PR_SYS_DISPATCH_ defines between the prctl operation and the selector variable. Therefore, define two new constants to be used by SUD's selector variable and update the corresponding documentation and test cases. While this changes the API syscall user dispatch has never been part of a Linux release, it will show up for the first time in 5.11. Suggested-by: Michael Kerrisk (man-pages) Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210205184321.2062251-1-krisman@collabora.com --- Documentation/admin-guide/syscall-user-dispatch.rst | 4 ++-- include/uapi/linux/prctl.h | 3 +++ kernel/entry/syscall_user_dispatch.c | 4 ++-- .../selftests/syscall_user_dispatch/sud_benchmark.c | 8 +++++--- tools/testing/selftests/syscall_user_dispatch/sud_test.c | 14 ++++++++------ 5 files changed, 20 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/Documentation/admin-guide/syscall-user-dispatch.rst b/Documentation/admin-guide/syscall-user-dispatch.rst index a380d6515774..60314953c728 100644 --- a/Documentation/admin-guide/syscall-user-dispatch.rst +++ b/Documentation/admin-guide/syscall-user-dispatch.rst @@ -70,8 +70,8 @@ trampoline code on the vDSO, that trampoline is never intercepted. [selector] is a pointer to a char-sized region in the process memory region, that provides a quick way to enable disable syscall redirection thread-wide, without the need to invoke the kernel directly. selector -can be set to PR_SYS_DISPATCH_ON or PR_SYS_DISPATCH_OFF. Any other -value should terminate the program with a SIGSYS. +can be set to SYSCALL_DISPATCH_FILTER_ALLOW or SYSCALL_DISPATCH_FILTER_BLOCK. +Any other value should terminate the program with a SIGSYS. Security Notes -------------- diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 90deb41c8a34..667f1aed091c 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -251,5 +251,8 @@ struct prctl_mm_map { #define PR_SET_SYSCALL_USER_DISPATCH 59 # define PR_SYS_DISPATCH_OFF 0 # define PR_SYS_DISPATCH_ON 1 +/* The control values for the user space selector when dispatch is enabled */ +# define SYSCALL_DISPATCH_FILTER_ALLOW 0 +# define SYSCALL_DISPATCH_FILTER_BLOCK 1 #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c index b0338a5625d9..c240302f56e2 100644 --- a/kernel/entry/syscall_user_dispatch.c +++ b/kernel/entry/syscall_user_dispatch.c @@ -50,10 +50,10 @@ bool syscall_user_dispatch(struct pt_regs *regs) if (unlikely(__get_user(state, sd->selector))) do_exit(SIGSEGV); - if (likely(state == PR_SYS_DISPATCH_OFF)) + if (likely(state == SYSCALL_DISPATCH_FILTER_ALLOW)) return false; - if (state != PR_SYS_DISPATCH_ON) + if (state != SYSCALL_DISPATCH_FILTER_BLOCK) do_exit(SIGSYS); } diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c index 6689f1183dbf..073a03702ff5 100644 --- a/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c +++ b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c @@ -22,6 +22,8 @@ # define PR_SET_SYSCALL_USER_DISPATCH 59 # define PR_SYS_DISPATCH_OFF 0 # define PR_SYS_DISPATCH_ON 1 +# define SYSCALL_DISPATCH_FILTER_ALLOW 0 +# define SYSCALL_DISPATCH_FILTER_BLOCK 1 #endif #ifdef __NR_syscalls @@ -55,8 +57,8 @@ unsigned long trapped_call_count = 0; unsigned long native_call_count = 0; char selector; -#define SYSCALL_BLOCK (selector = PR_SYS_DISPATCH_ON) -#define SYSCALL_UNBLOCK (selector = PR_SYS_DISPATCH_OFF) +#define SYSCALL_BLOCK (selector = SYSCALL_DISPATCH_FILTER_BLOCK) +#define SYSCALL_UNBLOCK (selector = SYSCALL_DISPATCH_FILTER_ALLOW) #define CALIBRATION_STEP 100000 #define CALIBRATE_TO_SECS 5 @@ -170,7 +172,7 @@ int main(void) syscall(MAGIC_SYSCALL_1); #ifdef TEST_BLOCKED_RETURN - if (selector == PR_SYS_DISPATCH_OFF) { + if (selector == SYSCALL_DISPATCH_FILTER_ALLOW) { fprintf(stderr, "Failed to return with selector blocked.\n"); exit(-1); } diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_test.c b/tools/testing/selftests/syscall_user_dispatch/sud_test.c index 6498b050ef89..b5d592d4099e 100644 --- a/tools/testing/selftests/syscall_user_dispatch/sud_test.c +++ b/tools/testing/selftests/syscall_user_dispatch/sud_test.c @@ -18,6 +18,8 @@ # define PR_SET_SYSCALL_USER_DISPATCH 59 # define PR_SYS_DISPATCH_OFF 0 # define PR_SYS_DISPATCH_ON 1 +# define SYSCALL_DISPATCH_FILTER_ALLOW 0 +# define SYSCALL_DISPATCH_FILTER_BLOCK 1 #endif #ifndef SYS_USER_DISPATCH @@ -30,8 +32,8 @@ # define MAGIC_SYSCALL_1 (0xff00) /* Bad Linux syscall number */ #endif -#define SYSCALL_DISPATCH_ON(x) ((x) = 1) -#define SYSCALL_DISPATCH_OFF(x) ((x) = 0) +#define SYSCALL_DISPATCH_ON(x) ((x) = SYSCALL_DISPATCH_FILTER_BLOCK) +#define SYSCALL_DISPATCH_OFF(x) ((x) = SYSCALL_DISPATCH_FILTER_ALLOW) /* Test Summary: * @@ -56,7 +58,7 @@ TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS) { - char sel = 0; + char sel = SYSCALL_DISPATCH_FILTER_ALLOW; struct sysinfo info; int ret; @@ -79,7 +81,7 @@ TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS) TEST(bad_prctl_param) { - char sel = 0; + char sel = SYSCALL_DISPATCH_FILTER_ALLOW; int op; /* Invalid op */ @@ -220,7 +222,7 @@ TEST_SIGNAL(bad_selector, SIGSYS) sigset_t mask; struct sysinfo info; - glob_sel = 0; + glob_sel = SYSCALL_DISPATCH_FILTER_ALLOW; nr_syscalls_emulated = 0; si_code = 0; si_errno = 0; @@ -288,7 +290,7 @@ TEST(direct_dispatch_range) { int ret = 0; struct sysinfo info; - char sel = 0; + char sel = SYSCALL_DISPATCH_FILTER_ALLOW; /* * Instead of calculating libc addresses; allow the entire -- cgit v1.3.1 From d52db800846f66d98a4e14c39cf88a06bcd9985f Mon Sep 17 00:00:00 2001 From: Rong Chen Date: Tue, 9 Feb 2021 13:42:21 -0800 Subject: selftests/vm: rename file run_vmtests to run_vmtests.sh Commit c2aa8afc36fa has renamed run_vmtests in Makefile, but the file still uses the old name. The kernel test robot reported the following issue: # selftests: vm: run_vmtests.sh # Warning: file run_vmtests.sh is missing! not ok 1 selftests: vm: run_vmtests.sh Link: https://lkml.kernel.org/r/20210205085507.1479894-1-rong.a.chen@intel.com Fixes: c2aa8afc36fa (selftests/vm: rename run_vmtests --> run_vmtests.sh) Signed-off-by: Rong Chen Reported-by: kernel test robot Reviewed-by: John Hubbard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/run_vmtests | 349 ------------------------------ tools/testing/selftests/vm/run_vmtests.sh | 349 ++++++++++++++++++++++++++++++ 2 files changed, 349 insertions(+), 349 deletions(-) delete mode 100755 tools/testing/selftests/vm/run_vmtests create mode 100755 tools/testing/selftests/vm/run_vmtests.sh (limited to 'tools') diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests deleted file mode 100755 index e953f3cd9664..000000000000 --- a/tools/testing/selftests/vm/run_vmtests +++ /dev/null @@ -1,349 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -#please run as root - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - -mnt=./huge -exitcode=0 - -#get huge pagesize and freepages from /proc/meminfo -while read name size unit; do - if [ "$name" = "HugePages_Free:" ]; then - freepgs=$size - fi - if [ "$name" = "Hugepagesize:" ]; then - hpgsize_KB=$size - fi -done < /proc/meminfo - -# Simple hugetlbfs tests have a hardcoded minimum requirement of -# huge pages totaling 256MB (262144KB) in size. The userfaultfd -# hugetlb test requires a minimum of 2 * nr_cpus huge pages. Take -# both of these requirements into account and attempt to increase -# number of huge pages available. -nr_cpus=$(nproc) -hpgsize_MB=$((hpgsize_KB / 1024)) -half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128)) -needmem_KB=$((half_ufd_size_MB * 2 * 1024)) - -#set proper nr_hugepages -if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then - nr_hugepgs=`cat /proc/sys/vm/nr_hugepages` - needpgs=$((needmem_KB / hpgsize_KB)) - tries=2 - while [ $tries -gt 0 ] && [ $freepgs -lt $needpgs ]; do - lackpgs=$(( $needpgs - $freepgs )) - echo 3 > /proc/sys/vm/drop_caches - echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages - if [ $? -ne 0 ]; then - echo "Please run this test as root" - exit $ksft_skip - fi - while read name size unit; do - if [ "$name" = "HugePages_Free:" ]; then - freepgs=$size - fi - done < /proc/meminfo - tries=$((tries - 1)) - done - if [ $freepgs -lt $needpgs ]; then - printf "Not enough huge pages available (%d < %d)\n" \ - $freepgs $needpgs - exit 1 - fi -else - echo "no hugetlbfs support in kernel?" - exit 1 -fi - -#filter 64bit architectures -ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64" -if [ -z $ARCH ]; then - ARCH=`uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/'` -fi -VADDR64=0 -echo "$ARCH64STR" | grep $ARCH && VADDR64=1 - -mkdir $mnt -mount -t hugetlbfs none $mnt - -echo "---------------------" -echo "running hugepage-mmap" -echo "---------------------" -./hugepage-mmap -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -shmmax=`cat /proc/sys/kernel/shmmax` -shmall=`cat /proc/sys/kernel/shmall` -echo 268435456 > /proc/sys/kernel/shmmax -echo 4194304 > /proc/sys/kernel/shmall -echo "--------------------" -echo "running hugepage-shm" -echo "--------------------" -./hugepage-shm -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi -echo $shmmax > /proc/sys/kernel/shmmax -echo $shmall > /proc/sys/kernel/shmall - -echo "-------------------" -echo "running map_hugetlb" -echo "-------------------" -./map_hugetlb -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "NOTE: The above hugetlb tests provide minimal coverage. Use" -echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" -echo " hugetlb regression testing." - -echo "---------------------------" -echo "running map_fixed_noreplace" -echo "---------------------------" -./map_fixed_noreplace -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "------------------------------------------------------" -echo "running: gup_test -u # get_user_pages_fast() benchmark" -echo "------------------------------------------------------" -./gup_test -u -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "------------------------------------------------------" -echo "running: gup_test -a # pin_user_pages_fast() benchmark" -echo "------------------------------------------------------" -./gup_test -a -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "------------------------------------------------------------" -echo "# Dump pages 0, 19, and 4096, using pin_user_pages:" -echo "running: gup_test -ct -F 0x1 0 19 0x1000 # dump_page() test" -echo "------------------------------------------------------------" -./gup_test -ct -F 0x1 0 19 0x1000 -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "-------------------" -echo "running userfaultfd" -echo "-------------------" -./userfaultfd anon 20 16 -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "---------------------------" -echo "running userfaultfd_hugetlb" -echo "---------------------------" -# Test requires source and destination huge pages. Size of source -# (half_ufd_size_MB) is passed as argument to test. -./userfaultfd hugetlb $half_ufd_size_MB 32 $mnt/ufd_test_file -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi -rm -f $mnt/ufd_test_file - -echo "-------------------------" -echo "running userfaultfd_shmem" -echo "-------------------------" -./userfaultfd shmem 20 16 -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -#cleanup -umount $mnt -rm -rf $mnt -echo $nr_hugepgs > /proc/sys/vm/nr_hugepages - -echo "-----------------------" -echo "running compaction_test" -echo "-----------------------" -./compaction_test -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "----------------------" -echo "running on-fault-limit" -echo "----------------------" -sudo -u nobody ./on-fault-limit -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "--------------------" -echo "running map_populate" -echo "--------------------" -./map_populate -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "-------------------------" -echo "running mlock-random-test" -echo "-------------------------" -./mlock-random-test -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "--------------------" -echo "running mlock2-tests" -echo "--------------------" -./mlock2-tests -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "-------------------" -echo "running mremap_test" -echo "-------------------" -./mremap_test -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "-----------------" -echo "running thuge-gen" -echo "-----------------" -./thuge-gen -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -if [ $VADDR64 -ne 0 ]; then -echo "-----------------------------" -echo "running virtual_address_range" -echo "-----------------------------" -./virtual_address_range -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "-----------------------------" -echo "running virtual address 128TB switch test" -echo "-----------------------------" -./va_128TBswitch -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi -fi # VADDR64 - -echo "------------------------------------" -echo "running vmalloc stability smoke test" -echo "------------------------------------" -./test_vmalloc.sh smoke -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi - -echo "------------------------------------" -echo "running MREMAP_DONTUNMAP smoke test" -echo "------------------------------------" -./mremap_dontunmap -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi - -echo "running HMM smoke test" -echo "------------------------------------" -./test_hmm.sh smoke -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi - -exit $exitcode diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh new file mode 100755 index 000000000000..e953f3cd9664 --- /dev/null +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -0,0 +1,349 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +#please run as root + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +mnt=./huge +exitcode=0 + +#get huge pagesize and freepages from /proc/meminfo +while read name size unit; do + if [ "$name" = "HugePages_Free:" ]; then + freepgs=$size + fi + if [ "$name" = "Hugepagesize:" ]; then + hpgsize_KB=$size + fi +done < /proc/meminfo + +# Simple hugetlbfs tests have a hardcoded minimum requirement of +# huge pages totaling 256MB (262144KB) in size. The userfaultfd +# hugetlb test requires a minimum of 2 * nr_cpus huge pages. Take +# both of these requirements into account and attempt to increase +# number of huge pages available. +nr_cpus=$(nproc) +hpgsize_MB=$((hpgsize_KB / 1024)) +half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128)) +needmem_KB=$((half_ufd_size_MB * 2 * 1024)) + +#set proper nr_hugepages +if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then + nr_hugepgs=`cat /proc/sys/vm/nr_hugepages` + needpgs=$((needmem_KB / hpgsize_KB)) + tries=2 + while [ $tries -gt 0 ] && [ $freepgs -lt $needpgs ]; do + lackpgs=$(( $needpgs - $freepgs )) + echo 3 > /proc/sys/vm/drop_caches + echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages + if [ $? -ne 0 ]; then + echo "Please run this test as root" + exit $ksft_skip + fi + while read name size unit; do + if [ "$name" = "HugePages_Free:" ]; then + freepgs=$size + fi + done < /proc/meminfo + tries=$((tries - 1)) + done + if [ $freepgs -lt $needpgs ]; then + printf "Not enough huge pages available (%d < %d)\n" \ + $freepgs $needpgs + exit 1 + fi +else + echo "no hugetlbfs support in kernel?" + exit 1 +fi + +#filter 64bit architectures +ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64" +if [ -z $ARCH ]; then + ARCH=`uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/'` +fi +VADDR64=0 +echo "$ARCH64STR" | grep $ARCH && VADDR64=1 + +mkdir $mnt +mount -t hugetlbfs none $mnt + +echo "---------------------" +echo "running hugepage-mmap" +echo "---------------------" +./hugepage-mmap +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +shmmax=`cat /proc/sys/kernel/shmmax` +shmall=`cat /proc/sys/kernel/shmall` +echo 268435456 > /proc/sys/kernel/shmmax +echo 4194304 > /proc/sys/kernel/shmall +echo "--------------------" +echo "running hugepage-shm" +echo "--------------------" +./hugepage-shm +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi +echo $shmmax > /proc/sys/kernel/shmmax +echo $shmall > /proc/sys/kernel/shmall + +echo "-------------------" +echo "running map_hugetlb" +echo "-------------------" +./map_hugetlb +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "NOTE: The above hugetlb tests provide minimal coverage. Use" +echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" +echo " hugetlb regression testing." + +echo "---------------------------" +echo "running map_fixed_noreplace" +echo "---------------------------" +./map_fixed_noreplace +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "------------------------------------------------------" +echo "running: gup_test -u # get_user_pages_fast() benchmark" +echo "------------------------------------------------------" +./gup_test -u +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "------------------------------------------------------" +echo "running: gup_test -a # pin_user_pages_fast() benchmark" +echo "------------------------------------------------------" +./gup_test -a +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "------------------------------------------------------------" +echo "# Dump pages 0, 19, and 4096, using pin_user_pages:" +echo "running: gup_test -ct -F 0x1 0 19 0x1000 # dump_page() test" +echo "------------------------------------------------------------" +./gup_test -ct -F 0x1 0 19 0x1000 +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "-------------------" +echo "running userfaultfd" +echo "-------------------" +./userfaultfd anon 20 16 +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "---------------------------" +echo "running userfaultfd_hugetlb" +echo "---------------------------" +# Test requires source and destination huge pages. Size of source +# (half_ufd_size_MB) is passed as argument to test. +./userfaultfd hugetlb $half_ufd_size_MB 32 $mnt/ufd_test_file +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi +rm -f $mnt/ufd_test_file + +echo "-------------------------" +echo "running userfaultfd_shmem" +echo "-------------------------" +./userfaultfd shmem 20 16 +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +#cleanup +umount $mnt +rm -rf $mnt +echo $nr_hugepgs > /proc/sys/vm/nr_hugepages + +echo "-----------------------" +echo "running compaction_test" +echo "-----------------------" +./compaction_test +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "----------------------" +echo "running on-fault-limit" +echo "----------------------" +sudo -u nobody ./on-fault-limit +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "--------------------" +echo "running map_populate" +echo "--------------------" +./map_populate +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "-------------------------" +echo "running mlock-random-test" +echo "-------------------------" +./mlock-random-test +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "--------------------" +echo "running mlock2-tests" +echo "--------------------" +./mlock2-tests +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "-------------------" +echo "running mremap_test" +echo "-------------------" +./mremap_test +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "-----------------" +echo "running thuge-gen" +echo "-----------------" +./thuge-gen +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +if [ $VADDR64 -ne 0 ]; then +echo "-----------------------------" +echo "running virtual_address_range" +echo "-----------------------------" +./virtual_address_range +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "-----------------------------" +echo "running virtual address 128TB switch test" +echo "-----------------------------" +./va_128TBswitch +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi +fi # VADDR64 + +echo "------------------------------------" +echo "running vmalloc stability smoke test" +echo "------------------------------------" +./test_vmalloc.sh smoke +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi + +echo "------------------------------------" +echo "running MREMAP_DONTUNMAP smoke test" +echo "------------------------------------" +./mremap_dontunmap +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi + +echo "running HMM smoke test" +echo "------------------------------------" +./test_hmm.sh smoke +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi + +exit $exitcode -- cgit v1.3.1